X-Git-Url: http://git.veekun.com/zzz-dywypi.git/blobdiff_plain/e2fe83e21702a0fb951c4a15e4b41f2070280e68..dd70d7ecc6c580aca3271fe2ae8c0951e97f9d5e:/plugins/WWWJDIC/plugin.py diff --git a/plugins/WWWJDIC/plugin.py b/plugins/WWWJDIC/plugin.py index cefd09b..8d727a4 100644 --- a/plugins/WWWJDIC/plugin.py +++ b/plugins/WWWJDIC/plugin.py @@ -39,6 +39,10 @@ import urllib2 from BeautifulSoup import BeautifulSoup, NavigableString +def urlencode(string): + """Encodes some string as URL-encoded UTF-8.""" + return urllib.quote(string.encode('utf8')) + class WWWJDIC(callbacks.Plugin): """Add the help for "@plugin help WWWJDIC" here This should describe *how* to use this plugin.""" @@ -72,19 +76,41 @@ class WWWJDIC(callbacks.Plugin): # Even the raw results come wrapped in minimal HTML. This sucks. # They're just in this form though: - #
- # <br>entry 1
- # <br>entry 2
- # So grab everything from that paragraph that isn't a tag (<br>) or
- # blank space and spit it back out.
+ # <pre>
+ # entry 1 + # entry 2 + # So grab everything from that pre tag, split by lines, and spit it + # back out. soup = BeautifulSoup(res) - thing_ct = 0 - for thing in soup.p: - if not isinstance(thing, NavigableString): - continue + if not soup.pre: + # Nothing found! Try again but allow non-P words + res = urllib2.urlopen( + u"http://www.csse.monash.edu.au/~jwb/cgi-bin/wwwjdic.cgi?1ZUQ" + + url_thing + ) + soup = BeautifulSoup(res) + + if not soup.pre: + # Still nothing. Bail. + reply = u"Hmm, I can't figure out what that means. " \ + "Perhaps try denshi jisho directly: " + + jisho_url = u"http://jisho.org/words?jap={jap}&eng={eng}&dict=edict" + if thing[0] in ('@', '#'): + # Prefixes for roomaji + reply += jisho_url.format(jap=urlencode(thing[1:]), eng=u'') + # wtf why is any() overridden + elif filter(lambda c: ord(c) > 256, thing): + reply += jisho_url.format(jap=urlencode(thing), eng=u'') + else: + reply += jisho_url.format(jap=u'', eng=urlencode(thing)) + + self._reply(irc, reply) + return - # Everything ends with a newline, bleh! - entry = unicode(thing).strip() + thing_ct = 0 + for entry in soup.pre.string.splitlines(): + entry = entry.strip() if entry == '': continue