X-Git-Url: http://git.veekun.com/zzz-dywypi.git/blobdiff_plain/e2fe83e21702a0fb951c4a15e4b41f2070280e68..2e65d989581ac75a844d4ad02f1ec04e5d9cd551:/plugins/WWWJDIC/plugin.py diff --git a/plugins/WWWJDIC/plugin.py b/plugins/WWWJDIC/plugin.py index cefd09b..8d727a4 100644 --- a/plugins/WWWJDIC/plugin.py +++ b/plugins/WWWJDIC/plugin.py @@ -39,6 +39,10 @@ import urllib2 from BeautifulSoup import BeautifulSoup, NavigableString +def urlencode(string): + """Encodes some string as URL-encoded UTF-8.""" + return urllib.quote(string.encode('utf8')) + class WWWJDIC(callbacks.Plugin): """Add the help for "@plugin help WWWJDIC" here This should describe *how* to use this plugin.""" @@ -72,19 +76,41 @@ class WWWJDIC(callbacks.Plugin): # Even the raw results come wrapped in minimal HTML. This sucks. # They're just in this form though: - #

- # <br>entry 1 - # <br>entry 2 - # So grab everything from that paragraph that isn't a tag (<br>) or - # blank space and spit it back out.
+        # <pre>
+        # entry 1
+        # entry 2
+        # So grab everything from that pre tag, split by lines, and spit it
+        # back out.
         soup = BeautifulSoup(res)
-        thing_ct = 0
-        for thing in soup.p:
-            if not isinstance(thing, NavigableString):
-                continue
+        if not soup.pre:
+            # Nothing found!  Try again but allow non-P words
+            res = urllib2.urlopen(
+                u"http://www.csse.monash.edu.au/~jwb/cgi-bin/wwwjdic.cgi?1ZUQ"
+                + url_thing
+            )
+            soup = BeautifulSoup(res)
+
+        if not soup.pre:
+            # Still nothing.  Bail.
+            reply = u"Hmm, I can't figure out what that means.  " \
+                "Perhaps try denshi jisho directly: "
+
+            jisho_url = u"http://jisho.org/words?jap={jap}&eng={eng}&dict=edict"
+            if thing[0] in ('@', '#'):
+                # Prefixes for roomaji
+                reply += jisho_url.format(jap=urlencode(thing[1:]), eng=u'')
+            # wtf why is any() overridden
+            elif filter(lambda c: ord(c) > 256, thing):
+                reply += jisho_url.format(jap=urlencode(thing), eng=u'')
+            else:
+                reply += jisho_url.format(jap=u'', eng=urlencode(thing))
+
+            self._reply(irc, reply)
+            return
 
-            # Everything ends with a newline, bleh!
-            entry = unicode(thing).strip()
+        thing_ct = 0
+        for entry in soup.pre.string.splitlines():
+            entry = entry.strip()
             if entry == '':
                 continue