WWWJDIC: Fallback to non-common words.
[zzz-dywypi.git] / plugins / WWWJDIC / plugin.py
index cefd09b..8d727a4 100644 (file)
@@ -39,6 +39,10 @@ import urllib2
 from BeautifulSoup import BeautifulSoup, NavigableString
 
 
+def urlencode(string):
+    """Return `string` as UTF-8 bytes, percent-quoted for use in a URL."""
+    return urllib.quote(string.encode('utf8'))
+
 class WWWJDIC(callbacks.Plugin):
     """Add the help for "@plugin help WWWJDIC" here
     This should describe *how* to use this plugin."""
@@ -72,19 +76,41 @@ class WWWJDIC(callbacks.Plugin):
 
         # Even the raw results come wrapped in minimal HTML.  This sucks.
         # They're just in this form though:
-        # <p>
-        # <br>entry 1
-        # <br>entry 2
-        # So grab everything from that paragraph that isn't a tag (<br>) or
-        # blank space and spit it back out.
+        # <pre>
+        # entry 1
+        # entry 2
+        # So grab everything from that pre tag, split by lines, and spit it
+        # back out.
         soup = BeautifulSoup(res)
-        thing_ct = 0
-        for thing in soup.p:
-            if not isinstance(thing, NavigableString):
-                continue
+        if not soup.pre:
+            # Nothing found!  Retry, allowing non-common (non-"P") words
+            res = urllib2.urlopen(
+                u"http://www.csse.monash.edu.au/~jwb/cgi-bin/wwwjdic.cgi?1ZUQ"
+                + url_thing
+            )
+            soup = BeautifulSoup(res)
+
+        if not soup.pre:
+            # Still nothing.  Bail.
+            reply = u"Hmm, I can't figure out what that means.  " \
+                "Perhaps try denshi jisho directly: "
+
+            jisho_url = u"http://jisho.org/words?jap={jap}&eng={eng}&dict=edict"
+            if thing[0] in ('@', '#'):
+                # Prefixes for roomaji
+                reply += jisho_url.format(jap=urlencode(thing[1:]), eng=u'')
+            # wtf why is any() overridden
+            elif filter(lambda c: ord(c) > 256, thing):
+                reply += jisho_url.format(jap=urlencode(thing), eng=u'')
+            else:
+                reply += jisho_url.format(jap=u'', eng=urlencode(thing))
+
+            self._reply(irc, reply)
+            return
 
-            # Everything ends with a newline, bleh!
-            entry = unicode(thing).strip()
+        thing_ct = 0
+        for entry in soup.pre.string.splitlines():
+            entry = entry.strip()
             if entry == '':
                 continue