from BeautifulSoup import BeautifulSoup, NavigableString
+def urlencode(string):
+    """Return ``string`` percent-encoded for use inside a URL.
+
+    The text is first encoded to UTF-8 and the resulting bytes are then
+    escaped with ``urllib.quote``.
+    """
+    utf8_bytes = string.encode('utf8')
+    return urllib.quote(utf8_bytes)
+
class WWWJDIC(callbacks.Plugin):
"""Add the help for "@plugin help WWWJDIC" here
This should describe *how* to use this plugin."""
# Even the raw results come wrapped in minimal HTML. This sucks.
# They're just in this form though:
- # <p>
- # <br>entry 1
- # <br>entry 2
- # So grab everything from that paragraph that isn't a tag (<br>) or
- # blank space and spit it back out.
+ # <pre>
+ # entry 1
+ # entry 2
+ # So grab everything from that pre tag, split by lines, and spit it
+ # back out.
soup = BeautifulSoup(res)
- thing_ct = 0
- for thing in soup.p:
- if not isinstance(thing, NavigableString):
- continue
+ if not soup.pre:
+ # Nothing found! Try again but allow non-P words
+ res = urllib2.urlopen(
+ u"http://www.csse.monash.edu.au/~jwb/cgi-bin/wwwjdic.cgi?1ZUQ"
+ + url_thing
+ )
+ soup = BeautifulSoup(res)
+
+ if not soup.pre:
+ # Still nothing. Bail.
+ reply = u"Hmm, I can't figure out what that means. " \
+ "Perhaps try denshi jisho directly: "
+
+ jisho_url = u"http://jisho.org/words?jap={jap}&eng={eng}&dict=edict"
+ if thing[0] in ('@', '#'):
+ # Prefixes for roomaji
+ reply += jisho_url.format(jap=urlencode(thing[1:]), eng=u'')
+ # wtf why is any() overridden
+ elif filter(lambda c: ord(c) > 256, thing):
+ reply += jisho_url.format(jap=urlencode(thing), eng=u'')
+ else:
+ reply += jisho_url.format(jap=u'', eng=urlencode(thing))
+
+ self._reply(irc, reply)
+ return
- # Everything ends with a newline, bleh!
- entry = unicode(thing).strip()
+ thing_ct = 0
+ for entry in soup.pre.string.splitlines():
+ entry = entry.strip()
if entry == '':
continue