WWWJDIC: Fallback to non-common words.
[zzz-dywypi.git] / plugins / WWWJDIC / plugin.py
1 ###
2 # Copyright (c) 2010, Alex Munroe
3 # All rights reserved.
4 #
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are met:
7 #
8 # * Redistributions of source code must retain the above copyright notice,
9 # this list of conditions, and the following disclaimer.
10 # * Redistributions in binary form must reproduce the above copyright notice,
11 # this list of conditions, and the following disclaimer in the
12 # documentation and/or other materials provided with the distribution.
13 # * Neither the name of the author of this software nor the name of
14 # contributors to this software may be used to endorse or promote products
15 # derived from this software without specific prior written consent.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 # POSSIBILITY OF SUCH DAMAGE.
28
29 ###
30
31 import supybot.utils as utils
32 from supybot.commands import *
33 import supybot.plugins as plugins
34 import supybot.ircutils as ircutils
35 import supybot.callbacks as callbacks
36
37 import urllib
38 import urllib2
39 from BeautifulSoup import BeautifulSoup, NavigableString
40
41
def urlencode(string):
    """Return ``string`` percent-encoded as UTF-8 for use in a URL.

    Text is encoded to UTF-8 before quoting.  A byte string is taken to be
    UTF-8 already and is quoted as-is; calling ``.encode()`` on it would, on
    Python 2, first decode it as ASCII and raise ``UnicodeDecodeError`` for
    any non-ASCII byte.
    """
    # quote() lives in a different module depending on the interpreter;
    # resolve it locally so this helper works on either.
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3

    if not isinstance(string, bytes):
        string = string.encode('utf8')
    return quote(string)
45
class WWWJDIC(callbacks.Plugin):
    """Looks up Japanese or English words in the EDICT dictionary via WWWJDIC.

    Use the "jdic" command with the word to look up.  At most a handful of
    matching entries are sent back; when nothing matches, a link to a
    jisho.org search for the same word is offered instead."""
    threaded = True

    # WWWJDIC query prefix: 1 = edict; Z = raw results; U = utf8 input.
    # A search-type letter and the URL-encoded query are appended to it.
    _WWWJDIC_URL = u"http://www.csse.monash.edu.au/~jwb/cgi-bin/wwwjdic.cgi?1ZU"

    # Search types, in the order they are tried: R = exact + common words,
    # Q = the same search but also allowing non-common (non-P) words.
    _SEARCH_TYPES = (u"R", u"Q")

    # Fallback search link handed to the user when WWWJDIC finds nothing.
    _JISHO_URL = u"http://jisho.org/words?jap={jap}&eng={eng}&dict=edict"

    # Don't send back more than this many entries; that's probably plenty.
    _MAX_ENTRIES = 3

    def jdic(self, irc, msg, args, thing):
        """<thing...>

        Looks up <thing> in the EDICT Japanese dictionary.
        To use roomaji, prefix with @ for hiragana or # for katakana."""

        # The argument may arrive as a byte string.  Try UTF-8 first and fall
        # back to Latin-1, which can decode any byte sequence.
        if not isinstance(thing, unicode):
            try:
                thing = thing.decode('utf8')
            except UnicodeDecodeError:
                thing = thing.decode('latin1')

        url_thing = urlencode(thing)

        # Try the strictest search first and only widen it when that search
        # produced no entries at all.
        entries = []
        for search_type in self._SEARCH_TYPES:
            entries = self._lookup(search_type, url_thing)
            if entries:
                break

        if not entries:
            # Still nothing.  Bail.
            self._reply(irc, self._not_found_message(thing))
            return

        for entry in entries[:self._MAX_ENTRIES]:
            self._reply(irc, entry)

    jdic = wrap(jdic, [rest('something')])

    def _lookup(self, search_type, url_thing):
        """Fetch one WWWJDIC result page and return its entries.

        ``search_type`` is the search-type letter appended to the query
        prefix and ``url_thing`` is the already URL-encoded query.  Returns a
        list of stripped, non-empty result lines; the list is empty when the
        page has no <pre> element or the element holds only blank lines.
        """
        res = urllib2.urlopen(self._WWWJDIC_URL + search_type + url_thing)
        try:
            soup = BeautifulSoup(res)
        finally:
            # Release the connection whether or not parsing succeeded.
            res.close()

        # Even the raw results come wrapped in minimal HTML:
        #   <pre>
        #   entry 1
        #   entry 2
        # So grab everything from that pre tag and split it by lines.
        if not soup.pre:
            return []

        text = soup.pre.string
        if text is None:
            # .string is None when <pre> has more than one child node;
            # stitch all of its text nodes together instead.
            text = u''.join(soup.pre.findAll(text=True))

        stripped = [line.strip() for line in text.splitlines()]
        return [line for line in stripped if line]

    def _not_found_message(self, thing):
        """Build the reply pointing the user at a jisho.org search."""
        reply = u"Hmm, I can't figure out what that means. " \
            u"Perhaps try denshi jisho directly: "

        if thing[:1] in (u'@', u'#'):
            # Prefixes for roomaji: drop the marker and search as Japanese.
            return reply + self._JISHO_URL.format(
                jap=urlencode(thing[1:]), eng=u'')

        # Any character beyond this code point marks the query as Japanese.
        # (A list comprehension is used because the star import from
        # supybot.commands overrides the builtin any().)
        if [c for c in thing if ord(c) > 256]:
            return reply + self._JISHO_URL.format(
                jap=urlencode(thing), eng=u'')

        return reply + self._JISHO_URL.format(jap=u'', eng=urlencode(thing))

    def _reply(self, irc, response):
        """Send ``response`` through irc.reply() as a byte string.

        Anything that is not already a ``str`` is encoded to UTF-8 first.
        """
        if not isinstance(response, str):
            response = response.encode('utf8')
        irc.reply(response)
136
137
138
139
# Module-level alias for the plugin class defined above.
# NOTE(review): presumably the name the supybot plugin loader looks up -- confirm.
140 Class = WWWJDIC
141
142
143 # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: