5e548a30b12a1368eaf6512d48a40d0b2c2d1395
[zzz-dywypi.git] / plugins / WWWJDIC / plugin.py
1 ###
2 # Copyright (c) 2010, Alex Munroe
3 # All rights reserved.
4 #
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are met:
7 #
8 # * Redistributions of source code must retain the above copyright notice,
9 # this list of conditions, and the following disclaimer.
10 # * Redistributions in binary form must reproduce the above copyright notice,
11 # this list of conditions, and the following disclaimer in the
12 # documentation and/or other materials provided with the distribution.
13 # * Neither the name of the author of this software nor the name of
14 # contributors to this software may be used to endorse or promote products
15 # derived from this software without specific prior written consent.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 # POSSIBILITY OF SUCH DAMAGE.
28
29 ###
30
31 import supybot.utils as utils
32 from supybot.commands import *
33 import supybot.plugins as plugins
34 import supybot.ircutils as ircutils
35 import supybot.callbacks as callbacks
36
37 import urllib
38 import urllib2
39 from BeautifulSoup import BeautifulSoup, NavigableString
40
41
class WWWJDIC(callbacks.Plugin):
    """Looks up Japanese words in the EDICT dictionary through WWWJDIC.

    The "jdic" command replies with up to three matching entries.  When
    nothing matches it replies with a jisho.org search link instead."""
    threaded = True

    # WWWJDIC backdoor query prefix; the URL-quoted search term is appended.
    # 1 = edict; Z = raw results; U = utf8 input; R = exact + common
    _WWWJDIC_URL = \
        u"http://www.csse.monash.edu.au/~jwb/cgi-bin/wwwjdic.cgi?1ZUR"

    # Fallback search link offered when WWWJDIC returns no entries.
    _JISHO_URL = u"http://jisho.org/words?jap={jap}&eng={eng}&dict=edict"

    _NOT_FOUND = u"Hmm, nothing found -- but I only look for exact " \
        "matches and common words. Try denshi jisho directly: "

    # Don't send back more than three entries; that's probably plenty.
    _MAX_ENTRIES = 3

    def jdic(self, irc, msg, args, thing):
        """<thing...>

        Looks up <thing> in the EDICT Japanese dictionary.
        To use roomaji, prefix with @ for hiragana or # for katakana."""

        # Fix encoding: work in unicode internally.  Byte strings are tried
        # as UTF-8 first; latin1 is the fallback because it can decode any
        # byte sequence.
        if not isinstance(thing, unicode):
            raw_thing = thing
            try:
                thing = raw_thing.decode('utf8')
            except UnicodeDecodeError:
                thing = raw_thing.decode('latin1')

        # urllib doesn't understand unicode at all; manually encode as bytes
        # and then urlencode.
        url_thing = urllib.quote(thing.encode('utf8'))

        # Hit up wwwjdic.  Close the response even if parsing blows up, so
        # the connection is not leaked.
        res = urllib2.urlopen(self._WWWJDIC_URL + url_thing)
        try:
            soup = BeautifulSoup(res)
        finally:
            res.close()

        # Even the raw results come wrapped in minimal HTML.  They're just
        # in this form though:
        #   <pre>
        #   entry 1
        #   entry 2
        # So grab everything from that pre tag, split by lines, and spit it
        # back out.
        entries = []
        pre = soup.pre
        if pre is not None:
            # Join every text node rather than using pre.string, which is
            # None unless the tag has exactly one string child.
            text = u''.join(pre.findAll(text=True))
            for line in text.splitlines():
                line = line.strip()
                if not line:
                    continue
                entries.append(line)
                if len(entries) >= self._MAX_ENTRIES:
                    break

        if not entries:
            # No <pre> at all, or one holding nothing but whitespace.
            self._reply(irc, self._not_found_message(thing))
            return

        for entry in entries:
            self._reply(irc, entry)

    jdic = wrap(jdic, [rest('something')])

    def _not_found_message(self, thing):
        """Returns the "nothing found" reply, ending in a jisho.org link.

        thing is the unicode search term.  It goes in the link's "jap"
        field when it carries a roomaji prefix (@ or #, which is stripped)
        or contains a character with a code point above 256; otherwise it
        goes in the "eng" field.  The term is URL-quoted so that spaces and
        characters such as & or # cannot break the link.
        """
        jap = eng = u''
        # Slice instead of indexing so an empty term cannot raise.
        if thing[:1] in (u'@', u'#'):
            # Prefixes for roomaji
            jap = thing[1:]
        # A list comprehension stands in for any(): per the original
        # author's note, the builtin is overridden in this module.
        elif [c for c in thing if ord(c) > 256]:
            jap = thing
        else:
            eng = thing

        return self._NOT_FOUND + self._JISHO_URL.format(
            jap=urllib.quote(jap.encode('utf8')),
            eng=urllib.quote(eng.encode('utf8')),
        )

    def _reply(self, irc, response):
        """Wraps irc.reply() to do some Unicode encoding.

        Byte strings are passed through untouched; anything else (in
        practice unicode) is encoded as UTF-8 first.
        """
        if isinstance(response, str):
            irc.reply(response)
        else:
            irc.reply(response.encode('utf8'))
124
125
126
127
# Module-level alias for the plugin class defined above.  Presumably this is
# the name the Supybot plugin loader looks up -- confirm against the loader.
128 Class = WWWJDIC
129
130
131 # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: