Added Pokémon movesets. #14
[zzz-pokedex.git] / pokedex / lookup.py
1 # encoding: utf8
2 import re
3
4 from sqlalchemy.sql import func
5 import whoosh
6 from whoosh.qparser import QueryParser
7 import whoosh.spelling
8
9 import pokedex.db.tables as tables
10
# Maps each indexed table's name to its mapped class.  Only the table name is
# stored in the index, so this is how a search hit is turned back into the
# class it came from.
_indexed_classes = (
    tables.Pokemon,
)
indexed_tables = {}
for cls in _indexed_classes:
    indexed_tables[cls.__tablename__] = cls
19
def _row_number_key(row):
    """Returns the row's id as a unicode string."""
    return unicode(row.id)

# Maps a table class to a list of functions; each takes a row of that table
# and returns one additional key to index the row under.  E.g. Pokémon can
# also be looked up purely by number
extra_keys = {
    tables.Pokemon: [
        _row_number_key,
    ],
}
27
# Cache of what get_index() builds, stored under the keys 'index', 'speller'
# and 'store'.  Empty until the first get_index() call has finished.
index_bits = {}


def get_index(session):
    """Returns (index, speller).

    Creates an index if one does not exist.

    `session` is the database session used to read the rows being indexed.
    It is only used when the index has to be built; once `index_bits` is
    populated, the cached objects are returned without touching it.
    """

    if index_bits:
        return index_bits['index'], index_bits['speller']

    store = whoosh.store.RamStorage()
    schema = whoosh.fields.Schema(
        name=whoosh.fields.ID(stored=True),
        table=whoosh.fields.STORED,
        row_id=whoosh.fields.STORED,
        language_id=whoosh.fields.STORED,
    )

    # Construct a straight lookup index
    index = whoosh.index.Index(store, schema=schema, create=True)
    writer = index.writer()

    # Index every name in all our tables of interest
    speller_entries = []
    for cls in indexed_tables.values():
        q = session.query(cls)

        # Only index base Pokémon formes
        if hasattr(cls, 'forme_base_pokemon_id'):
            q = q.filter_by(forme_base_pokemon_id=None)

        # Not every indexed table has to define extra keys; a class that is
        # missing from extra_keys contributes none rather than raising
        # KeyError halfway through building the index.
        extra_key_funcs = extra_keys.get(cls, ())

        for row in q.yield_per(5):
            row_key = dict(table=cls.__tablename__, row_id=row.id)

            # Names are indexed lowercased.  The spell checker does not
            # return exact matches, so e.g. 'nidoran' would neither match
            # exactly nor fuzzy-match if it only lived in the spelling
            # index.  Solution: add the same lowercased name as a regular
            # index row too.
            name = row.name.lower()
            writer.add_document(name=name, **row_key)

            speller_entries.append(name)

            for extra_key_func in extra_key_funcs:
                extra_key = extra_key_func(row)
                writer.add_document(name=extra_key, **row_key)

    writer.commit()

    # Construct and populate a spell-checker index.  Quicker to do it all
    # at once, as every call to add_* does a commit(), and those seem to be
    # expensive
    speller = whoosh.spelling.SpellChecker(index.storage)
    # WARNING: HERE BE DRAGONS
    # whoosh.spelling refuses to index things that don't look like words.
    # Unfortunately, this doesn't work so well for Pokémon (Mr. Mime,
    # Porygon-Z, etc.), and attempts to work around it lead to further
    # complications.
    # The below is copied from SpellChecker.add_scored_words without the check
    # for isalpha().  XXX get whoosh patched to make this unnecessary!
    writer = whoosh.writing.IndexWriter(speller.index())
    for word in speller_entries:
        fields = {"word": word, "score": 1}
        for size in xrange(speller.mingram, speller.maxgram + 1):
            nga = whoosh.analysis.NgramAnalyzer(size)
            gramlist = [t.text for t in nga(word)]
            if len(gramlist) > 0:
                fields["start%s" % size] = gramlist[0]
                fields["end%s" % size] = gramlist[-1]
                fields["gram%s" % size] = " ".join(gramlist)
        writer.add_document(**fields)
    writer.commit()
    # end copy-pasta

    # Populate the cache only now that everything has been built, so a
    # failure above leaves index_bits empty and the next call retries.
    index_bits['index'] = index
    index_bits['speller'] = speller
    index_bits['store'] = store
    return index_bits['index'], index_bits['speller']
107
def lookup(session, name, exact_only=False):
    """Attempts to find some sort of object, given a database session and name.

    Returns (objects, exact) where `objects` is a list of database objects, and
    `exact` is True iff the given name matched the returned objects exactly.

    Matching ignores case: the name is lowercased before it is looked up,
    because every name in the index is stored lowercased.

    This function ONLY does fuzzy matching if there are no exact matches, and
    never if `exact_only` is true.

    Formes are not returned; "Shaymin" will return only grass Shaymin.

    Currently recognizes:
    - Pokémon names: "Eevee"
    - Pokémon numbers: "133"
    """

    index, speller = get_index(session)

    # The index only contains lowercased names, and a Term query compares
    # exactly, so the input has to be lowercased too or "Eevee" never matches
    name = name.lower()

    exact = True

    # Look for exact name.  A Term object does an exact match, so we don't have
    # to worry about a query parser tripping on weird characters in the input
    searcher = index.searcher()
    query = whoosh.query.Term('name', name)
    results = searcher.search(query)

    if not exact_only and not results:
        # Nothing matched exactly; look for some fuzzy matches by running an
        # exact lookup on each spelling suggestion
        exact = False
        results = []

        for suggestion in speller.suggest(name, 3):
            query = whoosh.query.Term('name', suggestion)
            results.extend(searcher.search(query))

    # Convert results to db objects
    objects = []
    seen = set()
    for result in results:
        # Skip dupe results; the same row can be indexed under several keys
        seen_key = result['table'], result['row_id']
        if seen_key in seen:
            continue
        seen.add(seen_key)

        cls = indexed_tables[result['table']]
        obj = session.query(cls).get(result['row_id'])
        objects.append(obj)

    return objects, exact