4 from sqlalchemy
.sql
import func
6 from whoosh
.qparser
import QueryParser
9 import pokedex
.db
.tables
as tables
11 # Dictionary of table name => table class.
12 # Need the table name so we can get the class from the table name after we
13 # retrieve something from the index
18 indexed_tables
[cls
.__tablename__
] = cls
# Dictionary of extra keys under which certain types of objects are also
# filed, e.g. Pokémon can also be looked up purely by number
24 lambda row
: unicode(row
.id),
def get_index(session):
    """Returns (index, speller).

    Creates an index if one does not exist.

    `session` is the database session used to query every class in
    `indexed_tables`.  The index is kept in RAM and cached in the module-level
    `index_bits` dictionary, so the tables are only scanned when the cache is
    empty; later calls return the cached objects.
    """
    # Reuse the cached index and speller once they have been built
    if index_bits:
        return index_bits['index'], index_bits['speller']

    store = whoosh.store.RamStorage()
    schema = whoosh.fields.Schema(
        name=whoosh.fields.ID(stored=True),
        table=whoosh.fields.STORED,
        row_id=whoosh.fields.STORED,
        language_id=whoosh.fields.STORED,
    )

    # Construct a straight lookup index
    index = whoosh.index.Index(store, schema=schema, create=True)
    writer = index.writer()

    # Index every name in all our tables of interest, remembering each name
    # so the spell-checker can be populated in one go afterwards
    speller_entries = []
    for cls in indexed_tables.values():
        q = session.query(cls)

        # Only index base Pokémon formes
        if hasattr(cls, 'forme_base_pokemon_id'):
            q = q.filter_by(forme_base_pokemon_id=None)

        for row in q.yield_per(5):
            # Stored fields that let lookup() map a hit back to a db row
            row_key = dict(table=cls.__tablename__, row_id=row.id)

            # Names are stored lowercased.  The same string goes into both the
            # regular index and the spell-checker entries, so that a name can
            # be found exactly as well as through spelling suggestions.
            name = row.name.lower()
            writer.add_document(name=name, **row_key)
            speller_entries.append(name)

            # File the row under any additional keys defined for its class
            for extra_key_func in extra_keys[cls]:
                extra_key = extra_key_func(row)
                writer.add_document(name=extra_key, **row_key)

    # Make the added documents visible to searchers
    writer.commit()

    # Construct and populate a spell-checker index.  Quicker to do it all
    # at once, as every call to add_* does its own commit().
    speller = whoosh.spelling.SpellChecker(index.storage)

    # WARNING: HERE BE DRAGONS
    # whoosh.spelling refuses to index things that don't look like words.
    # Unfortunately, this doesn't work so well for Pokémon (Mr. Mime,
    # Porygon-Z, etc.).
    # The below is copied from SpellChecker.add_scored_words without the check
    # for isalpha().  XXX get whoosh patched to make this unnecessary!
    writer = whoosh.writing.IndexWriter(speller.index())
    for word in speller_entries:
        fields = {"word": word, "score": 1}
        for size in xrange(speller.mingram, speller.maxgram + 1):
            nga = whoosh.analysis.NgramAnalyzer(size)
            gramlist = [t.text for t in nga(word)]

            # No grams of this size were produced for the word; indexing
            # gramlist[0] below would raise IndexError, so skip this size
            if not gramlist:
                continue

            fields["start%s" % size] = gramlist[0]
            fields["end%s" % size] = gramlist[-1]
            fields["gram%s" % size] = " ".join(gramlist)

        writer.add_document(**fields)

    writer.commit()

    # Cache everything, including the storage object backing the index
    index_bits['index'] = index
    index_bits['speller'] = speller
    index_bits['store'] = store
    return index_bits['index'], index_bits['speller']
def lookup(session, name, exact_only=False):
    """Attempts to find some sort of object, given a database session and name.

    Returns (objects, exact) where `objects` is a list of database objects, and
    `exact` is True iff the given name matched the returned objects exactly.

    This function ONLY does fuzzy matching if there are no exact matches.
    If `exact_only` is true, fuzzy matching is never attempted, and a name
    with no exact match yields an empty list.

    Formes are not returned; "Shaymin" will return only grass Shaymin.

    Currently recognizes:
    - Pokémon names: "Eevee"
    """
    index, speller = get_index(session)

    # Names are indexed in lowercase, so normalize the query the same way;
    # otherwise "Eevee" could never match the indexed "eevee"
    name = name.lower()

    # Look for exact name.  A Term object does an exact match, so we don't have
    # to worry about a query parser tripping on weird characters in the input
    searcher = index.searcher()
    query = whoosh.query.Term('name', name)
    results = list(searcher.search(query))
    exact = True

    if not results and not exact_only:
        # Look for some fuzzy matches
        exact = False
        for suggestion in speller.suggest(name, 3):
            query = whoosh.query.Term('name', suggestion)
            results.extend(searcher.search(query))

    # Convert results to db objects
    objects = []
    seen = set()
    for result in results:
        # The same row can be indexed under several keys; return it only once
        seen_key = result['table'], result['row_id']
        if seen_key in seen:
            continue
        seen.add(seen_key)

        cls = indexed_tables[result['table']]
        obj = session.query(cls).get(result['row_id'])
        objects.append(obj)

    return objects, exact