import re

from sqlalchemy.sql import func
import whoosh
import whoosh.fields
import whoosh.index
import whoosh.spelling
import whoosh.store
from whoosh.qparser import QueryParser

import pokedex.db.tables as tables
# Dictionary of table name => table class.
# Need the table name so we can get the class from the table name after we
# retrieve something from the index
# NOTE(review): the list of tables to index was lost from this copy of the
# file.  `tables.Pokemon` is inferred from lookup()'s docstring and from the
# forme filter in get_index() -- confirm against the canonical source.
indexed_tables = {}
for cls in [
    tables.Pokemon,
]:
    indexed_tables[cls.__tablename__] = cls
# Process-wide cache for the lazily built search index.  Empty until the first
# get_index() call; afterwards it holds the 'index', 'speller' and 'store' keys.
index_bits = {}


def get_index(session):
    """Returns (index, speller).

    Creates an index if one does not exist.

    The index lives in RAM and is built once per process: the first call
    walks every table in `indexed_tables` through `session` and indexes each
    row's lowercased name; later calls return the cached objects and ignore
    `session`.

    `index` is the Whoosh index of names; `speller` is a Whoosh spell checker
    seeded with the a-z-only form of every indexed name.
    """
    # Already built: hand back the cached objects.
    if index_bits:
        return index_bits['index'], index_bits['speller']

    store = whoosh.store.RamStorage()
    schema = whoosh.fields.Schema(
        name=whoosh.fields.ID(stored=True),
        spelling_name=whoosh.fields.ID(stored=True),
        table=whoosh.fields.STORED,
        row_id=whoosh.fields.STORED,
        language_id=whoosh.fields.STORED,
    )

    index = whoosh.index.Index(store, schema=schema, create=True)
    writer = index.writer()

    # Index every name in all our tables of interest
    for cls in indexed_tables.values():
        q = session.query(cls)

        # Only index base Pokémon formes
        if hasattr(cls, 'forme_base_pokemon_id'):
            q = q.filter_by(forme_base_pokemon_id=None)

        for row in q.yield_per(5):
            name = row.name.lower()
            # The spell checker only deals in [a-z]+ words, so keep a copy of
            # the name with everything else stripped out.
            spelling_name = re.sub('[^a-z]', '', name)
            writer.add_document(
                name=name,
                spelling_name=spelling_name,
                table=cls.__tablename__,
                # NOTE(review): inferred -- lookup() reads result['row_id'],
                # so the row's primary key must be stored here; confirm the
                # attribute is `id`.
                row_id=row.id,
            )

    # NOTE(review): inferred -- without a commit the documents added above
    # would not be visible to readers.
    writer.commit()

    ### Construct a spell-checker index
    speller = whoosh.spelling.SpellChecker(index.storage)

    # Can't use speller.add_field because it tries to intuit a frequency, and
    # names are in an ID field, which seems to be immune to frequency.
    # Not hard to add everything ourselves, though
    reader = index.doc_reader()
    try:
        speller.add_words([doc['spelling_name'] for doc in reader])
    finally:
        # NOTE(review): assumes the doc reader exposes close(); confirm for
        # the Whoosh version in use.
        reader.close()

    index_bits['index'] = index
    index_bits['speller'] = speller
    # Keep a reference to the RAM store alongside the index built on it.
    index_bits['store'] = store
    return index_bits['index'], index_bits['speller']
def lookup(session, name, exact_only=False):
    """Attempts to find some sort of object, given a database session and name.

    Returns (objects, exact) where `objects` is a list of database objects, and
    `exact` is True iff the given name matched the returned objects exactly.

    This function ONLY does fuzzy matching if there are no exact matches.

    Formes are not returned; "Shaymin" will return only grass Shaymin.

    Currently recognizes:
    - Pokémon names: "Eevee"

    `exact_only` restricts the search to the first, exact-name attempt.
    NOTE(review): the condition that consumed `exact_only` was lost from this
    copy of the file; this reading is inferred from the parameter name --
    confirm.
    """
    exact = True

    # Alas!  We have to make three attempts to find anything with this index.
    # First: Try an exact match for a name in the index.
    # Second: Try an exact match for a stripped-down name in the index.
    # Third: Get spelling suggestions.
    # The spelling module apparently only indexes *words* -- that is, [a-z]+.
    # So we have a separate field that contains the same name, stripped down to
    # just [a-z]+.
    # Unfortunately, exact matches aren't returned as spelling suggestions, so
    # we also have to do a regular index match against this separate field.
    # Otherwise, 'nidoran' will never match anything
    index, speller = get_index(session)

    # Look for exact name
    parser = QueryParser('name', schema=index.schema)
    results = index.find(name.lower(), parser=parser)

    if not results and not exact_only:
        # Look for a match with a reduced a-z name
        parser = QueryParser('spelling_name', schema=index.schema)
        results = index.find(name.lower(), parser=parser)

        # Look for some fuzzy matches
        if not results:
            exact = False
            # Collect hits from every suggestion in a plain list; the parser
            # is still the 'spelling_name' one, which is the field the
            # suggestions were drawn from.
            results = []
            for suggestion in speller.suggest(name, 3):
                results.extend(index.find(suggestion, parser=parser))

    # Convert results to db objects
    objects = []
    seen = set()
    for result in results:
        # Skip dupe results: several index hits can point at the same row
        seen_key = result['table'], result['row_id']
        if seen_key in seen:
            continue
        seen.add(seen_key)

        cls = indexed_tables[result['table']]
        obj = session.query(cls).get(result['row_id'])
        objects.append(obj)

    return objects, exact