6 from sqlalchemy
.sql
import func
8 import whoosh
.filedb
.filestore
9 import whoosh
.filedb
.fileindex
11 from whoosh
.qparser
import QueryParser
12 import whoosh
.spelling
14 from pokedex
.db
import connect
15 import pokedex
.db
.tables
as tables
# Dictionary mapping table name => table class.
# The table name is what gets stored in the index, so this lets us recover the
# class from the table name after we retrieve something from the index.
28 indexed_tables
[cls
.__tablename__
] = cls
# Dictionary of extra keys under which to file certain types of objects, e.g.
# Pokémon can also be looked up purely by number
34 lambda row
: u
"move %d" % row
.id,
37 lambda row
: unicode(row
.id),
41 def open_index(directory
=None, session
=None, recreate
=False):
42 """Opens the whoosh index stored in the named directory and returns (index,
43 speller). If the index doesn't already exist, it will be created.
46 Directory containing the index. Defaults to a location within the
47 `pokedex` egg directory.
50 If the index needs to be created, this database session will be used.
51 Defaults to an attempt to connect to the default SQLite database
52 installed by `pokedex setup`.
55 If set to True, the whoosh index will be created even if it already
61 directory
= pkg_resources
.resource_filename('pokedex',
67 # Attempt to open or create the index
68 directory_exists
= os
.path
.exists(directory
)
69 if directory_exists
and not recreate
:
70 # Already exists; should be an index!
72 index
= whoosh
.index
.open_dir(directory
, indexname
='pokedex')
73 spell_store
= whoosh
.filedb
.filestore
.FileStorage(directory
)
74 speller
= whoosh
.spelling
.SpellChecker(spell_store
,
77 except whoosh
.index
.EmptyIndexError
as e
:
78 # Apparently not a real index. Fall out of the if and create it
81 if not directory_exists
:
86 schema
= whoosh
.fields
.Schema(
87 name
=whoosh
.fields
.ID(stored
=True),
88 table
=whoosh
.fields
.STORED
,
89 row_id
=whoosh
.fields
.STORED
,
90 language
=whoosh
.fields
.STORED
,
93 index
= whoosh
.index
.create_in(directory
, schema
=schema
,
95 writer
= index
.writer()
97 # Index every name in all our tables of interest
99 for cls
in indexed_tables
.values():
100 q
= session
.query(cls
)
102 # Only index base Pokémon formes
103 if hasattr(cls
, 'forme_base_pokemon_id'):
104 q
= q
.filter_by(forme_base_pokemon_id
=None)
106 for row
in q
.yield_per(5):
107 row_key
= dict(table
=cls
.__tablename__
, row_id
=row
.id)
109 # Spelling index only indexes strings of letters, alas, so we
110 # reduce every name to this to make the index work. However, exact
111 # matches are not returned, so e.g. 'nidoran' would neither match
112 # exactly nor fuzzy-match. Solution: add the spelling-munged name
113 # as a regular index row too.
114 name
= row
.name
.lower()
115 writer
.add_document(name
=name
, **row_key
)
117 speller_entries
.append(name
)
119 for extra_key_func
in extra_keys
.get(cls
, []):
120 extra_key
= extra_key_func(row
)
121 writer
.add_document(name
=extra_key
, **row_key
)
125 # Construct and populate a spell-checker index. Quicker to do it all
126 # at once, as every call to add_* does a commit(), and those seem to be
128 speller
= whoosh
.spelling
.SpellChecker(index
.storage
, indexname
='spelling')
129 speller
.add_words(speller_entries
)
131 return index
, speller
134 def lookup(name
, session
=None, indices
=None, exact_only
=False):
135 """Attempts to find some sort of object, given a database session and name.
137 Returns (objects, exact) where `objects` is a list of database objects, and
138 `exact` is True iff the given name matched the returned objects exactly.
140 This function ONLY does fuzzy matching if there are no exact matches.
142 Formes are not returned; "Shaymin" will return only grass Shaymin.
144 Currently recognizes:
145 - Pokémon names: "Eevee"
148 Name of the thing to look for.
151 A database session to use for retrieving objects. As with get_index,
152 if this is not provided, a connection to the default database will be
156 Tuple of index, speller as returned from `open_index()`. Defaults to
157 a call to `open_index()`.
160 If True, only exact matches are returned. If set to False (the
161 default), and the provided `name` doesn't match anything exactly,
162 spelling correction will be attempted.
169 index
, speller
= indices
171 index
, speller
= open_index()
175 # Look for exact name. A Term object does an exact match, so we don't have
176 # to worry about a query parser tripping on weird characters in the input
177 searcher
= index
.searcher()
178 query
= whoosh
.query
.Term('name', name
.lower())
179 results
= searcher
.search(query
)
182 # Look for some fuzzy matches
187 for suggestion
in speller
.suggest(name
, 3):
188 query
= whoosh
.query
.Term('name', suggestion
)
189 results
.extend(searcher
.search(query
))
191 # Convert results to db objects
194 for result
in results
:
196 seen_key
= result
['table'], result
['row_id']
199 seen
[seen_key
] = True
201 cls
= indexed_tables
[result
['table']]
202 obj
= session
.query(cls
).get(result
['row_id'])
205 return objects
, exact