55e1b5a1ba131689a1488f3746a2d6f561f2b136
6 from sqlalchemy
.sql
import func
8 import whoosh
.filedb
.filestore
9 import whoosh
.filedb
.fileindex
11 from whoosh
.qparser
import QueryParser
12 import whoosh
.spelling
14 from pokedex
.db
import connect
15 import pokedex
.db
.tables
as tables
17 # Dictionary of table name => table class.
18 # Need the table name so we can get the class from the table name after we
19 # retrieve something from the index
28 indexed_tables
[cls
.__tablename__
] = cls
30 # Dictionary of table class => list of functions, each producing an extra key
31 # to file that class's rows under; e.g. Pokémon can also be looked up purely by number
34 lambda row
: u
"move %d" % row
.id,
37 lambda row
: unicode(row
.id),
41 def open_index(directory
=None, session
=None, recreate
=False):
42 """Opens the whoosh index stored in the named directory and returns (index,
43 speller). If the index doesn't already exist, it will be created.
46 Directory containing the index. Defaults to a location within the
47 `pokedex` egg directory.
50 If the index needs to be created, this database session will be used.
51 Defaults to an attempt to connect to the default SQLite database
52 installed by `pokedex setup`.
55 If set to True, the whoosh index will be created even if it already exists.
61 directory
= pkg_resources
.resource_filename('pokedex',
67 # Attempt to open or create the index
68 directory_exists
= os
.path
.exists(directory
)
69 if directory_exists
and not recreate
:
70 # Already exists; should be an index!
72 index
= whoosh
.index
.open_dir(directory
, indexname
='pokedex')
73 speller
= whoosh
.index
.open_dir(directory
, indexname
='spelling')
75 except whoosh
.index
.EmptyIndexError
as e
:
76 # Apparently not a real index. Fall out of the if and create it
79 if not directory_exists
:
84 schema
= whoosh
.fields
.Schema(
85 name
=whoosh
.fields
.ID(stored
=True),
86 table
=whoosh
.fields
.STORED
,
87 row_id
=whoosh
.fields
.STORED
,
88 language
=whoosh
.fields
.STORED
,
90 # Whoosh 0.2 explodes when using a file-stored schema with no TEXT
92 dummy
=whoosh
.fields
.TEXT
,
95 index
= whoosh
.index
.create_in(directory
, schema
=schema
,
97 writer
= index
.writer()
99 # Index every name in all our tables of interest
101 for cls
in indexed_tables
.values():
102 q
= session
.query(cls
)
104 # Only index base Pokémon formes
105 if hasattr(cls
, 'forme_base_pokemon_id'):
106 q
= q
.filter_by(forme_base_pokemon_id
=None)
108 for row
in q
.yield_per(5):
109 row_key
= dict(table
=cls
.__tablename__
, row_id
=row
.id)
111 # Spelling index only indexes strings of letters, alas, so we
112 # reduce every name to this to make the index work. However, exact
113 # matches are not returned, so e.g. 'nidoran' would neither match
114 # exactly nor fuzzy-match. Solution: add the spelling-munged name
115 # as a regular index row too.
116 name
= row
.name
.lower()
117 writer
.add_document(name
=name
, **row_key
)
119 speller_entries
.append(name
)
121 for extra_key_func
in extra_keys
.get(cls
, []):
122 extra_key
= extra_key_func(row
)
123 writer
.add_document(name
=extra_key
, **row_key
)
128 old__schema
= whoosh
.spelling
.SpellChecker
._schema
129 def new__schema(self
):
130 schema
= old__schema(self
)
131 schema
.add('dummy', whoosh
.fields
.TEXT
)
133 whoosh
.spelling
.SpellChecker
._schema
= new__schema
135 # Construct and populate a spell-checker index. Quicker to do it all
136 # at once, as every call to add_* does a commit(), and those seem to be
138 speller
= whoosh
.spelling
.SpellChecker(index
.storage
, indexname
='spelling')
139 # WARNING: HERE BE DRAGONS
140 # whoosh.spelling refuses to index things that don't look like words.
141 # Unfortunately, this doesn't work so well for Pokémon (Mr. Mime,
142 # Porygon-Z, etc.), and attempts to work around it lead to further
144 # The below is copied from SpellChecker.add_scored_words without the check
145 # for isalpha(). XXX get whoosh patched to make this unnecessary!
146 writer
= speller
.index(create
=True).writer()
147 for word
in speller_entries
:
148 fields
= {"word": word
, "score": 1}
149 for size
in xrange(speller
.mingram
, speller
.maxgram
+ 1):
150 nga
= whoosh
.analysis
.NgramAnalyzer(size
)
151 gramlist
= [t
.text
for t
in nga(word
)]
152 if len(gramlist
) > 0:
153 fields
["start%s" % size
] = gramlist
[0]
154 fields
["end%s" % size
] = gramlist
[-1]
155 fields
["gram%s" % size
] = " ".join(gramlist
)
156 writer
.add_document(**fields
)
160 return index
, speller
163 def lookup(name
, session
=None, exact_only
=False):
164 """Attempts to find some sort of object, given a database session and name.
166 Returns (objects, exact) where `objects` is a list of database objects, and
167 `exact` is True iff the given name matched the returned objects exactly.
169 This function ONLY does fuzzy matching if there are no exact matches.
171 Formes are not returned; "Shaymin" will return only grass Shaymin.
173 Currently recognizes:
174 - Pokémon names: "Eevee"
177 Name of the thing to look for.
180 A database session to use for retrieving objects. As with open_index,
181 if this is not provided, a connection to the default database will be
185 If True, only exact matches are returned. If set to False (the
186 default), and the provided `name` doesn't match anything exactly,
187 spelling correction will be attempted.
193 index
, speller
= open_index()
197 # Look for exact name. A Term object does an exact match, so we don't have
198 # to worry about a query parser tripping on weird characters in the input
199 searcher
= index
.searcher()
200 query
= whoosh
.query
.Term('name', name
.lower())
201 results
= searcher
.search(query
)
204 # Look for some fuzzy matches
209 for suggestion
in speller
.suggest(name
, 3):
210 query
= whoosh
.query
.Term('name', suggestion
)
211 results
.extend(searcher
.search(query
))
213 # Convert results to db objects
216 for result
in results
:
218 seen_key
= result
['table'], result
['row_id']
221 seen
[seen_key
] = True
223 cls
= indexed_tables
[result
['table']]
224 obj
= session
.query(cls
).get(result
['row_id'])
227 return objects
, exact