4 from sqlalchemy
.sql
import func
6 from whoosh
.qparser
import QueryParser
9 import pokedex
.db
.tables
as tables
11 # Dictionary of table name => table class.
12 # Need the table name so we can get the class from the table name after we
13 # retrieve something from the index
22 indexed_tables
[cls
.__tablename__
] = cls
24 # Dictionary of table class => list of functions that each build an extra key
25 # under which a row is also indexed, e.g. Pokémon can also be looked up purely by number
28 lambda row
: u
"move %d" % row
.id,
31 lambda row
: unicode(row
.id),
36 def get_index(session
):
37 """Returns (index, speller).
39 Creates an index if one does not exist.
43 return index_bits
['index'], index_bits
['speller']
45 store
= whoosh
.store
.RamStorage()
46 schema
= whoosh
.fields
.Schema(
47 name
=whoosh
.fields
.ID(stored
=True),
48 table
=whoosh
.fields
.STORED
,
49 row_id
=whoosh
.fields
.STORED
,
50 language_id
=whoosh
.fields
.STORED
,
53 # Construct a straight lookup index
54 index
= whoosh
.index
.Index(store
, schema
=schema
, create
=True)
55 writer
= index
.writer()
57 # Index every name in all our tables of interest
59 for cls
in indexed_tables
.values():
60 q
= session
.query(cls
)
62 # Only index base Pokémon formes
63 if hasattr(cls
, 'forme_base_pokemon_id'):
64 q
= q
.filter_by(forme_base_pokemon_id
=None)
66 for row
in q
.yield_per(5):
67 row_key
= dict(table
=cls
.__tablename__
, row_id
=row
.id)
69 # Spelling index only indexes strings of letters, alas, so we
70 # reduce every name to this to make the index work. However, exact
71 # matches are not returned, so e.g. 'nidoran' would neither match
72 # exactly nor fuzzy-match. Solution: add the spelling-munged name
73 # as a regular index row too.
74 name
= row
.name
.lower()
75 writer
.add_document(name
=name
, **row_key
)
77 speller_entries
.append(name
)
79 for extra_key_func
in extra_keys
.get(cls
, []):
80 extra_key
= extra_key_func(row
)
81 writer
.add_document(name
=extra_key
, **row_key
)
85 # Construct and populate a spell-checker index. Quicker to do it all
86 # at once, as every call to add_* does a commit(), and those seem to be
88 speller
= whoosh
.spelling
.SpellChecker(index
.storage
)
89 # WARNING: HERE BE DRAGONS
90 # whoosh.spelling refuses to index things that don't look like words.
91 # Unfortunately, this doesn't work so well for Pokémon (Mr. Mime,
92 # Porygon-Z, etc.), and attempts to work around it lead to further
94 # The below is copied from SpellChecker.add_scored_words without the check
95 # for isalpha(). XXX get whoosh patched to make this unnecessary!
96 writer
= whoosh
.writing
.IndexWriter(speller
.index())
97 for word
in speller_entries
:
98 fields
= {"word": word
, "score": 1}
99 for size
in xrange(speller
.mingram
, speller
.maxgram
+ 1):
100 nga
= whoosh
.analysis
.NgramAnalyzer(size
)
101 gramlist
= [t
.text
for t
in nga(word
)]
102 if len(gramlist
) > 0:
103 fields
["start%s" % size
] = gramlist
[0]
104 fields
["end%s" % size
] = gramlist
[-1]
105 fields
["gram%s" % size
] = " ".join(gramlist
)
106 writer
.add_document(**fields
)
110 index_bits
['index'] = index
111 index_bits
['speller'] = speller
112 index_bits
['store'] = store
113 return index_bits
['index'], index_bits
['speller']
115 def lookup(session
, name
, exact_only
=False):
116 """Attempts to find some sort of object, given a database session and name.
118 Returns (objects, exact) where `objects` is a list of database objects, and
119 `exact` is True iff the given name matched the returned objects exactly.
121 This function ONLY does fuzzy matching if there are no exact matches.
123 Formes are not returned; "Shaymin" will return only grass Shaymin.
125 Currently recognizes:
126 - Pokémon names: "Eevee"
131 index
, speller
= get_index(session
)
133 # Look for exact name. A Term object does an exact match, so we don't have
134 # to worry about a query parser tripping on weird characters in the input
135 searcher
= index
.searcher()
136 query
= whoosh
.query
.Term('name', name
.lower())
137 results
= searcher
.search(query
)
140 # Look for some fuzzy matches
145 for suggestion
in speller
.suggest(name
, 3):
146 query
= whoosh
.query
.Term('name', suggestion
)
147 results
.extend(searcher
.search(query
))
149 # Convert results to db objects
152 for result
in results
:
154 seen_key
= result
['table'], result
['row_id']
157 seen
[seen_key
] = True
159 cls
= indexed_tables
[result
['table']]
160 obj
= session
.query(cls
).get(result
['row_id'])
163 return objects
, exact