5 from sqlalchemy
.sql
import func
7 import whoosh
.filedb
.filestore
8 import whoosh
.filedb
.fileindex
10 from whoosh
.qparser
import QueryParser
11 import whoosh
.spelling
13 import pokedex
.db
.tables
as tables
15 # Dictionary of table name => table class.
16 # Need the table name so we can get the class from the table name after we
17 # retrieve something from the index
26 indexed_tables
[cls
.__tablename__
] = cls
# Dictionary of extra keys under which certain types of objects are also
# filed, e.g. Pokémon can also be looked up purely by number
32 lambda row
: u
"move %d" % row
.id,
35 lambda row
: unicode(row
.id),
def get_index(session):
    """Returns (index, speller).

    Creates an index if one does not exist.

    The index and spell checker are built on the first call and cached in the
    module-level `index_bits` dictionary; later calls return the cached pair
    without touching the database.

    `session` is the database session used to query every class in
    `indexed_tables`.  Each row's lowercased name is indexed, along with any
    additional keys produced by the `extra_keys` functions for its class.
    """

    # Already built during this process; reuse it
    if index_bits:
        return index_bits['index'], index_bits['speller']

    store = whoosh.filedb.filestore.RamStorage()
    schema = whoosh.fields.Schema(
        name=whoosh.fields.ID(stored=True),
        table=whoosh.fields.STORED,
        row_id=whoosh.fields.STORED,
        language=whoosh.fields.STORED,

        # Whoosh 0.2 explodes when using a file-stored schema with no TEXT
        # columns, so give it one
        dummy=whoosh.fields.TEXT,
    )

    index_directory = '/var/tmp/pokedex'
    if not os.path.exists(index_directory):
        os.mkdir(index_directory)
    index = whoosh.index.create_in(index_directory, schema=schema)
    writer = index.writer()

    # Index every name in all our tables of interest
    speller_entries = []
    for cls in indexed_tables.values():
        q = session.query(cls)

        # Only index base Pokémon formes
        if hasattr(cls, 'forme_base_pokemon_id'):
            q = q.filter_by(forme_base_pokemon_id=None)

        for row in q.yield_per(5):
            row_key = dict(table=cls.__tablename__, row_id=row.id)

            # Spelling index only indexes strings of letters, alas, so we
            # reduce every name to this to make the index work.  However, exact
            # matches are not returned, so e.g. 'nidoran' would neither match
            # exactly nor fuzzy-match.  Solution: add the spelling-munged name
            # as a regular index row too.
            name = row.name.lower()
            writer.add_document(name=name, **row_key)

            speller_entries.append(name)

            for extra_key_func in extra_keys.get(cls, []):
                extra_key = extra_key_func(row)
                writer.add_document(name=extra_key, **row_key)

    writer.commit()

    # The spell checker builds its own schema; wrap that method so the schema
    # also gets the dummy TEXT column the main schema needs above
    old__schema = whoosh.spelling.SpellChecker._schema
    def new__schema(self):
        schema = old__schema(self)
        schema.add('dummy', whoosh.fields.TEXT)
        return schema
    whoosh.spelling.SpellChecker._schema = new__schema

    # Construct and populate a spell-checker index.  Quicker to do it all
    # at once, as every call to add_* does a commit()
    speller = whoosh.spelling.SpellChecker(index.storage)
    # WARNING: HERE BE DRAGONS
    # whoosh.spelling refuses to index things that don't look like words.
    # Unfortunately, this doesn't work so well for Pokémon (Mr. Mime,
    # Porygon-Z, etc.).
    # The below is copied from SpellChecker.add_scored_words without the check
    # for isalpha().  XXX get whoosh patched to make this unnecessary!
    writer = speller.index(create=True).writer()

    # The analyzers depend only on the gram size, so build each one once
    # rather than once per word
    analyzers = [
        (size, whoosh.analysis.NgramAnalyzer(size))
        for size in range(speller.mingram, speller.maxgram + 1)
    ]
    for word in speller_entries:
        fields = {"word": word, "score": 1}
        for size, nga in analyzers:
            gramlist = [t.text for t in nga(word)]
            if len(gramlist) > 0:
                fields["start%s" % size] = gramlist[0]
                fields["end%s" % size] = gramlist[-1]
                fields["gram%s" % size] = " ".join(gramlist)
        writer.add_document(**fields)
    writer.commit()

    index_bits['index'] = index
    index_bits['speller'] = speller
    index_bits['store'] = store
    return index_bits['index'], index_bits['speller']
def lookup(session, name, exact_only=False):
    """Attempts to find some sort of object, given a database session and name.

    Returns (objects, exact) where `objects` is a list of database objects, and
    `exact` is True iff the given name matched the returned objects exactly.

    This function ONLY does fuzzy matching if there are no exact matches.
    Pass `exact_only=True` to skip fuzzy matching altogether; in that case
    `exact` is always True and `objects` may be empty.

    Formes are not returned; "Shaymin" will return only grass Shaymin.

    Currently recognizes:
    - Pokémon names: "Eevee"
    """

    exact = True

    index, speller = get_index(session)

    # Names are indexed in lowercase, so search in lowercase throughout
    lowered_name = name.lower()

    # Look for exact name.  A Term object does an exact match, so we don't have
    # to worry about a query parser tripping on weird characters in the input
    searcher = index.searcher()
    query = whoosh.query.Term('name', lowered_name)
    results = searcher.search(query)

    if not exact_only and not results:
        # Look for some fuzzy matches
        exact = False
        results = []

        for suggestion in speller.suggest(lowered_name, 3):
            query = whoosh.query.Term('name', suggestion)
            results.extend(searcher.search(query))

    # Convert results to db objects
    objects = []
    seen = set()
    for result in results:
        # The same row can be indexed under several keys; return it only once
        seen_key = result['table'], result['row_id']
        if seen_key in seen:
            continue
        seen.add(seen_key)

        cls = indexed_tables[result['table']]
        obj = session.query(cls).get(result['row_id'])
        objects.append(obj)

    return objects, exact