- # WARNING: HERE BE DRAGONS
- # whoosh.spelling refuses to index things that don't look like words.
- # Unfortunately, this doesn't work so well for Pokémon (Mr. Mime,
- # Porygon-Z, etc.), and attempts to work around it lead to further
- # complications.
- # The below is copied from SpellChecker.add_scored_words without the check
- # for isalpha(). XXX get whoosh patched to make this unnecessary!
- writer = whoosh.writing.IndexWriter(speller.index())
- for word in speller_entries:
- fields = {"word": word, "score": 1}
- for size in xrange(speller.mingram, speller.maxgram + 1):
- nga = whoosh.analysis.NgramAnalyzer(size)
- gramlist = [t.text for t in nga(word)]
- if len(gramlist) > 0:
- fields["start%s" % size] = gramlist[0]
- fields["end%s" % size] = gramlist[-1]
- fields["gram%s" % size] = " ".join(gramlist)
- writer.add_document(**fields)
- writer.commit()
- # end copy-pasta
+ speller.add_scored_words(speller_entries)
+
+ return index, speller
+
+
class LanguageWeighting(whoosh.scoring.Weighting):
    """A scoring class that forces otherwise-equal English results to come
    before foreign results.

    The incoming weight is scaled by a factor chosen from the ``language``
    stored field of the matched document.
    """

    def score(self, searcher, fieldnum, text, docnum, weight, QTF=1):
        """Return `weight` scaled according to the document's language.

        `searcher` is used to fetch the stored fields of document `docnum`;
        `weight` is the score to adjust.  `fieldnum`, `text` and `QTF` are
        not consulted here (presumably they are only part of the signature
        whoosh expects -- confirm against the whoosh version in use).

        Returns `weight` unchanged when the stored language is None,
        `weight * 0.95` for Roomaji, and `weight * 0.9` for anything else.
        """
        language = searcher.stored_fields(docnum)['language']

        if language is None:
            # English (well, "default"); leave the weight untouched
            return weight

        if language == u'Roomaji':
            # Roomaji is the foreign language most likely to be searched
            # for, so it is penalized less than the others
            return weight * 0.95

        # Everything else can drop down the totem pole
        return weight * 0.9
+
# Matches strings made up solely of one or more digits.  Raw string so that
# `\d` reaches the regex engine instead of being an invalid string escape.
rx_is_number = re.compile(r'^\d+$')
+
# Lightweight record describing a single lookup hit.  The fields, in order,
# are: object, name, language, iso3166, exact.
_lookup_result_fields = (
    'object',
    'name',
    'language',
    'iso3166',
    'exact',
)
LookupResult = namedtuple('LookupResult', _lookup_result_fields)
+
def _parse_table_name(name):
    """Takes a singular table name, table name, or table object and returns the
    table name.

    An object with a `__tablename__` attribute yields that attribute.
    Otherwise `name` is looked up in `indexed_tables`, first as given and
    then with an 's' appended.

    Returns None for a bogus name, including values that cannot be looked up
    or pluralized at all (e.g. None or an unhashable object).
    """
    if hasattr(name, '__tablename__'):
        return name.__tablename__

    try:
        if name in indexed_tables:
            return name

        plural = name + 's'
        if plural in indexed_tables:
            return plural
    except TypeError:
        # Not something that can be a table name at all: the value could
        # not be used for the membership test or have 's' appended to it.
        # Treat it as bogus rather than blowing up.
        pass

    # Bogus.  Be nice and return dummy
    return None
+
+def _whoosh_records_to_results(records, session, exact=True):
+ """Converts a list of whoosh's indexed records to LookupResult tuples
+ containing database objects.
+ """
+ # XXX this 'exact' thing is getting kinda leaky. would like a better way
+ # to handle it, since only lookup() cares about fuzzy results
+ seen = {}
+ results = []
+ for record in records:
+ # Skip dupes
+ seen_key = record['table'], record['row_id']
+ if seen_key in seen:
+ continue
+ seen[seen_key] = True
+
+ cls = indexed_tables[record['table']]
+ obj = session.query(cls).get(record['row_id'])
+
+ results.append(LookupResult(object=obj,
+ name=record['display_name'],
+ language=record['language'],
+ iso3166=record['iso3166'],
+ exact=exact))