from collections import namedtuple
import os, os.path
import pkg_resources
+import random
import re
import shutil
import pokedex.db.tables as tables
from pokedex.roomaji import romanize
-__all__ = ['open_index', 'lookup']
+__all__ = ['open_index', 'lookup', 'random_lookup']
# Cap on how many results lookup() hands back to its caller.
MAX_LOOKUP_RESULTS = 10
# NOTE(review): presumably the number of candidates fetched from the index
# before duplicates are collapsed -- the use site is not visible here; confirm.
INTERMEDIATE_LOOKUP_RESULTS = 25

# One match: the database object, the display name taken from the index
# record, the language of that name, and whether the match was exact.
LookupResult = namedtuple(
    'LookupResult',
    ('object', 'name', 'language', 'exact'),
)
+
def _parse_table_name(name):
    """Resolve `name` to the name of an indexed table.

    `name` may be a table object (anything carrying a `__tablename__`
    attribute), the name of an indexed table, or the singular form of one
    (an 's' is appended and the result is checked as well).

    Returns None when nothing matches.
    """
    # Table objects announce their own name
    if hasattr(name, '__tablename__'):
        return name.__tablename__

    # A proper table name is used as given
    if name in indexed_tables:
        return name

    # Otherwise see whether this is the singular of a known table; anything
    # else is bogus, and the caller gets None instead of an error
    plural = name + 's'
    return plural if plural in indexed_tables else None
+
def _whoosh_records_to_results(records, session, exact=True):
    """Turn whoosh index records into a list of LookupResult tuples.

    Each record names a table and a row id; the matching database object is
    fetched through `session`.  Only the first record seen for any given
    (table, row id) pair produces a result.  `exact` is copied as-is into
    every result.
    """
    # XXX this 'exact' thing is getting kinda leaky.  would like a better way
    # to handle it, since only lookup() cares about fuzzy results
    already_seen = set()
    found = []
    for rec in records:
        identity = (rec['table'], rec['row_id'])
        if identity in already_seen:
            # Dupe of a record that has already been converted
            continue
        already_seen.add(identity)

        table_class = indexed_tables[rec['table']]
        db_object = session.query(table_class).get(rec['row_id'])

        # Field order: object, name, language, exact
        found.append(LookupResult(db_object,
                                  rec['display_name'],
                                  rec['language'],
                                  exact))

    return found
+
+
def lookup(input, valid_types=[], session=None, indices=None, exact_only=False):
"""Attempts to find some sort of object, given a database session and name.
This function currently ONLY does fuzzy matching if there are no exact
matches.
- Formes are not returned; "Shaymin" will return only grass Shaymin.
+ Formes are not returned unless requested; "Shaymin" will return only grass
+ Shaymin.
+
+ Extraneous whitespace is removed with extreme prejudice.
Recognizes:
- Names: "Eevee", "Surf", "Run Away", "Payapa Berry", etc.
else:
index, speller = open_index()
- name = unicode(input).lower()
+ name = unicode(input).strip().lower()
exact = True
form = None
# Remove any type prefix (pokemon:133) before constructing a query
if ':' in name:
- prefix_chunk, name = name.split(':', 2)
- prefixes = prefix_chunk.split(',')
+ prefix_chunk, name = name.split(':', 1)
+ name = name.strip()
+
if not valid_types:
# Only use types from the query string if none were explicitly
# provided
- valid_types = prefixes
+ prefixes = prefix_chunk.split(',')
+ valid_types = [_.strip() for _ in prefixes]
+
+ # Random lookup
+ if name == 'random':
+ return random_lookup(indices=(index, speller),
+ session=session,
+ valid_types=valid_types)
# Do different things depending what the query looks like
# Note: Term objects do an exact match, so we don't have to worry about a
# If there's a space in the input, this might be a form
if ' ' in name:
- form, formless_name = name.split(' ', 2)
+ form, formless_name = name.split(' ', 1)
form_query = whoosh.query.Term(u'name', formless_name) \
& whoosh.query.Term(u'forme_name', form)
query = query | form_query
### Filter by type of object
type_terms = []
for valid_type in valid_types:
- if hasattr(valid_type, '__tablename__'):
- table_name = getattr(valid_type, '__tablename__')
- elif valid_type in indexed_tables:
- table_name = valid_type
- elif valid_type + 's' in indexed_tables:
- table_name = valid_type + 's'
- else:
- # Bogus. Be nice and ignore it
- continue
-
+ table_name = _parse_table_name(valid_type)
type_terms.append(whoosh.query.Term(u'table', table_name))
if type_terms:
results.extend(searcher.search(query))
### Convert results to db objects
- objects = []
- seen = {}
- for result in results:
- # Skip dupe results
- seen_key = result['table'], result['row_id']
- if seen_key in seen:
- continue
- seen[seen_key] = True
-
- cls = indexed_tables[result['table']]
- obj = session.query(cls).get(result['row_id'])
-
- objects.append(LookupResult(object=obj,
- name=result['display_name'],
- language=result['language'],
- exact=exact))
+ objects = _whoosh_records_to_results(results, session, exact=exact)
# Only return up to 10 matches; beyond that, something is wrong.
# We strip out duplicate entries above, so it's remotely possible that we
# give us some padding, and should avoid that problem. Not a big deal if
# we lose the 25th-most-likely match anyway.
return objects[:MAX_LOOKUP_RESULTS]
+
+
def random_lookup(valid_types=[], session=None, indices=None):
    """Return the lookup result for a randomly chosen row.

    Takes similar arguments as `lookup()`.

    `valid_types`
        Table objects, table names, or singular table names to pick from.
        Entries that do not resolve to an indexed table are ignored; if
        nothing usable remains, every indexed table is a candidate.
        (The default list is never mutated.)

    `session`, `indices`
        Treated as with `lookup()`.

    Returns whatever `lookup()` returns for the chosen row, or an empty list
    when the candidate tables contain no rows at all.
    """
    # Same on-demand connection the sibling lookup functions perform; without
    # it the row counts below would blow up on the default session=None
    if not session:
        session = connect()

    candidate_tables = []
    for valid_type in valid_types:
        table_name = _parse_table_name(valid_type)
        if table_name:
            candidate_tables.append(indexed_tables[table_name])

    if not candidate_tables:
        candidate_tables = indexed_tables.values()

    # Rather than create an array of many hundred items and pick randomly from
    # it, just pick a number up to the total number of potential items, then
    # work out which table's chunk of that range the number falls into
    total = 0
    partitions = []
    for table in candidate_tables:
        count = session.query(table).count()
        total += count
        partitions.append((table, count))

    if not total:
        # Nothing to pick from; random.randint(1, 0) would raise ValueError
        return []

    n = random.randint(1, total)
    # n <= total, so this always breaks out with `table` as the chosen table
    # and n as a 1-based position within it
    for table, count in partitions:
        if n <= count:
            break
        n -= count

    # NOTE(review): looking the position up as an id assumes each table's ids
    # run 1..count without gaps -- confirm against the schema
    return lookup(unicode(n), valid_types=[table],
                  indices=indices, session=session)
+
def prefix_lookup(prefix, session=None, indices=None):
    """Find indexed names that begin with exactly `prefix`.

    The prefix is lowercased and otherwise used as-is: type prefixes are not
    recognized and no other name magic is applied.

    `session` and `indices` are treated as with `lookup()`.

    Returns a list of LookupResult tuples, all flagged as exact.
    """
    if not session:
        session = connect()

    # The speller half of the pair is not needed for prefix matching
    index, _speller = indices if indices else open_index()

    prefix_query = whoosh.query.Prefix(u'name', prefix.lower())

    name_searcher = index.searcher()
    name_searcher.weighting = LanguageWeighting()

    # XXX result count is not capped yet; limit=MAX_LOOKUP_RESULTS ?
    matches = name_searcher.search(prefix_query)
    return _whoosh_records_to_results(matches, session)