Added support for lookup by other language name. #15

author Eevee <git@veekun.com>

Sat, 22 Aug 2009 08:13:34 +0000 (01:13 -0700)

committer Eevee <git@veekun.com>

Sat, 22 Aug 2009 08:13:34 +0000 (01:13 -0700)
author Eevee <git@veekun.com>
Sat, 22 Aug 2009 08:13:34 +0000 (01:13 -0700)
committer Eevee <git@veekun.com>
Sat, 22 Aug 2009 08:13:34 +0000 (01:13 -0700)
diff --git a/pokedex/__init__.py b/pokedex/__init__.py

index 4af3fa0..73391dc 100644 (file)
--- a/pokedex/__init__.py
+++ b/pokedex/__init__.py
@@ -66,8 +66,12 @@ def command_lookup(name):
      else:
          print "Fuzzy-matched:"
  
-    for object, language, exact in results:
-        print object.__tablename__, object.name, language
+    for result in results:
+        print "%s: %s" % (result.object.__tablename__, result.object.name),
+        if result.language:
+            print "(%s in %s)" % (result.name, result.language)
+        else:
+            print
  
  
  def command_help():
diff --git a/pokedex/lookup.py b/pokedex/lookup.py

index 0bf1c18..2653b9a 100644 (file)
--- a/pokedex/lookup.py
+++ b/pokedex/lookup.py
@@ -95,6 +95,10 @@ def open_index(directory=None, session=None, recreate=False):
      writer = index.writer()
  
      # Index every name in all our tables of interest
+    # speller_entries becomes a list of (word, score) tuples; the scores added
+    # below are 1 for English names, 3 for other languages, and 8 for Roomaji.
+    # NOTE(review): this was meant to bias results toward English names, e.g.
+    # when German names are very similar; confirm these scores rank that way
      speller_entries = []
      for cls in indexed_tables.values():
          q = session.query(cls)
@@ -108,28 +112,31 @@ def open_index(directory=None, session=None, recreate=False):
  
              name = row.name.lower()
              writer.add_document(name=name, **row_key)
-            speller_entries.append(name)
+            speller_entries.append((name, 1))
  
              for extra_key_func in extra_keys.get(cls, []):
                  extra_key = extra_key_func(row)
                  writer.add_document(name=extra_key, **row_key)
  
              # Pokemon also get other languages
-            if cls == tables.Pokemon:
-                for foreign_name in row.foreign_names:
-                    name = foreign_name.name.lower()
-                    writer.add_document(name=name,
-                                        language=foreign_name.language.name,
+            for foreign_name in getattr(row, 'foreign_names', []):
+                moonspeak = foreign_name.name.lower()
+                if name == moonspeak:
+                    # Don't add the English name again as a different language;
+                    # no point and it makes spell results confusing
+                    continue
+
+                writer.add_document(name=moonspeak,
+                                    language=foreign_name.language.name,
+                                    **row_key)
+                speller_entries.append((moonspeak, 3))
+
+                # Add Roomaji too
+                if foreign_name.language.name == 'Japanese':
+                    roomaji = romanize(foreign_name.name).lower()
+                    writer.add_document(name=roomaji, language='Roomaji',
                                          **row_key)
-                    speller_entries.append(name)
-
-                    if foreign_name.language.name == 'Japanese':
-                        # Add Roomaji too
-                        roomaji = romanize(foreign_name.name).lower()
-                        writer.add_document(name=roomaji,
-                                            language='Roomaji',
-                                            **row_key)
-                        speller_entries.append(roomaji)
+                    speller_entries.append((roomaji, 8))
  
  
      writer.commit()
@@ -138,18 +145,20 @@ def open_index(directory=None, session=None, recreate=False):
      # at once, as every call to add_* does a commit(), and those seem to be
      # expensive
      speller = whoosh.spelling.SpellChecker(index.storage)
-    speller.add_words(speller_entries)
+    speller.add_scored_words(speller_entries)
  
      return index, speller
  
  
-LookupResult = namedtuple('LookupResult', ['object', 'language', 'exact'])
+LookupResult = namedtuple('LookupResult',
+                          ['object', 'name', 'language', 'exact'])
  def lookup(name, session=None, indices=None, exact_only=False):
      """Attempts to find some sort of object, given a database session and name.
  
-    Returns a list of named (object, language, exact) tuples.  `object` is a
-    database object, `language` is the name of the language in which the name
-    was found, and `exact` is True iff this was an exact match.
+    Returns a list of named (object, name, language, exact) tuples.  `object`
+    is a database object, `name` is the name under which the object was found,
+    `language` is the name of the language in which the name was found, and
+    `exact` is True iff this was an exact match.
  
      This function currently ONLY does fuzzy matching if there are no exact
      matches.
@@ -209,6 +218,12 @@ def lookup(name, session=None, indices=None, exact_only=False):
      seen = {}
      for result in results:
          # Skip dupe results
+        # Note!  The speller prefers English names, but the query does not.  So
+        # "latias" comes over "ratiasu".  "latias" matches only the English
+        # row, comes out first, and all is well.
+        # However!  The speller could then return "foo" which happens to be the
+        # name for two different things in different languages, and the
+        # non-English one could appear preferred.  This is not very likely.
          seen_key = result['table'], result['row_id']
          if seen_key in seen:
              continue
@@ -216,6 +231,9 @@ def lookup(name, session=None, indices=None, exact_only=False):
  
          cls = indexed_tables[result['table']]
          obj = session.query(cls).get(result['row_id'])
-        objects.append(LookupResult(obj, result['language'], exact))
+        objects.append(LookupResult(object=obj,
+                                    name=result['name'],
+                                    language=result['language'],
+                                    exact=exact))
  
-    return objects
+    return objects[:5]
author	Eevee <git@veekun.com>
	Sat, 22 Aug 2009 08:13:34 +0000 (01:13 -0700)
committer	Eevee <git@veekun.com>
	Sat, 22 Aug 2009 08:13:34 +0000 (01:13 -0700)
pokedex/__init__.py		patch | blob | history
pokedex/lookup.py		patch | blob | history