X-Git-Url: http://git.veekun.com/zzz-pokedex.git/blobdiff_plain/3b7db63323a04c7b2e91ffdd856f8e61f1e72b02..a1cdf50e2d333aee2267af6987dc1cf468fbc160:/pokedex/lookup.py?ds=sidebyside diff --git a/pokedex/lookup.py b/pokedex/lookup.py index 8c90950..8488f21 100644 --- a/pokedex/lookup.py +++ b/pokedex/lookup.py @@ -222,25 +222,24 @@ class PokedexLookup(object): add(row.pokemon_name, None, u'en', u'us') continue - add(row.name, None, u'en', u'us') - # Some things also have other languages' names # XXX other language form names..? - for foreign_name in getattr(row, 'foreign_names', []): - moonspeak = foreign_name.name - if row.name == moonspeak: - # Don't add the English name again as a different + seen = set() + for language, name in getattr(row, 'name_map', {}).items(): + if name in seen: + # Don't add the name again as a different # language; no point and it makes spell results # confusing continue + seen.add(name) - add(moonspeak, foreign_name.language.name, - foreign_name.language.iso639, - foreign_name.language.iso3166) + add(name, language.name, + language.iso639, + language.iso3166) # Add Roomaji too - if foreign_name.language.name == 'Japanese': - roomaji = romanize(foreign_name.name) + if language.identifier == 'ja': + roomaji = romanize(name) add(roomaji, u'Roomaji', u'ja', u'jp') writer.commit() @@ -564,25 +563,18 @@ class PokedexLookup(object): table_names = self.indexed_tables.keys() table_names.remove('pokemon_forms') - # Rather than create an array of many hundred items and pick randomly - # from it, just pick a number up to the total number of potential - # items, then pick randomly from that, and partition the whole range - # into chunks. This also avoids the slight problem that the index - # contains more rows (for languages) for some items than others. - # XXX ought to cache this (in the index?) if possible - total = 0 - partitions = [] - for table_name in table_names: - count = self.session.query(self.indexed_tables[table_name]).count() - total += count - partitions.append((table_name, count)) - - n = random.randint(1, total) - while n > partitions[0][1]: - n -= partitions[0][1] - partitions.pop(0) - - return self.lookup(unicode(n), valid_types=[ partitions[0][0] ]) + # Pick a random table, then pick a random item from it. Small tables + # like Type will have an unnatural bias. The alternative is that a + # simple search for "random" will do some eight queries, counting the + # rows in every single indexed table, and that's awful. + # XXX Can we improve on this, reasonably? + table_name = random.choice(table_names) + count = self.session.query(self.indexed_tables[table_name]).count() + id, = self.session.query(self.indexed_tables[table_name].id) \ + .offset(random.randint(0, count - 1)) \ + .first() + + return self.lookup(unicode(id), valid_types=[table_name]) def prefix_lookup(self, prefix, valid_types=[]): """Returns terms starting with the given exact prefix.