X-Git-Url: http://git.veekun.com/zzz-pokedex.git/blobdiff_plain/208880f7ade0f42a34f19bbdc4c89e96dea3b6a9..4334a1a641cdc77605d84d366ab2bc1b9b1ab484:/pokedex/lookup.py diff --git a/pokedex/lookup.py b/pokedex/lookup.py index 774df92..8488f21 100644 --- a/pokedex/lookup.py +++ b/pokedex/lookup.py @@ -104,6 +104,7 @@ class PokedexLookup(object): tables.Move, tables.Nature, tables.Pokemon, + tables.PokemonForm, tables.Type, ) ) @@ -212,34 +213,33 @@ class PokedexLookup(object): # Add the basic English name to the index if cls == tables.Pokemon: - # Pokémon need their form name added - # XXX kinda kludgy - add(row.full_name, None, u'en', u'us') - - # If this is a default form, ALSO add the unadorned name, - # so 'Deoxys' alone will still do the right thing - if row.forme_name and not row.forme_base_pokemon_id: - add(row.name, None, u'en', u'us') - else: - add(row.name, None, u'en', u'us') + # Don't re-add alternate forms of the same Pokémon; they'll + # be added as Pokémon forms instead + if not row.is_base_form: + continue + elif cls == tables.PokemonForm: + if row.name: + add(row.pokemon_name, None, u'en', u'us') + continue # Some things also have other languages' names # XXX other language form names..? - for foreign_name in getattr(row, 'foreign_names', []): - moonspeak = foreign_name.name - if row.name == moonspeak: - # Don't add the English name again as a different + seen = set() + for language, name in getattr(row, 'name_map', {}).items(): + if name in seen: + # Don't add the name again as a different # language; no point and it makes spell results # confusing continue + seen.add(name) - add(moonspeak, foreign_name.language.name, - foreign_name.language.iso639, - foreign_name.language.iso3166) + add(name, language.name, + language.iso639, + language.iso3166) # Add Roomaji too - if foreign_name.language.name == 'Japanese': - roomaji = romanize(foreign_name.name) + if language.identifier == 'ja': + roomaji = romanize(name) add(roomaji, u'Roomaji', u'ja', u'jp') writer.commit() @@ -296,7 +296,11 @@ class PokedexLookup(object): name = name.strip() prefixes = prefix_chunk.split(',') - user_valid_types = [_.strip() for _ in prefixes] + user_valid_types = [] + for prefix in prefixes: + prefix = prefix.strip() + if prefix: + user_valid_types.append(prefix) # Merge the valid types together. Only types that appear in BOTH lists # may be used. @@ -546,8 +550,10 @@ class PokedexLookup(object): table_names = [] for valid_type in valid_types: table_name = self._parse_table_name(valid_type) - # Skip anything not recognized. Could be, say, a language code - if table_name: + # Skip anything not recognized. Could be, say, a language code. + # XXX The vast majority of Pokémon forms are unnamed and unindexed, + # which can produce blank results. So skip them too for now. + if table_name and table_name != 'pokemon_forms': table_names.append(table_name) if not table_names: @@ -555,26 +561,20 @@ class PokedexLookup(object): # were valid, but this function is guaranteed to return # *something*, so it politely selects from the entire index instead table_names = self.indexed_tables.keys() - - # Rather than create an array of many hundred items and pick randomly - # from it, just pick a number up to the total number of potential - # items, then pick randomly from that, and partition the whole range - # into chunks. This also avoids the slight problem that the index - # contains more rows (for languages) for some items than others. - # XXX ought to cache this (in the index?) if possible - total = 0 - partitions = [] - for table_name in table_names: - count = self.session.query(self.indexed_tables[table_name]).count() - total += count - partitions.append((table_name, count)) - - n = random.randint(1, total) - while n > partitions[0][1]: - n -= partitions[0][1] - partitions.pop(0) - - return self.lookup(unicode(n), valid_types=[ partitions[0][0] ]) + table_names.remove('pokemon_forms') + + # Pick a random table, then pick a random item from it. Small tables + # like Type will have an unnatural bias. The alternative is that a + # simple search for "random" will do some eight queries, counting the + # rows in every single indexed table, and that's awful. + # XXX Can we improve on this, reasonably? + table_name = random.choice(table_names) + count = self.session.query(self.indexed_tables[table_name]).count() + id, = self.session.query(self.indexed_tables[table_name].id) \ + .offset(random.randint(0, count - 1)) \ + .first() + + return self.lookup(unicode(id), valid_types=[table_name]) def prefix_lookup(self, prefix, valid_types=[]): """Returns terms starting with the given exact prefix.