From: Eevee Date: Sun, 3 Apr 2011 08:56:27 +0000 (-0700) Subject: Random lookup algorithm is now more naive, but less broken. X-Git-Tag: veekun-promotions/2011041101~11 X-Git-Url: http://git.veekun.com/zzz-pokedex.git/commitdiff_plain/1bbf8a18cdb406ffe6c735e15a0cc9159d085c78?hp=2b931d7a9cd2782246bb739bcff66e230bb706bd Random lookup algorithm is now more naive, but less broken. --- diff --git a/pokedex/lookup.py b/pokedex/lookup.py index a0946cf..8488f21 100644 --- a/pokedex/lookup.py +++ b/pokedex/lookup.py @@ -563,25 +563,18 @@ class PokedexLookup(object): table_names = self.indexed_tables.keys() table_names.remove('pokemon_forms') - # Rather than create an array of many hundred items and pick randomly - # from it, just pick a number up to the total number of potential - # items, then pick randomly from that, and partition the whole range - # into chunks. This also avoids the slight problem that the index - # contains more rows (for languages) for some items than others. - # XXX ought to cache this (in the index?) if possible - total = 0 - partitions = [] - for table_name in table_names: - count = self.session.query(self.indexed_tables[table_name]).count() - total += count - partitions.append((table_name, count)) - - n = random.randint(1, total) - while n > partitions[0][1]: - n -= partitions[0][1] - partitions.pop(0) - - return self.lookup(unicode(n), valid_types=[ partitions[0][0] ]) + # Pick a random table, then pick a random item from it. Small tables + # like Type will have an unnatural bias. The alternative is that a + # simple search for "random" will do some eight queries, counting the + # rows in every single indexed table, and that's awful. + # XXX Can we improve on this, reasonably? + table_name = random.choice(table_names) + count = self.session.query(self.indexed_tables[table_name]).count() + id, = self.session.query(self.indexed_tables[table_name].id) \ + .offset(random.randint(0, count - 1)) \ + .first() + + return self.lookup(unicode(id), valid_types=[table_name]) def prefix_lookup(self, prefix, valid_types=[]): """Returns terms starting with the given exact prefix.