# Encoding: UTF-8
# Recovered from a whitespace-collapsed "new file" diff of
# scripts/disambiguate-location-identifiers.py (zzz-pokedex.git, revisions
# 33d1129fdb78bcf2cec136230870feca77b85517..d2880d4f03e6192e08f6a9eb70c485027de0fe8a).
"""Automatically disambiguate location identifiers

This is an unmaintained one-shot script, only included in the repo for reference.


Disambiguates identifiers that aren't unique, Routes and Sea Routes, and
generic names like 'villa' or 'game corner' that could appear in future
generations again.

Does this by prepending the region name, and if that isn't enough, appends
numbers.

Usage: run without arguments for a dry run that only prints the planned
renames; pass ``--commit`` as the first argument to commit them.
"""

import sys
import re
from collections import defaultdict

from pokedex.db import connect, tables

# Plain "route-N" / "sea-route-N" identifiers are always prefixed with the
# region, even when they are currently unique.
ambiguous_re = re.compile(r'^(sea-)?route-\d+$')

# Generic names that are always prefixed with the region as well.
ambiguous_set = set('foreign-building game-corner global-terminal lighthouse '
        'restaurant flower-shop cycle-shop cafe shopping-mall villa'.split())


def _needs_disambiguation(identifier, locations):
    """Return True if `identifier` must be rewritten.

    That is the case when several locations share it, when it is a bare
    (sea-)route identifier, or when it is one of the generic names in
    `ambiguous_set`.
    """
    return bool(
        len(locations) > 1
        or ambiguous_re.match(identifier)
        or identifier in ambiguous_set
    )


def _plan_renames(identifier, locations):
    """Return a list of ``(location, new_identifier)`` pairs.

    Locations are grouped by ``location.region.identifier`` (or None when the
    location has no region).  Each group gets the region identifier prepended;
    groups with more than one location additionally get ``-1``, ``-2``, ...
    appended in the order the locations were passed in.
    """
    by_region = defaultdict(list)
    for location in locations:
        region = location.region
        by_region[region.identifier if region else None].append(location)

    plan = []
    # Sorted so the report is reproducible; the region-less group comes first.
    # (None cannot be compared to strings on Python 3, hence the tuple key.)
    for region_identifier in sorted(
            by_region, key=lambda r: (r is not None, r or '')):
        region_locations = by_region[region_identifier]
        if region_identifier:
            base = '%s-%s' % (region_identifier, identifier)
        else:
            # No region to prepend: keep the identifier as the base
            base = identifier

        if len(region_locations) == 1:
            # The region was enough
            plan.append((region_locations[0], base))
        else:
            # Need to number the locations :(
            for i, location in enumerate(region_locations, start=1):
                plan.append((location, '%s-%s' % (base, i)))
    return plan


def _emit(parts):
    """Print `parts` space-separated on one line, as UTF-8 on Python 2.

    The line is built as text and printed with a single-argument ``print(...)``
    so the same source parses and behaves identically on Python 2 and 3.
    """
    line = u' '.join(parts)
    if sys.version_info[0] < 3:
        # Python 2 would otherwise encode with the stream's encoding, which
        # is ASCII when output is piped and cannot represent the arrow.
        line = line.encode('utf-8')
    print(line)


def main(*argv):
    """Report, and optionally commit, disambiguated location identifiers.

    `argv` are the command-line arguments without the program name.  The new
    identifiers are always assigned on the session's objects, but they are
    only committed when the first argument is ``--commit``.
    """
    session = connect()

    # identifier -> locations sharing it, in id order so numbering is stable
    location_dict = defaultdict(list)
    for location in session.query(tables.Location).order_by(tables.Location.id):
        location_dict[location.identifier].append(location)

    changes = False
    for identifier, locations in sorted(location_dict.items()):
        disambiguate = _needs_disambiguation(identifier, locations)
        # Columns: count, '*' marker for rewritten identifiers, old identifier
        parts = [u'%d' % len(locations), u' *'[disambiguate], identifier]
        if disambiguate:
            parts.append(u'→')
            for location, new_identifier in _plan_renames(identifier, locations):
                parts.append(new_identifier)
                # Only count real renames, so a run where nothing is left to
                # rename correctly reports "No changes needed".
                if location.identifier != new_identifier:
                    location.identifier = new_identifier
                    changes = True
        _emit(parts)

    if not changes:
        print('No changes needed')
    elif argv and argv[0] == '--commit':
        session.commit()
        print('Committed')
    else:
        print('Run with --commit to commit changes')


if __name__ == '__main__':
    main(*sys.argv[1:])