Automatically disambiguate location identifiers
[zzz-pokedex.git] / scripts / disambiguate-location-identifiers.py
1 # Encoding: UTF-8
2 """Automatically disambiguate location identifiers
3
4 This is an unmaintained one-shot script, only included in the repo for reference.
5
6
7 Disambiguates identifiers that aren't unique, Routes and Sea Routes, and
8 generic names like 'villa' or 'game corner' that could appear in future
9 generations again.
10
11 Does this by prepending the region name, and if that isn't enough, appends
12 numbers.
13 """
14
15 import sys
16 import re
17 from collections import defaultdict
18
19 from pokedex.db import connect, tables
20
# Route and Sea Route identifiers are always treated as ambiguous, however
# many locations currently share them.
ambiguous_re = re.compile(r'^(sea-)?route-\d+$')

# Generic names that could appear again in future generations; these are
# disambiguated even while they are still unique.
ambiguous_set = set([
    'foreign-building',
    'game-corner',
    'global-terminal',
    'lighthouse',
    'restaurant',
    'flower-shop',
    'cycle-shop',
    'cafe',
    'shopping-mall',
    'villa',
])
25
def _print_tokens(tokens):
    """Write *tokens* to stdout separated by single spaces, then a newline.

    Tokens are written one at a time instead of being joined first: on
    Python 2 the arrow is a UTF-8 byte string, and joining it with a unicode
    identifier would trigger an implicit ASCII decode of the arrow.
    """
    write = sys.stdout.write
    for index, token in enumerate(tokens):
        if index:
            write(' ')
        write(token)
    write('\n')


def main(*argv):
    """Disambiguate location identifiers and report every decision.

    One line is printed per distinct identifier: the number of locations
    sharing it, a ``*`` marker when it needs disambiguating, the identifier
    itself and, for marked identifiers, an arrow followed by the replacement
    identifier(s).

    An identifier is disambiguated when several locations share it, when it
    matches ``ambiguous_re`` or when it is listed in ``ambiguous_set``.  The
    region identifier is prepended; locations that still collide within one
    region (or that have no region) get ``-1``, ``-2``, ... appended.

    :param argv: command-line arguments without the script name.  If the
        first one is ``--commit`` the session is committed; otherwise the
        new identifiers are assigned but the session is not committed.
    """
    session = connect()

    # Group locations by current identifier.  The query is ordered by id, so
    # the numbers handed out below are assigned in id order.
    location_dict = defaultdict(list)
    for location in session.query(tables.Location).order_by(tables.Location.id):
        location_dict[location.identifier].append(location)

    # Python 2's stdout takes byte strings, so the arrow is encoded by hand
    # there; on Python 3 it is written as text.
    arrow = u'→'
    if str is bytes:
        arrow = arrow.encode('utf-8')

    changes = False
    for identifier, locations in sorted(location_dict.items()):
        disambiguate = (
            len(locations) > 1
            or ambiguous_re.match(identifier) is not None
            or identifier in ambiguous_set
        )
        tokens = [
            str(len(locations)),
            '*' if disambiguate else ' ',
            identifier,
        ]
        if disambiguate:
            tokens.append(arrow)

            by_region = defaultdict(list)
            for location in locations:
                region = location.region
                region_key = region.identifier if region else None
                by_region[region_key].append(location)

            # Sort the regions so the report is reproducible; the region-less
            # group (key None) sorts first.
            region_groups = sorted(
                by_region.items(), key=lambda item: item[0] or '')
            for region_identifier, region_locations in region_groups:
                if region_identifier:
                    base_identifier = '%s-%s' % (region_identifier, identifier)
                else:
                    # No region to prefix with
                    base_identifier = identifier

                if len(region_locations) == 1:
                    # The region was enough
                    renames = [(region_locations[0], base_identifier)]
                else:
                    # Need to number the locations :(
                    renames = []
                    for number, location in enumerate(region_locations,
                                                      start=1):
                        renames.append(
                            (location, '%s-%s' % (base_identifier, number)))

                for location, new_identifier in renames:
                    tokens.append(new_identifier)
                    # Only count real renames: a lone region-less location
                    # keeps its identifier, and must not make the script
                    # report pending changes on every run.
                    if location.identifier != new_identifier:
                        location.identifier = new_identifier
                        changes = True
        _print_tokens(tokens)

    if not changes:
        print('No changes needed')
    elif argv and argv[0] == '--commit':
        session.commit()
        print('Committed')
    else:
        print('Run with --commit to commit changes')
77
78
if __name__ == '__main__':
    # Hand everything after the script name to main() as positional arguments.
    cli_arguments = sys.argv[1:]
    main(*cli_arguments)