From: Petr Viktorin Date: Sat, 12 Mar 2011 14:07:14 +0000 (+0200) Subject: Fix up the migration X-Git-Tag: veekun-promotions/2011041101~35^2~11 X-Git-Url: http://git.veekun.com/zzz-pokedex.git/commitdiff_plain/b805f963c65196ee9d5ad222974635066ad4ee43?ds=sidebyside Fix up the migration The script got some things wrong; fix those up manually. Also remove the migration script, as it won't work any more. --- diff --git a/pokedex/data/csv/version_texts.csv b/pokedex/data/csv/version_texts.csv index 6ad48af..518a11c 100644 --- a/pokedex/data/csv/version_texts.csv +++ b/pokedex/data/csv/version_texts.csv @@ -15,5 +15,5 @@ version_id,language_id,name 14,9,Platinum 15,9,HeartGold 16,9,SoulSilver -17,1,Black -18,1,White +17,9,Black +18,9,White diff --git a/scripts/migration-i18n.py b/scripts/migration-i18n.py deleted file mode 100644 index 4c1f99c..0000000 --- a/scripts/migration-i18n.py +++ /dev/null @@ -1,272 +0,0 @@ -# Encoding: UTF-8 - -"""Moves/transforms values in CSVs in an ad-hoc way, based mainly on column name - -Auto-creates identifiers from names -Auto-creates names from identifiers -Copies IDs for foreign keys -Creates autoincrement-style IDs when missing -Sets text language to 9 (en), except when it sets to 1 (jp) - -And looks good doing it! -""" - -import csv -import re -import os -from StringIO import StringIO -from collections import namedtuple, defaultdict - -from sqlalchemy.orm import class_mapper - -from pokedex.db import tables, load - -english_id = 9 -japanese_id = 1 - -bw_version_group_id = 11 - -dir = load.get_default_csv_dir() - -def tuple_key(tup): - """Return a sort key for mixed int/string tuples. - - Strings sort first. - """ - def generator(): - for item in tup: - try: - yield (1, int(item)) - except ValueError: - yield (0, item) - return tuple(generator()) - -class MakeFieldFuncs: - """Various ways to get a new value from the old one""" - @staticmethod - def copy(field_name, source, **kwargs): - """Plain copy""" - return source[field_name] - - @staticmethod - def main(field_name, source, **kwargs): - """Populate aux table from the main table""" - return source[field_name] - - @staticmethod - def Main(field_name, source, **kwargs): - """Capitalize""" - return source[field_name].capitalize() - - @staticmethod - def ident(source, **kwargs): - """Craft an identifier from the 'identifier' or 'name' column""" - return name2ident(source.get('identifier', source.get('name'))) - - @staticmethod - def Name(source, **kwargs): - """Capitalize the name (or identifier) column""" - name = source.get('name', source.get('identifier', None)) - name = ' '.join(word.capitalize() for word in name.split(' ')) - return name - - @staticmethod - def f_id(source, **kwargs): - """Capitalize the identifier column""" - return source['identifier'].capitalize() - - @staticmethod - def name(source, **kwargs): - """Get the original name""" - return source['name'] - - @staticmethod - def newid(i, source, **kwargs): - """Assign a new "auto-incremented" id""" - source['id'] = i # hack to make srcid work - return i - - @staticmethod - def en(source, **kwargs): - """Assign the value for English -- unless it's Japanese""" - if source.get('version_group_id', None) == str(bw_version_group_id): - return japanese_id - return english_id - - @staticmethod - def srcid(source, field_name, **kwargs): - """The original table's id""" - try: - return source['id'] - except KeyError: - if field_name == 'pokemon_form_group_id': - # This one reuses another table's ID - return source['pokemon_id'] - else: - raise - -def name2ident(name): - ident = name.decode('utf-8').lower() - ident = ident.replace(u'+', ' plus ') - ident = re.sub(u'[ _–]+', u'-', ident) - ident = re.sub(u'[\'./;’(),:]', u'', ident) - ident = ident.replace(u'é', 'e') - ident = ident.replace(u'♀', '-f') - ident = ident.replace(u'♂', '-m') - if ident in ('???', '????'): - ident = 'unknown' - elif ident == '!': - ident = 'exclamation' - elif ident == '?': - ident = 'question' - for c in ident: - assert c in "abcdefghijklmnopqrstuvwxyz0123456789-", repr(ident) - return ident - - -FieldSpec = namedtuple('FieldSpec', 'out name func') - -def main(): - for table in sorted(tables.all_tables(), key=lambda t: t.__name__): - datafilename = dir + '/' + table.__tablename__ + '.csv' - classname = table.__name__ - if hasattr(table, 'object_table'): - # This is an auxilliary table; it'll be processed with the main one - continue - else: - print "%s: %s" % (classname, table.__tablename__) - with open(datafilename) as datafile: - datacsv = csv.reader(datafile, lineterminator='\n') - orig_fields = datacsv.next() - columns = class_mapper(table).c - new_fields = [] - main_out = [] - outputs = {datafilename: main_out} - name_out = None - srcfiles = [datafilename] - # Set new_fields to a list of FieldSpec object, one for each field we want in the csv - for column in columns: - name = column.name - if name == 'identifier': - new_fields.append(FieldSpec(datafilename, column.name, MakeFieldFuncs.ident)) - elif name in orig_fields: - new_fields.append(FieldSpec(datafilename, column.name, MakeFieldFuncs.copy)) - elif name == 'id': - new_fields.append(FieldSpec(datafilename, column.name, MakeFieldFuncs.newid)) - elif name == 'language_id': - new_fields.insert(2, FieldSpec(datafilename, column.name, MakeFieldFuncs.en)) - else: - raise AssertionError(name) - # Remember headers - headers = {datafilename: list(field.name for field in new_fields)} - # Pretty prnt :) - for field in new_fields: - print ' [{0.func.func_name:5}] {0.name}'.format(field) - # Do pretty much the same for aux tables - aux_tables = [] - for attrname in 'text_table prose_table'.split(): - aux_table = getattr(table, attrname, None) - if aux_table: - aux_datafilename = dir + '/' + aux_table.__tablename__ + '.csv' - print " %s: %s" % (aux_table.__name__, aux_table.__tablename__) - srcfiles.append(datafilename) - aux_tables.append(aux_table) - columns = class_mapper(aux_table).c - aux_out = [] - outputs[aux_datafilename] = aux_out - aux_fields = [] - for column in columns: - name = column.name - if name == 'language_id': - aux_fields.insert(1, FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.en)) - elif name == 'name' and table.__name__ == 'ItemFlag': - aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.f_id)) - elif name == 'description' and table.__name__ == 'ItemFlag': - aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.name)) - elif name in orig_fields and name == 'name' and table.__name__ in 'PokemonColor ContestType BerryFirmness'.split(): - # Capitalize these names - aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.Name)) - elif name in orig_fields and name in 'color flavor'.split() and table.__name__ == 'ContestType': - aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.Main)) - elif name in orig_fields: - aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.main)) - elif name == table.__singlename__ + '_id': - aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.srcid)) - elif name == 'name': - aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.Name)) - elif name == 'lang_id': - aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.srcid)) - else: - print orig_fields - raise AssertionError(name) - if name == 'name': - # If this table contains the name, remember that - name_fields = aux_fields - name_out = aux_out - # Sort aux tables nicely - def key(f): - if f.func == MakeFieldFuncs.srcid: - return 0 - elif f.name == 'language_id': - return 1 - elif f.name == 'name': - return 2 - else: - return 10 - aux_fields.sort(key=key) - new_fields += aux_fields - headers[aux_datafilename] = list(field.name for field in aux_fields) - # Pretty print :) - for field in aux_fields: - print ' [{0.func.func_name:5}] {0.name}'.format(field) - # Do nothing if the table's the same - if all(field.func == MakeFieldFuncs.copy for field in new_fields): - print u' → skipping' - continue - # Otherwise read the file - # outputs will be a (filename -> list of rows) dict - print u' → reading' - for autoincrement_id, src_row in enumerate(datacsv, start=1): - row = dict(zip(orig_fields, src_row)) - new_rows = defaultdict(list) - for field in new_fields: - new_rows[field.out].append(field.func( - source=row, - field_name=field.name, - i=autoincrement_id, - )) - for name, row in new_rows.items(): - outputs[name].append(row) - # If there was a _names table, read that and append it to the - # aux table that has names - try: - name_datafilename = dir + '/' + table.__singlename__ + '_names.csv' - name_file = open(name_datafilename) - except (AttributeError, IOError): - pass - else: - print u' → reading foreign names' - with name_file: - namecsv = csv.reader(name_file, lineterminator='\n') - src_fields = namecsv.next() - obj_id_fieldname = table.__singlename__ + '_id' - assert src_fields == [obj_id_fieldname, 'language_id', 'name'] - for name_row in namecsv: - name_dict = dict(zip(src_fields, name_row)) - row = [] - for field in name_fields: - row.append(name_dict.get(field.name, '')) - name_out.append(row) - os.unlink(name_datafilename) - # For all out files, write a header & sorted rows - print u' → writing' - for filename, rows in outputs.items(): - with open(filename, 'w') as outfile: - outcsv = csv.writer(outfile, lineterminator='\n') - outcsv.writerow(headers[filename]) - rows.sort(key=tuple_key) - for row in rows: - outcsv.writerow(row) - -if __name__ == '__main__': - main()