# -*- coding: utf-8 -*-

"""Moves/transforms values in CSVs in an ad-hoc way, based mainly on column name

Auto-creates identifiers from names
Auto-creates names from identifiers
Copies IDs for foreign keys
Creates autoincrement-style IDs when missing
Sets text language to 9 (en), except when it sets it to 1 (jp)

And looks good doing it!
"""
+
+import csv
+import re
+import os
+from StringIO import StringIO
+from collections import namedtuple, defaultdict
+
+from sqlalchemy.orm import class_mapper
+
+from pokedex.db import tables, load
+
+english_id = 9
+japanese_id = 1
+
+bw_version_group_id = 11
+
+dir = load.get_default_csv_dir()
+
+def tuple_key(tup):
+ """Return a sort key for mixed int/string tuples.
+
+ Strings sort first.
+ """
+ def generator():
+ for item in tup:
+ try:
+ yield (1, int(item))
+ except ValueError:
+ yield (0, item)
+ return tuple(generator())
+
+class MakeFieldFuncs:
+ """Various ways to get a new value from the old one"""
+ @staticmethod
+ def copy(field_name, source, **kwargs):
+ """Plain copy"""
+ return source[field_name]
+
+ @staticmethod
+ def main(field_name, source, **kwargs):
+ """Populate aux table from the main table"""
+ return source[field_name]
+
+ @staticmethod
+ def Main(field_name, source, **kwargs):
+ """Capitalize"""
+ return source[field_name].capitalize()
+
+ @staticmethod
+ def ident(source, **kwargs):
+ """Craft an identifier from the 'identifier' or 'name' column"""
+ return name2ident(source.get('identifier', source.get('name')))
+
+ @staticmethod
+ def Name(source, **kwargs):
+ """Capitalize the name (or identifier) column"""
+ name = source.get('name', source.get('identifier', None))
+ name = ' '.join(word.capitalize() for word in name.split(' '))
+ return name
+
+ @staticmethod
+ def f_id(source, **kwargs):
+ """Capitalize the identifier column"""
+ return source['identifier'].capitalize()
+
+ @staticmethod
+ def name(source, **kwargs):
+ """Get the original name"""
+ return source['name']
+
+ @staticmethod
+ def newid(i, source, **kwargs):
+ """Assign a new "auto-incremented" id"""
+ source['id'] = i # hack to make srcid work
+ return i
+
+ @staticmethod
+ def en(source, **kwargs):
+ """Assign the value for English -- unless it's Japanese"""
+ if source.get('version_group_id', None) == str(bw_version_group_id):
+ return japanese_id
+ return english_id
+
+ @staticmethod
+ def srcid(source, field_name, **kwargs):
+ """The original table's id"""
+ try:
+ return source['id']
+ except KeyError:
+ if field_name == 'pokemon_form_group_id':
+ # This one reuses another table's ID
+ return source['pokemon_id']
+ else:
+ raise
+
+def name2ident(name):
+ ident = name.decode('utf-8').lower()
+ ident = ident.replace(u'+', ' plus ')
+ ident = re.sub(u'[ _–]+', u'-', ident)
+ ident = re.sub(u'[\'./;’(),:]', u'', ident)
+ ident = ident.replace(u'é', 'e')
+ ident = ident.replace(u'♀', '-f')
+ ident = ident.replace(u'♂', '-m')
+ if ident in ('???', '????'):
+ ident = 'unknown'
+ elif ident == '!':
+ ident = 'exclamation'
+ elif ident == '?':
+ ident = 'question'
+ for c in ident:
+ assert c in "abcdefghijklmnopqrstuvwxyz0123456789-", repr(ident)
+ return ident
+
+
+FieldSpec = namedtuple('FieldSpec', 'out name func')
+
+def main():
+ for table in sorted(tables.all_tables(), key=lambda t: t.__name__):
+ datafilename = dir + '/' + table.__tablename__ + '.csv'
+ classname = table.__name__
+ if hasattr(table, 'object_table'):
+ # This is an auxilliary table; it'll be processed with the main one
+ continue
+ else:
+ print "%s: %s" % (classname, table.__tablename__)
+ with open(datafilename) as datafile:
+ datacsv = csv.reader(datafile, lineterminator='\n')
+ orig_fields = datacsv.next()
+ columns = class_mapper(table).c
+ new_fields = []
+ main_out = []
+ outputs = {datafilename: main_out}
+ name_out = None
+ srcfiles = [datafilename]
+ # Set new_fields to a list of FieldSpec object, one for each field we want in the csv
+ for column in columns:
+ name = column.name
+ if name == 'identifier':
+ new_fields.append(FieldSpec(datafilename, column.name, MakeFieldFuncs.ident))
+ elif name in orig_fields:
+ new_fields.append(FieldSpec(datafilename, column.name, MakeFieldFuncs.copy))
+ elif name == 'id':
+ new_fields.append(FieldSpec(datafilename, column.name, MakeFieldFuncs.newid))
+ elif name == 'language_id':
+ new_fields.insert(2, FieldSpec(datafilename, column.name, MakeFieldFuncs.en))
+ else:
+ raise AssertionError(name)
+ # Remember headers
+ headers = {datafilename: list(field.name for field in new_fields)}
+ # Pretty prnt :)
+ for field in new_fields:
+ print ' [{0.func.func_name:5}] {0.name}'.format(field)
+ # Do pretty much the same for aux tables
+ aux_tables = []
+ for attrname in 'text_table prose_table'.split():
+ aux_table = getattr(table, attrname, None)
+ if aux_table:
+ aux_datafilename = dir + '/' + aux_table.__tablename__ + '.csv'
+ print " %s: %s" % (aux_table.__name__, aux_table.__tablename__)
+ srcfiles.append(datafilename)
+ aux_tables.append(aux_table)
+ columns = class_mapper(aux_table).c
+ aux_out = []
+ outputs[aux_datafilename] = aux_out
+ aux_fields = []
+ for column in columns:
+ name = column.name
+ if name == 'language_id':
+ aux_fields.insert(1, FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.en))
+ elif name == 'name' and table.__name__ == 'ItemFlag':
+ aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.f_id))
+ elif name == 'description' and table.__name__ == 'ItemFlag':
+ aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.name))
+ elif name in orig_fields and name == 'name' and table.__name__ in 'PokemonColor ContestType BerryFirmness'.split():
+ # Capitalize these names
+ aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.Name))
+ elif name in orig_fields and name in 'color flavor'.split() and table.__name__ == 'ContestType':
+ aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.Main))
+ elif name in orig_fields:
+ aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.main))
+ elif name == table.__singlename__ + '_id':
+ aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.srcid))
+ elif name == 'name':
+ aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.Name))
+ elif name == 'lang_id':
+ aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.srcid))
+ else:
+ print orig_fields
+ raise AssertionError(name)
+ if name == 'name':
+ # If this table contains the name, remember that
+ name_fields = aux_fields
+ name_out = aux_out
+ # Sort aux tables nicely
+ def key(f):
+ if f.func == MakeFieldFuncs.srcid:
+ return 0
+ elif f.name == 'language_id':
+ return 1
+ elif f.name == 'name':
+ return 2
+ else:
+ return 10
+ aux_fields.sort(key=key)
+ new_fields += aux_fields
+ headers[aux_datafilename] = list(field.name for field in aux_fields)
+ # Pretty print :)
+ for field in aux_fields:
+ print ' [{0.func.func_name:5}] {0.name}'.format(field)
+ # Do nothing if the table's the same
+ if all(field.func == MakeFieldFuncs.copy for field in new_fields):
+ print u' → skipping'
+ continue
+ # Otherwise read the file
+ # outputs will be a (filename -> list of rows) dict
+ print u' → reading'
+ for autoincrement_id, src_row in enumerate(datacsv, start=1):
+ row = dict(zip(orig_fields, src_row))
+ new_rows = defaultdict(list)
+ for field in new_fields:
+ new_rows[field.out].append(field.func(
+ source=row,
+ field_name=field.name,
+ i=autoincrement_id,
+ ))
+ for name, row in new_rows.items():
+ outputs[name].append(row)
+ # If there was a _names table, read that and append it to the
+ # aux table that has names
+ try:
+ name_datafilename = dir + '/' + table.__singlename__ + '_names.csv'
+ name_file = open(name_datafilename)
+ except (AttributeError, IOError):
+ pass
+ else:
+ print u' → reading foreign names'
+ with name_file:
+ namecsv = csv.reader(name_file, lineterminator='\n')
+ src_fields = namecsv.next()
+ obj_id_fieldname = table.__singlename__ + '_id'
+ assert src_fields == [obj_id_fieldname, 'language_id', 'name']
+ for name_row in namecsv:
+ name_dict = dict(zip(src_fields, name_row))
+ row = []
+ for field in name_fields:
+ row.append(name_dict.get(field.name, ''))
+ name_out.append(row)
+ os.unlink(name_datafilename)
+ # For all out files, write a header & sorted rows
+ print u' → writing'
+ for filename, rows in outputs.items():
+ with open(filename, 'w') as outfile:
+ outcsv = csv.writer(outfile, lineterminator='\n')
+ outcsv.writerow(headers[filename])
+ rows.sort(key=tuple_key)
+ for row in rows:
+ outcsv.writerow(row)
+
+if __name__ == '__main__':
+ main()