Fix up the migration

author Petr Viktorin <encukou@gmail.com>

Sat, 12 Mar 2011 14:07:14 +0000 (16:07 +0200)

committer Eevee <git@veekun.com>

Sun, 13 Mar 2011 22:41:23 +0000 (15:41 -0700)
author Petr Viktorin <encukou@gmail.com>
Sat, 12 Mar 2011 14:07:14 +0000 (16:07 +0200)
committer Eevee <git@veekun.com>
Sun, 13 Mar 2011 22:41:23 +0000 (15:41 -0700)
diff --git a/pokedex/data/csv/version_texts.csv b/pokedex/data/csv/version_texts.csv

index 6ad48af..518a11c 100644 (file)
--- a/pokedex/data/csv/version_texts.csv
+++ b/pokedex/data/csv/version_texts.csv
@@ -15,5 +15,5 @@ version_id,language_id,name
  14,9,Platinum
  15,9,HeartGold
  16,9,SoulSilver
  14,9,Platinum
  15,9,HeartGold
  16,9,SoulSilver
-17,1,Black
-18,1,White
+17,9,Black
+18,9,White
diff --git a/scripts/migration-i18n.py b/scripts/migration-i18n.py

deleted file mode 100644 (file)

index 4c1f99c..0000000
--- a/scripts/migration-i18n.py
+++ /dev/null
@@ -1,272 +0,0 @@
-# Encoding: UTF-8
-
-"""Moves/transforms values in CSVs in an ad-hoc way, based mainly on column name
-
-Auto-creates identifiers from names
-Auto-creates names from identifiers
-Copies IDs for foreign keys
-Creates autoincrement-style IDs when missing
-Sets text language to 9 (en), except when it sets to 1 (jp)
-
-And looks good doing it!
-"""
-
-import csv
-import re
-import os
-from StringIO import StringIO
-from collections import namedtuple, defaultdict
-
-from sqlalchemy.orm import class_mapper
-
-from pokedex.db import tables, load
-
-english_id = 9
-japanese_id = 1
-
-bw_version_group_id = 11
-
-dir = load.get_default_csv_dir()
-
-def tuple_key(tup):
-    """Return a sort key for mixed int/string tuples.
-
-    Strings sort first.
-    """
-    def generator():
-        for item in tup:
-            try:
-                yield (1, int(item))
-            except ValueError:
-                yield (0, item)
-    return tuple(generator())
-
-class MakeFieldFuncs:
-    """Various ways to get a new value from the old one"""
-    @staticmethod
-    def copy(field_name, source, **kwargs):
-        """Plain copy"""
-        return source[field_name]
-
-    @staticmethod
-    def main(field_name, source, **kwargs):
-        """Populate aux table from the main table"""
-        return source[field_name]
-
-    @staticmethod
-    def Main(field_name, source, **kwargs):
-        """Capitalize"""
-        return source[field_name].capitalize()
-
-    @staticmethod
-    def ident(source, **kwargs):
-        """Craft an identifier from the 'identifier' or 'name' column"""
-        return name2ident(source.get('identifier', source.get('name')))
-
-    @staticmethod
-    def Name(source, **kwargs):
-        """Capitalize the name (or identifier) column"""
-        name = source.get('name', source.get('identifier', None))
-        name = ' '.join(word.capitalize() for word in name.split(' '))
-        return name
-
-    @staticmethod
-    def f_id(source, **kwargs):
-        """Capitalize the identifier column"""
-        return source['identifier'].capitalize()
-
-    @staticmethod
-    def name(source, **kwargs):
-        """Get the original name"""
-        return source['name']
-
-    @staticmethod
-    def newid(i, source, **kwargs):
-        """Assign a new "auto-incremented" id"""
-        source['id'] = i  # hack to make srcid work
-        return i
-
-    @staticmethod
-    def en(source, **kwargs):
-        """Assign the value for English -- unless it's Japanese"""
-        if source.get('version_group_id', None) == str(bw_version_group_id):
-            return japanese_id
-        return english_id
-
-    @staticmethod
-    def srcid(source, field_name, **kwargs):
-        """The original table's id"""
-        try:
-            return source['id']
-        except KeyError:
-            if field_name == 'pokemon_form_group_id':
-                # This one reuses another table's ID
-                return source['pokemon_id']
-            else:
-                raise
-
-def name2ident(name):
-    ident = name.decode('utf-8').lower()
-    ident = ident.replace(u'+', ' plus ')
-    ident = re.sub(u'[ _–]+', u'-', ident)
-    ident = re.sub(u'[\'./;’(),:]', u'', ident)
-    ident = ident.replace(u'é', 'e')
-    ident = ident.replace(u'♀', '-f')
-    ident = ident.replace(u'♂', '-m')
-    if ident in ('???', '????'):
-        ident = 'unknown'
-    elif ident == '!':
-        ident = 'exclamation'
-    elif ident == '?':
-        ident = 'question'
-    for c in ident:
-        assert c in "abcdefghijklmnopqrstuvwxyz0123456789-", repr(ident)
-    return ident
-
-
-FieldSpec = namedtuple('FieldSpec', 'out name func')
-
-def main():
-    for table in sorted(tables.all_tables(), key=lambda t: t.__name__):
-        datafilename = dir + '/' + table.__tablename__ + '.csv'
-        classname = table.__name__
-        if hasattr(table, 'object_table'):
-            # This is an auxilliary table; it'll be processed with the main one
-            continue
-        else:
-            print "%s: %s" % (classname, table.__tablename__)
-        with open(datafilename) as datafile:
-            datacsv = csv.reader(datafile, lineterminator='\n')
-            orig_fields = datacsv.next()
-            columns = class_mapper(table).c
-            new_fields = []
-            main_out = []
-            outputs = {datafilename: main_out}
-            name_out = None
-            srcfiles = [datafilename]
-            # Set new_fields to a list of FieldSpec object, one for each field we want in the csv
-            for column in columns:
-                name = column.name
-                if name == 'identifier':
-                    new_fields.append(FieldSpec(datafilename, column.name, MakeFieldFuncs.ident))
-                elif name in orig_fields:
-                    new_fields.append(FieldSpec(datafilename, column.name, MakeFieldFuncs.copy))
-                elif name == 'id':
-                    new_fields.append(FieldSpec(datafilename, column.name, MakeFieldFuncs.newid))
-                elif name == 'language_id':
-                    new_fields.insert(2, FieldSpec(datafilename, column.name, MakeFieldFuncs.en))
-                else:
-                    raise AssertionError(name)
-            # Remember headers
-            headers = {datafilename: list(field.name for field in new_fields)}
-            # Pretty prnt :)
-            for field in new_fields:
-                print '    [{0.func.func_name:5}] {0.name}'.format(field)
-            # Do pretty much the same for aux tables
-            aux_tables = []
-            for attrname in 'text_table prose_table'.split():
-                aux_table = getattr(table, attrname, None)
-                if aux_table:
-                    aux_datafilename = dir + '/' + aux_table.__tablename__ + '.csv'
-                    print "  %s: %s" % (aux_table.__name__, aux_table.__tablename__)
-                    srcfiles.append(datafilename)
-                    aux_tables.append(aux_table)
-                    columns = class_mapper(aux_table).c
-                    aux_out = []
-                    outputs[aux_datafilename] = aux_out
-                    aux_fields = []
-                    for column in columns:
-                        name = column.name
-                        if name == 'language_id':
-                            aux_fields.insert(1, FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.en))
-                        elif name == 'name' and table.__name__ == 'ItemFlag':
-                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.f_id))
-                        elif name == 'description' and table.__name__ == 'ItemFlag':
-                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.name))
-                        elif name in orig_fields and name == 'name' and table.__name__ in 'PokemonColor ContestType BerryFirmness'.split():
-                            # Capitalize these names
-                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.Name))
-                        elif name in orig_fields and name in 'color flavor'.split() and table.__name__ == 'ContestType':
-                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.Main))
-                        elif name in orig_fields:
-                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.main))
-                        elif name == table.__singlename__ + '_id':
-                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.srcid))
-                        elif name == 'name':
-                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.Name))
-                        elif name == 'lang_id':
-                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.srcid))
-                        else:
-                            print orig_fields
-                            raise AssertionError(name)
-                        if name == 'name':
-                            # If this table contains the name, remember that
-                            name_fields = aux_fields
-                            name_out = aux_out
-                    # Sort aux tables nicely
-                    def key(f):
-                        if f.func == MakeFieldFuncs.srcid:
-                            return 0
-                        elif f.name == 'language_id':
-                            return 1
-                        elif f.name == 'name':
-                            return 2
-                        else:
-                            return 10
-                    aux_fields.sort(key=key)
-                    new_fields += aux_fields
-                    headers[aux_datafilename] = list(field.name for field in aux_fields)
-                    # Pretty print :)
-                    for field in aux_fields:
-                        print '    [{0.func.func_name:5}] {0.name}'.format(field)
-            # Do nothing if the table's the same
-            if all(field.func == MakeFieldFuncs.copy for field in new_fields):
-                print u'  → skipping'
-                continue
-            # Otherwise read the file
-            # outputs will be a (filename -> list of rows) dict
-            print u'  → reading'
-            for autoincrement_id, src_row in enumerate(datacsv, start=1):
-                row = dict(zip(orig_fields, src_row))
-                new_rows = defaultdict(list)
-                for field in new_fields:
-                    new_rows[field.out].append(field.func(
-                            source=row,
-                            field_name=field.name,
-                            i=autoincrement_id,
-                        ))
-                for name, row in new_rows.items():
-                    outputs[name].append(row)
-        # If there was a _names table, read that and append it to the
-        # aux table that has names
-        try:
-            name_datafilename = dir + '/' + table.__singlename__ + '_names.csv'
-            name_file = open(name_datafilename)
-        except (AttributeError, IOError):
-            pass
-        else:
-            print u'  → reading foreign names'
-            with name_file:
-                namecsv = csv.reader(name_file, lineterminator='\n')
-                src_fields = namecsv.next()
-                obj_id_fieldname = table.__singlename__ + '_id'
-                assert src_fields == [obj_id_fieldname, 'language_id', 'name']
-                for name_row in namecsv:
-                    name_dict = dict(zip(src_fields, name_row))
-                    row = []
-                    for field in name_fields:
-                        row.append(name_dict.get(field.name, ''))
-                    name_out.append(row)
-            os.unlink(name_datafilename)
-        # For all out files, write a header & sorted rows
-        print u'  → writing'
-        for filename, rows in outputs.items():
-            with open(filename, 'w') as outfile:
-                outcsv = csv.writer(outfile, lineterminator='\n')
-                outcsv.writerow(headers[filename])
-                rows.sort(key=tuple_key)
-                for row in rows:
-                    outcsv.writerow(row)
-
-if __name__ == '__main__':
-    main()
author	Petr Viktorin <encukou@gmail.com>
	Sat, 12 Mar 2011 14:07:14 +0000 (16:07 +0200)
committer	Eevee <git@veekun.com>
	Sun, 13 Mar 2011 22:41:23 +0000 (15:41 -0700)
pokedex/data/csv/version_texts.csv		patch \| blob \| history
scripts/migration-i18n.py	[deleted file]	patch \| blob \| history