3 """Moves/transforms values in CSVs in an ad-hoc way, based mainly on column name
5 Auto-creates identifiers from names
6 Auto-creates names from identifiers
7 Copies IDs for foreign keys
8 Creates autoincrement-style IDs when missing
9 Sets text language to 9 (en), except when it sets to 1 (jp)
11 And looks good doing it!
import csv
import os
import re

from StringIO import StringIO
from collections import namedtuple, defaultdict

from sqlalchemy.orm import class_mapper

from pokedex.db import tables, load

bw_version_group_id = 11

dir = load.get_default_csv_dir()
32 """Return a sort key for mixed int/string tuples.
42 return tuple(generator())
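# Illustrative check of the sort key above (values chosen here, not taken from
# the original script): numeric id columns sort numerically rather than lexically,
# e.g. sorted([['10', 'a'], ['2', 'b']], key=tuple_key) == [['2', 'b'], ['10', 'a']].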
45 """Various ways to get a new value from the old one"""
47 def copy(field_name
, source
, **kwargs
):
49 return source
[field_name
]
    @staticmethod
    def main(field_name, source, **kwargs):
        """Populate aux table from the main table"""
        return source[field_name]

    @staticmethod
    def Main(field_name, source, **kwargs):
        """Populate aux table from the main table, capitalizing the value"""
        return source[field_name].capitalize()
    @staticmethod
    def ident(source, **kwargs):
        """Craft an identifier from the 'identifier' or 'name' column"""
        return name2ident(source.get('identifier', source.get('name')))

    @staticmethod
    def Name(source, **kwargs):
        """Capitalize the name (or identifier) column"""
        name = source.get('name', source.get('identifier', None))
        name = ' '.join(word.capitalize() for word in name.split(' '))
        return name
    @staticmethod
    def f_id(source, **kwargs):
        """Capitalize the identifier column"""
        return source['identifier'].capitalize()

    @staticmethod
    def name(source, **kwargs):
        """Get the original name"""
        return source['name']

    @staticmethod
    def newid(i, source, **kwargs):
        """Assign a new "auto-incremented" id"""
        source['id'] = i  # hack to make srcid work
        return i
    @staticmethod
    def en(source, **kwargs):
        """Assign the value for English -- unless it's Japanese"""
        if source.get('version_group_id', None) == str(bw_version_group_id):
            return 1  # jp
        return 9  # en

    @staticmethod
    def srcid(source, field_name, **kwargs):
        """The original table's id"""
        if field_name == 'pokemon_form_group_id':
            # This one reuses another table's ID
            return source['pokemon_id']
        return source['id']
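# Note: judging by the call site further down, each func above is invoked with
# the keyword arguments i (row counter), field_name and source (the source row
# as a dict); the **kwargs catch-all lets each func ignore the ones it doesn't use.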

def name2ident(name):
    ident = name.decode('utf-8').lower()
    ident = ident.replace(u'+', ' plus ')
    ident = re.sub(u'[ _–]+', u'-', ident)
    ident = re.sub(u'[\'./;’(),:]', u'', ident)
    ident = ident.replace(u'é', 'e')
    ident = ident.replace(u'♀', '-f')
    ident = ident.replace(u'♂', '-m')
    if ident in ('???', '????'):
        ident = 'unknown'
    elif ident == '!':
        ident = 'exclamation'
    elif ident == '?':
        ident = 'question'
    for c in ident:
        assert c in "abcdefghijklmnopqrstuvwxyz0123456789-", repr(ident)
    return ident
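# Illustrative results of the replacements above (worked out by hand, not taken
# from the original script's output): name2ident("Farfetch'd") gives u'farfetchd',
# and name2ident('Nidoran♀') gives u'nidoran-f'.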

# FieldSpec.out is the csv file the value goes to, .name is the column name,
# and .func is the MakeFieldFuncs member that computes the value.
FieldSpec = namedtuple('FieldSpec', 'out name func')

def main():
    for table in sorted(tables.all_tables(), key=lambda t: t.__name__):
        datafilename = dir + '/' + table.__tablename__ + '.csv'
        classname = table.__name__
        if hasattr(table, 'object_table'):
            # This is an auxiliary table; it'll be processed with the main one
            continue
        print "%s: %s" % (classname, table.__tablename__)
        with open(datafilename) as datafile:
            datacsv = csv.reader(datafile, lineterminator='\n')
            orig_fields = datacsv.next()
            columns = class_mapper(table).c
            new_fields = []
            main_out = []
            outputs = {datafilename: main_out}
            aux_tables = []
            srcfiles = [datafilename]
            # Set new_fields to a list of FieldSpec objects, one for each field we want in the csv
            for column in columns:
                name = column.name
                if name == 'identifier':
                    new_fields.append(FieldSpec(datafilename, column.name, MakeFieldFuncs.ident))
                elif name in orig_fields:
                    new_fields.append(FieldSpec(datafilename, column.name, MakeFieldFuncs.copy))
                elif name == 'id':
                    new_fields.append(FieldSpec(datafilename, column.name, MakeFieldFuncs.newid))
                elif name == 'language_id':
                    new_fields.insert(2, FieldSpec(datafilename, column.name, MakeFieldFuncs.en))
                else:
                    raise AssertionError(name)
            headers = {datafilename: list(field.name for field in new_fields)}
            for field in new_fields:
                print ' [{0.func.func_name:5}] {0.name}'.format(field)

            # Do pretty much the same for aux tables
            name_fields = None
            for attrname in 'text_table prose_table'.split():
                aux_table = getattr(table, attrname, None)
                if aux_table:
                    aux_datafilename = dir + '/' + aux_table.__tablename__ + '.csv'
                    print " %s: %s" % (aux_table.__name__, aux_table.__tablename__)
                    srcfiles.append(datafilename)
                    aux_tables.append(aux_table)
                    columns = class_mapper(aux_table).c
                    aux_out = []
                    outputs[aux_datafilename] = aux_out
                    aux_fields = []
                    for column in columns:
                        name = column.name
                        if name == 'language_id':
                            aux_fields.insert(1, FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.en))
                        elif name == 'name' and table.__name__ == 'ItemFlag':
                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.f_id))
                        elif name == 'description' and table.__name__ == 'ItemFlag':
                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.name))
                        elif name in orig_fields and name == 'name' and table.__name__ in 'PokemonColor ContestType BerryFirmness'.split():
                            # Capitalize these names
                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.Name))
                        elif name in orig_fields and name in 'color flavor'.split() and table.__name__ == 'ContestType':
                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.Main))
                        elif name in orig_fields:
                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.main))
                        elif name == table.__singlename__ + '_id':
                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.srcid))
                        elif name == 'name':
                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.Name))
                        elif name == 'lang_id':
                            aux_fields.append(FieldSpec(aux_datafilename, column.name, MakeFieldFuncs.srcid))
                        else:
                            raise AssertionError(name)
                    # If this table contains the name, remember that
                    if any(column.name == 'name' for column in columns):
                        name_fields = aux_fields
                    # Sort aux tables nicely
                    def key(f):
                        if f.func == MakeFieldFuncs.srcid:
                            return 0
                        elif f.name == 'language_id':
                            return 1
                        elif f.name == 'name':
                            return 2
                        else:
                            return 3
                    aux_fields.sort(key=key)
                    new_fields += aux_fields
                    headers[aux_datafilename] = list(field.name for field in aux_fields)
                    for field in aux_fields:
                        print ' [{0.func.func_name:5}] {0.name}'.format(field)
            # Do nothing if the table's the same
            if all(field.func == MakeFieldFuncs.copy for field in new_fields):
                continue
            # Otherwise read the file
            # outputs will be a (filename -> list of rows) dict
            for autoincrement_id, src_row in enumerate(datacsv, start=1):
                row = dict(zip(orig_fields, src_row))
                new_rows = defaultdict(list)
                for field in new_fields:
                    new_rows[field.out].append(field.func(
                        i=autoincrement_id,
                        field_name=field.name,
                        source=row,
                    ))
                for name, row in new_rows.items():
                    outputs[name].append(row)
        # If there was a _names table, read that and append it to the
        # aux table that has names
        try:
            name_datafilename = dir + '/' + table.__singlename__ + '_names.csv'
            name_file = open(name_datafilename)
        except (AttributeError, IOError):
            pass
        else:
            print u' → reading foreign names'
            with name_file:
                namecsv = csv.reader(name_file, lineterminator='\n')
                src_fields = namecsv.next()
                obj_id_fieldname = table.__singlename__ + '_id'
                assert src_fields == [obj_id_fieldname, 'language_id', 'name']
                for name_row in namecsv:
                    name_dict = dict(zip(src_fields, name_row))
                    row = []
                    for field in name_fields:
                        row.append(name_dict.get(field.name, ''))
                    outputs[name_fields[0].out].append(row)
            os.unlink(name_datafilename)
        # For all out files, write a header & sorted rows
        for filename, rows in outputs.items():
            with open(filename, 'w') as outfile:
                outcsv = csv.writer(outfile, lineterminator='\n')
                outcsv.writerow(headers[filename])
                rows.sort(key=tuple_key)
                for row in rows:
                    outcsv.writerow(row)


if __name__ == '__main__':
    main()