4 u"""Creation and loading of GNU Gettext language files.
6 poupdate [options] [file1.csv file2.csv ...]
9 - Create .pot files (in pokedex/i18n/)
10 - Update the .po files (in pokedex/i18n/<lang>)
11 - Update the pokedex .csv files in (pokedex/data/csv/translations)
13 To make pos for a new language, make sure it is in the database, make
14 a directory for it in pokedex/i18n/, and run this.
16 You can also give one or more translation CSVs as arguments.
17 These are in the same format as veekun's main database CSVs, for example
18 pokedex/data/csv/ability_prose.csv. Be sure to set the correct language
19 ID (which implies the language must be in the database).
20 Also be sure to have the correct column order: first an appropriately named
21 foreign key, then local_language_id, and then the text columns.
25 # Everything related to Gettext files, and the CLI interface, is here.
26 # General message handling and CSV I/O is in the pokedex library.
28 # Notes on how we use PO format:
# The source information is stored in the occurrences fields, using
30 # "table_name.column_name" for file and object ID for line number. This is used
31 # as a message key, instead of the source string. So it's important not to
32 # discard location information. It also means "obsolete" and "fuzzy" mean
33 # pretty much the same in our context.
35 # Also note that a pot file is just a po file with all strings untranslated.
36 # So some functions here will work on either.
38 # Gettext context (msgctxt) is written to the files so that tools don't merge
39 # unrelated strings together. It is ignored when reading the PO files.
41 # Also of note, "polib" means "(do) kiss!" in Czech.
46 from datetime import datetime
47 from optparse import OptionParser
48 from collections import defaultdict
52 from pokedex.db import tables, translations
53 from pokedex.defaults import get_default_csv_dir
58 if __name__ == '__main__':
59 exit('This utility needs polib installed.\n$ pip install polib')
# Flag put on PO entries whose msgid had its numbers replaced by "{num}".
number_replacement_flag = '-pokedex-number-replacement'

# Default location of the gettext files: the i18n directory of the pokedex
# package.
default_gettext_directory = pkg_resources.resource_filename('pokedex', 'i18n')

# Maps class names to mapped classes. The name of a translation class maps to
# the mapped class it belongs to, not to the translation class itself.
mapped_class_dict = dict(
    (mapped.__name__, mapped) for mapped in tables.mapped_classes)
for cls in tables.mapped_classes:
    mapped_class_dict.update(
        (translation_cls.__name__, cls)
        for translation_cls in cls.translation_classes)
class PokedexPot(polib.POFile):
    """A PO/POT file with pokedex metadata that merges similar entries.

    `seen_entries` maps (msgctxt, msgid with numbers replaced by "{num}")
    to the first entry appended under that key.
    """

    def __init__(self, name):
        """Create an empty file; `name` goes into the Project-Id-Version."""
        super(PokedexPot, self).__init__()
        project_id = 'pokedex-%s 0.1' % name
        created = datetime.now().isoformat()
        self.metadata = {
            'Project-Id-Version': project_id,
            'Report-Msgid-Bugs-To': 'encukou@gmail.com',
            'POT-Creation-Date': created,
            'PO-Revision-Date': 'YEAR-MO-DA HO:MI+ZONE',
            'MIME-Version': '1.0',
            'Content-Type': 'text/plain; charset=utf-8',
            'Content-Transfer-Encoding': '8bit',
            'Generated-By': "The pokedex",
        }
        self.seen_entries = {}

    def append(self, entry):
        """Append an entry, merging POEntries that differ only in numbers.

        For example "Route 1", "Route 2", etc. end up as a single
        "Route {num}" entry.

        Several numbers may be replaced at once, for example in
        "{num}--{num} different Unown caught".

        Entries without numbers are merged too (e.g. "Has no overworld
        effect" appears quite a few times in AbilityChangelog).
        """
        generalized = translations.number_re.sub('{num}', entry.msgid)
        key = (entry.msgctxt, generalized)

        if key not in self.seen_entries:
            # First entry with this key: keep it verbatim for now.
            self.seen_entries[key] = entry
            self += [entry]
            return

        common_entry = self.seen_entries[key]
        common_entry.occurrences += entry.occurrences
        # The stored entry is only generalized once a second match shows up.
        # So we get "Route {num}", but "Porygon2" stays as it is because
        # there is no Porygon3.
        common_entry.msgid = generalized
        common_entry.msgstr = translations.number_re.sub(
            '{num}', common_entry.msgstr)
        numbers_were_replaced = generalized != entry.msgid
        already_flagged = number_replacement_flag in common_entry.flags
        if numbers_were_replaced and not already_flagged:
            common_entry.flags.append(number_replacement_flag)
# NOTE(review): the class header and the last two statements were missing
# from the listing this was restored from -- confirm the class name and base.
class PotDict(dict):
    """A dict that creates a PokedexPot for any name looked up but missing."""

    def __missing__(self, name):
        # Store the new pot so later lookups of `name` return the same one.
        created = self[name] = PokedexPot(name)
        return created
def yield_po_messages(pos):
    """Return a translations.Merge fed with the messages of all given .po files.

    `pos` is a dict whose values are po files; each file becomes one
    iterator of the merge.
    """
    combined = translations.Merge()
    for pofile in pos.values():
        # The merge is passed along so the per-file generator can register
        # extra iterators on it.
        per_file_messages = _yield_one_po_messages(pofile, combined)
        combined.add_iterator(per_file_messages)
    return combined
def entry_sort_key(entry):
    """Return the sort key of a PO entry, based on its first occurrence.

    The key is (class name, object id, column name, fuzzy). The class name is
    '' when the class is not in mapped_class_dict; a warning is printed then.
    Returns None when the entry has no occurrences.
    """
    if not entry.occurrences:
        return None
    cls_col, line = entry.occurrences[0]
    # NOTE(review): this empty-line guard was restored from a truncated
    # listing -- confirm it against the original file.
    if not line:
        return None

    classname, col = cls_col.split('.')
    fuzzy = entry.obsolete or 'fuzzy' in entry.flags
    cls = mapped_class_dict.get(classname)
    if cls is None:
        # Renamed table?
        print('Warning: Unknown class %s' % classname)
        sort_name = ''
    else:
        sort_name = cls.__name__
    return sort_name, int(line), col, fuzzy
def _yield_one_po_messages(pofile, merger):
    """Yield one translations.Message per translated entry of a po file.

    Messages in our po files are ordered by the first occurrence, and the
    occurrences of a single message are ordered as well. So only the first
    occurrence is yielded here; the remaining ones are handed to `merger` as
    a separate iterator.
    """
    for entry in sorted(pofile, key=entry_sort_key):
        if not entry.msgstr:
            continue
        is_fuzzy = (entry.obsolete or 'fuzzy' in entry.flags)
        numbers_replaced = number_replacement_flag in entry.flags

        messages = []
        for cls_colname, object_id in entry.occurrences:
            if not cls_colname:
                continue
            clsname, colname = cls_colname.split('.')
            # NOTE(review): the arguments other than the class name and
            # number_replacement were restored from a truncated listing --
            # confirm them against translations.Message.
            messages.append(translations.Message(
                mapped_class_dict[clsname].__name__,
                int(object_id),
                colname,
                entry.msgstr,
                source=entry.msgid,
                number_replacement=numbers_replaced,
                origin='PO file',
                fuzzy=is_fuzzy,
            ))

        if not messages:
            continue
        remaining = messages[1:]
        if remaining:
            # Spawn extra iterators before yielding
            merger.add_iterator(remaining)
        yield messages[0]
def create_pots(source, *translation_streams):
    """Convert an iterator of Messages to a dictionary of pot/po files

    If translations are given, they are merged in. A translation that is not
    an exact match is still written, but flagged fuzzy. Give some streams for
    po files, give none for pot files.

    Returns a dict-like mapping of pot name to file.
    """
    obsolete = []
    pots = PotDict()
    merged = translations.merge_translations(
        source, *translation_streams, unused=obsolete.append)

    for message, sourcehash, string, exact in merged:
        ctxt = '.'.join((message.cls, message.colname))
        entry = polib.POEntry(
            msgid=message.string,
            occurrences=[(ctxt, message.id)],
            msgctxt=ctxt,
        )
        if string:
            entry.msgstr = string
            if not exact:
                entry.flags.append('fuzzy')
        pots[message.pot].append(entry)

    # NOTE(review): the entries built for unused translations are never added
    # to any pot, so they have no effect on the result -- confirm whether
    # that is intended.
    for unused_message in obsolete:
        ctxt = '.'.join((unused_message.cls, unused_message.colname))
        entry = polib.POEntry(
            msgid=unused_message.source or '???',
            occurrences=[(ctxt, unused_message.id)],
            msgctxt=ctxt,
            obsolete=True,
        )
    return pots
def save_pots(pots, gettext_directory=default_gettext_directory):
    """Write each pot to <gettext_directory>/pokedex-<name>.pot."""
    for name, pot in pots.items():
        filename = 'pokedex-%s.pot' % name
        target = os.path.join(gettext_directory, filename)
        pot.save(target)
def save_pos(pos, lang, gettext_directory=default_gettext_directory):
    """Write each po to <gettext_directory>/<lang>/pokedex-<name>.po."""
    for name, po in pos.items():
        filename = 'pokedex-%s.po' % name
        target = os.path.join(gettext_directory, lang, filename)
        po.save(target)
def read_pots(directory=default_gettext_directory, extension='.pot'):
    """Load every file in `directory` that has the given extension.

    Works on pos or pots. Returns a dict that maps each file name, without
    its extension, to the loaded po file.
    """
    loaded = {}
    for filename in os.listdir(directory):
        basename, ext = os.path.splitext(filename)
        if ext != extension:
            continue
        full_path = os.path.join(directory, filename)
        loaded[basename] = polib.pofile(full_path)
    return loaded
def all_langs(gettext_directory=default_gettext_directory):
    """Return the names of all subdirectories of the gettext directory."""
    langs = []
    for name in os.listdir(gettext_directory):
        if os.path.isdir(os.path.join(gettext_directory, name)):
            langs.append(name)
    return langs
def merge_pos(transl, lang, language_directory):
    """Update all po files for the given language

    Takes into account the source, the official translations from the
    database, the existing PO files, and the current translation CSV, in that
    order.

    Returns a name -> pofile dict
    """
    source_messages = transl.source
    official = transl.official_messages(lang)
    existing_pos = read_pots(language_directory, '.po')
    from_po_files = yield_po_messages(pos=existing_pos)
    from_csv = transl.yield_target_messages(lang)
    return create_pots(source_messages, official, from_po_files, from_csv)
def bar(fraction, size, done_char='=', split_char='|', notdone_char='-'):
    """Build an ASCII art progress bar

    The bar has `size` cells plus one separator: the completed cells, then
    `split_char`, then the remaining cells. When `fraction` is exactly 1 the
    separator is drawn with `done_char` instead.
    """
    # NOTE(review): the `fraction == 1` condition was restored from a
    # truncated listing -- confirm it against the original file.
    separator = done_char if fraction == 1 else split_char
    filled = int(round(size * fraction))
    pieces = [done_char * filled, separator, notdone_char * (size - filled)]
    return ''.join(pieces)
def print_stats(pos):
    """Print out some fun stats about a set of po files

    One row is printed per po file, followed by a 'Total' row. Each row shows
    the translated and overall entry counts, the percentage, and a progress
    bar.
    """
    template = u"{0:>10}: {1:4}/{2:4} {3:6.2f}% [{4}]"

    def print_row(label, translated, count):
        # One formatted statistics row, encoded as UTF-8 before printing.
        fraction_translated = 1. * translated / count
        row = template.format(
            label,
            translated,
            count,
            100 * fraction_translated,
            bar(fraction_translated, 47),
        )
        print(row.encode('utf-8'))

    total_translated = 0
    total = 0
    for name, po in pos.items():
        entry_count = len(po)
        num_translated = len(po.translated_entries())
        print_row(name, num_translated, entry_count)
        total_translated += num_translated
        total += entry_count
    print_row('Total', total_translated, total)
if __name__ == '__main__':
    # Command-line entry point. Depending on the options, this creates the
    # POT templates, updates the PO catalogs of each language, and writes the
    # merged translations back to the pokedex translation CSVs.
    parser = OptionParser(__doc__)

    parser.add_option('-l', '--langs', dest='langs',
            help="List of languages to handle, separated by commas (example: -l 'en,de,ja') (default: all in gettext directory)")
    parser.add_option('-P', '--no-pots', dest='pots', action='store_false', default=True,
            help='Do not create POT files (templates)')
    parser.add_option('-p', '--no-pos', dest='pos', action='store_false', default=True,
            help='Do not update PO files (message catalogs)')

    parser.add_option('-c', '--no-csv', dest='csv', action='store_false', default=True,
            help='Do not update pokedex translations files')

    parser.add_option('-d', '--directory', dest='directory',
            help='Veekun data directory')
    parser.add_option('-L', '--source-language', dest='source_lang',
            help="Source language identifier (default: 'en')")

    parser.add_option('-g', '--gettext-dir', dest='gettext_directory', default=default_gettext_directory,
            help='Gettext directory (default: pokedex/i18n/)')

    parser.add_option('-q', '--quiet', dest='verbose', default=True, action='store_false',
            help="Don't print what's going on")

    options, arguments = parser.parse_args()

    transl = translations.Translations.from_parsed_options(options)

    gettext_directory = options.gettext_directory

    # Without a gettext directory there is nowhere to put pots/pos; the CSV
    # update can still run.
    if (options.pots or options.pos) and not os.path.exists(gettext_directory):
        print("Error: Gettext directory doesn't exist. Skipping pot/po creation")
        options.pots = options.pos = False

    if options.pots:
        if options.verbose:
            print('%s %s' % ('Creating pots in', gettext_directory))
        save_pots(create_pots(transl.source), gettext_directory=gettext_directory)

    if options.pos or options.csv:
        # Merge in CSV files from command line
        csv_streams = defaultdict(translations.Merge)
        for argument in arguments:
            # Add each message in its own stream, to sort them.
            # The file is read completely inside the `with` block, so it is
            # closed as soon as its messages have been collected.
            with open(argument, 'rb') as csv_file:
                for message in translations.yield_guessed_csv_messages(csv_file):
                    lang = transl.language_identifiers[message.language_id]
                    csv_streams[lang].add_iterator([message])
        streams = defaultdict(list)
        for lang, stream in csv_streams.items():
            streams[lang].append(stream)

        if os.path.exists(gettext_directory):
            # Merge in the PO files
            if options.langs:
                langs = options.langs.split(',')
            else:
                langs = all_langs(gettext_directory)

            for lang in langs:
                language_directory = os.path.join(gettext_directory, lang)
                if options.verbose:
                    print('Merging translations for %s in %s' % (lang, language_directory))
                pos = merge_pos(transl, lang, language_directory)

                if options.pos:
                    if options.verbose:
                        print('Writing POs for %s' % lang)
                    save_pos(pos, lang, gettext_directory=gettext_directory)

                    if options.verbose:
                        print_stats(pos)

                # NOTE(review): the nesting here was restored from a listing
                # without indentation -- confirm that the PO messages are fed
                # to the CSV update even when PO writing is disabled.
                streams[lang].append(yield_po_messages(pos))

    if options.csv:
        for lang, lang_streams in streams.items():
            if options.verbose:
                print("Merging %s translation stream/s for '%s'" % (len(lang_streams), lang))
            # The existing translations go last, after the CSV and PO streams.
            existing_messages = list(transl.yield_target_messages(lang))
            lang_streams.append(existing_messages)
            transl.write_translations(lang, *lang_streams)