2 """Provides `romanize()` for romanizing simple Japanese text."""
5 u
'ア': 'a', u
'イ': 'i', u
'ウ': 'u', u
'エ': 'e', u
'オ': 'o',
6 u
'カ': 'ka', u
'キ': 'ki', u
'ク': 'ku', u
'ケ': 'ke', u
'コ': 'ko',
7 u
'サ': 'sa', u
'シ': 'shi', u
'ス': 'su', u
'セ': 'se', u
'ソ': 'so',
8 u
'タ': 'ta', u
'チ': 'chi', u
'ツ': 'tsu', u
'テ': 'te', u
'ト': 'to',
9 u
'ナ': 'na', u
'ニ': 'ni', u
'ヌ': 'nu', u
'ネ': 'ne', u
'ノ': 'no',
10 u
'ハ': 'ha', u
'ヒ': 'hi', u
'フ': 'fu', u
'ヘ': 'he', u
'ホ': 'ho',
11 u
'マ': 'ma', u
'ミ': 'mi', u
'ム': 'mu', u
'メ': 'me', u
'モ': 'mo',
12 u
'ヤ': 'ya', u
'ユ': 'yu', u
'ヨ': 'yo',
13 u
'ラ': 'ra', u
'リ': 'ri', u
'ル': 'ru', u
'レ': 're', u
'ロ': 'ro',
14 u
'ワ': 'wa', u
'ヰ': 'wi', u
'ヱ': 'we', u
'ヲ': 'wo',
16 u
'ガ': 'ga', u
'ギ': 'gi', u
'グ': 'gu', u
'ゲ': 'ge', u
'ゴ': 'go',
17 u
'ザ': 'za', u
'ジ': 'ji', u
'ズ': 'zu', u
'ゼ': 'ze', u
'ゾ': 'zo',
18 u
'ダ': 'da', u
'ヂ': 'ji', u
'ヅ': 'dzu', u
'デ': 'de', u
'ド': 'do',
19 u
'バ': 'ba', u
'ビ': 'bi', u
'ブ': 'bu', u
'ベ': 'be', u
'ボ': 'bo',
20 u
'パ': 'pa', u
'ピ': 'pi', u
'プ': 'pu', u
'ペ': 'pe', u
'ポ': 'po',
24 u
'ャ': 'ya', u
'ュ': 'yu', u
'ョ': 'yo',
25 u
'ゃ': 'ya', u
'ゅ': 'yu', u
'ょ': 'yo',
# XXX If romanize() ever handles hiragana, it will need to make sure that the
# preceding character was a katakana.
# This does not include every small kana combination, but should include every
# one used in a Pokémon name.  An exhaustive list would be very long.

# Small katakana vowels, each mapped to the plain vowel it is romanized as.
_roomaji_small_kana = {
    u'ァ': 'a',
    u'ィ': 'i',
    u'ゥ': 'u',
    u'ェ': 'e',
    u'ォ': 'o',
}
35 _roomaji_small_kana_combos
= {
38 u
'テァ': 'tha', u
'ティ': 'ti', u
'テゥ': 'thu', u
'テェ': 'tye', u
'テォ': 'tho',
39 u
'デァ': 'dha', u
'ディ': 'di', u
'デゥ': 'dhu', u
'デェ': 'dye', u
'デォ': 'dho',
40 u
'ファ': 'fa', u
'フィ': 'fi', u
'ホゥ': 'hu', u
'フェ': 'fe', u
'フォ': 'fo',
44 """Converts a string of kana to roomaji."""
46 vowels
= ['a', 'e', 'i', 'o', 'u', 'y']
49 last_kana
= None # Used for ー; っ or ッ; ん or ン
50 last_char
= None # Used for small kana combos
53 if ord(char
) >= 0xff11 and ord(char
) <= 0xff5e:
54 if last_kana
== 'sokuon':
55 raise ValueError("Sokuon cannot precede Latin characters.")
57 char
= chr(ord(char
) - 0xff11 + 0x31)
58 characters
.append(char
)
63 elif char
in _roomaji_small_kana
:
64 combo
= last_char
+ char
65 if combo
in _roomaji_small_kana_combos
:
66 characters
[-1] = _roomaji_small_kana_combos
[combo
]
69 # If we don't know what it is... act dumb and treat it as a
70 # full-size vowel. Better than bailing, and seems to occur a
71 # lot, e.g. ピィ is "pii"
72 characters
.append(_roomaji_small_kana
[char
])
75 elif char
in _roomaji_youon
:
76 if last_kana
[-1] != 'i' or last_kana
== 'i':
77 raise ValueError("Youon must follow an -i sound.")
79 # Drop the -i and append the ya/yu/yo sound
80 new_sound
= _roomaji_youon
[char
]
81 if last_kana
in ['chi', 'shi', 'ji']:
83 new_char
= last_kana
[:-1] + new_sound
[1:]
85 new_char
= last_kana
[:-1] + new_sound
87 characters
[-1] = new_char
91 elif char
in (u
'っ', u
'ッ'):
92 # Remember it and double the consonant next time around
97 if last_kana
[-1] not in vowels
:
98 raise ValueError(u
"'ー' must follow by a vowel.")
99 characters
.append(last_kana
[-1])
104 elif char
in _roomaji_kana
:
105 kana
= _roomaji_kana
[char
]
107 if last_kana
== 'sokuon':
108 if kana
[0] in vowels
:
109 raise ValueError("Sokuon cannot precede a vowel.")
111 characters
.append(kana
[0])
112 elif last_kana
== 'n' and kana
[0] in vowels
:
113 characters
.append("'")
115 characters
.append(kana
)
121 if last_kana
== 'sokuon':
122 raise ValueError("Sokuon must be followed by another kana.")
124 characters
.append(char
)
131 if last_kana
== 'sokuon':
132 raise ValueError("Sokuon cannot be the last character.")
134 return unicode(''.join(characters
))