2 """Provides `romanize()` for romanizing simple Japanese text."""
# Full-size kana -> roomaji.  romanize() looks ordinary kana up here.
_roomaji_kana = {
    # Hiragana
    u'あ': 'a',     u'い': 'i',     u'う': 'u',     u'え': 'e',     u'お': 'o',
    u'か': 'ka',    u'き': 'ki',    u'く': 'ku',    u'け': 'ke',    u'こ': 'ko',
    u'さ': 'sa',    u'し': 'shi',   u'す': 'su',    u'せ': 'se',    u'そ': 'so',
    u'た': 'ta',    u'ち': 'chi',   u'つ': 'tsu',   u'て': 'te',    u'と': 'to',
    u'な': 'na',    u'に': 'ni',    u'ぬ': 'nu',    u'ね': 'ne',    u'の': 'no',
    u'は': 'ha',    u'ひ': 'hi',    u'ふ': 'fu',    u'へ': 'he',    u'ほ': 'ho',
    u'ま': 'ma',    u'み': 'mi',    u'む': 'mu',    u'め': 'me',    u'も': 'mo',
    u'や': 'ya',                    u'ゆ': 'yu',                    u'よ': 'yo',
    u'ら': 'ra',    u'り': 'ri',    u'る': 'ru',    u'れ': 're',    u'ろ': 'ro',
    u'わ': 'wa',    u'ゐ': 'wi',                    u'ゑ': 'we',    u'を': 'wo',
    # Syllabic n: romanize() compares the previous kana against 'n' to decide
    # whether an apostrophe is needed before a vowel, so it must be mapped.
    u'ん': 'n',
    u'が': 'ga',    u'ぎ': 'gi',    u'ぐ': 'gu',    u'げ': 'ge',    u'ご': 'go',
    u'ざ': 'za',    u'じ': 'ji',    u'ず': 'zu',    u'ぜ': 'ze',    u'ぞ': 'zo',
    u'だ': 'da',    u'ぢ': 'ji',    u'づ': 'dzu',   u'で': 'de',    u'ど': 'do',
    u'ば': 'ba',    u'び': 'bi',    u'ぶ': 'bu',    u'べ': 'be',    u'ぼ': 'bo',
    u'ぱ': 'pa',    u'ぴ': 'pi',    u'ぷ': 'pu',    u'ぺ': 'pe',    u'ぽ': 'po',

    # Katakana
    u'ア': 'a',     u'イ': 'i',     u'ウ': 'u',     u'エ': 'e',     u'オ': 'o',
    u'カ': 'ka',    u'キ': 'ki',    u'ク': 'ku',    u'ケ': 'ke',    u'コ': 'ko',
    u'サ': 'sa',    u'シ': 'shi',   u'ス': 'su',    u'セ': 'se',    u'ソ': 'so',
    u'タ': 'ta',    u'チ': 'chi',   u'ツ': 'tsu',   u'テ': 'te',    u'ト': 'to',
    u'ナ': 'na',    u'ニ': 'ni',    u'ヌ': 'nu',    u'ネ': 'ne',    u'ノ': 'no',
    u'ハ': 'ha',    u'ヒ': 'hi',    u'フ': 'fu',    u'ヘ': 'he',    u'ホ': 'ho',
    u'マ': 'ma',    u'ミ': 'mi',    u'ム': 'mu',    u'メ': 'me',    u'モ': 'mo',
    u'ヤ': 'ya',                    u'ユ': 'yu',                    u'ヨ': 'yo',
    u'ラ': 'ra',    u'リ': 'ri',    u'ル': 'ru',    u'レ': 're',    u'ロ': 'ro',
    u'ワ': 'wa',    u'ヰ': 'wi',                    u'ヱ': 'we',    u'ヲ': 'wo',
    u'ン': 'n',
    u'ガ': 'ga',    u'ギ': 'gi',    u'グ': 'gu',    u'ゲ': 'ge',    u'ゴ': 'go',
    u'ザ': 'za',    u'ジ': 'ji',    u'ズ': 'zu',    u'ゼ': 'ze',    u'ゾ': 'zo',
    u'ダ': 'da',    u'ヂ': 'ji',    u'ヅ': 'dzu',   u'デ': 'de',    u'ド': 'do',
    u'バ': 'ba',    u'ビ': 'bi',    u'ブ': 'bu',    u'ベ': 'be',    u'ボ': 'bo',
    u'パ': 'pa',    u'ピ': 'pi',    u'プ': 'pu',    u'ペ': 'pe',    u'ポ': 'po',
    # NOTE(review): no standalone entry for ヴ is defined here even though the
    # small-kana combos start with it -- confirm whether one is wanted.
}
# Small ya/yu/yo.  romanize() fuses these with a preceding kana that ends in
# -i (e.g. ki + small ya -> kya).
_roomaji_youon = {
    # Hiragana
    u'ゃ': 'ya',                    u'ゅ': 'yu',                    u'ょ': 'yo',

    # Katakana
    u'ャ': 'ya',                    u'ュ': 'yu',                    u'ョ': 'yo',
}
# XXX If romanize() ever handles hiragana, it will need to make sure that the
# preceding character was a katakana.
# This does not include every small kana combination, but should include every
# one used in a Pokémon name.  An exhaustive list would be very long.
# Small katakana vowels -> the vowel they stand for.  romanize() falls back to
# these values when a small vowel does not form a known combo.
_roomaji_small_kana = {
    u'ァ': 'a',     u'ィ': 'i',     u'ゥ': 'u',     u'ェ': 'e',     u'ォ': 'o',
}
# Two-character sequences (full-size katakana + small vowel) whose combined
# romanization replaces the romanization of the first character.
_roomaji_small_kana_combos = {
    # These are, by the way, fairly arbitrary.  "shi xi" to mean "sy" is
    # particularly weird, but it seems to be what GF intends

    # Simple vowel replacement
    u'ウィ': 'wi',  u'ウゥ': 'wu',  u'ウェ': 'we',  u'ウォ': 'wo',
    u'ヴァ': 'va',  u'ヴィ': 'vi',                  u'ヴェ': 've',  u'ヴォ': 'vo',
    u'テァ': 'tha', u'ティ': 'ti',  u'テゥ': 'thu', u'テェ': 'tye', u'テォ': 'tho',
    u'デァ': 'dha', u'ディ': 'di',  u'デゥ': 'dhu', u'デェ': 'dye', u'デォ': 'dho',
    # NOTE(review): 'ホゥ' (ho + small u) sits in the otherwise フ-based row;
    # confirm this is intentional rather than a typo for 'フゥ'.
    u'ファ': 'fa',  u'フィ': 'fi',  u'ホゥ': 'hu',  u'フェ': 'fe',  u'フォ': 'fo',
}
def romanize(string):
    """Converts a string of kana to roomaji.

    The input is processed one character at a time.  Full-width Latin
    characters are folded to their ASCII equivalents, kana are looked up in
    the module's romanization tables, and anything unrecognized is passed
    through unchanged.

    Args:
        string: The text to romanize.

    Returns:
        The romanized text as a single (unicode) string.

    Raises:
        ValueError: If a sokuon (っ/ッ) is followed by a Latin character, a
            vowel, or a non-kana character, or is the last character; if a
            youon (small ya/yu/yo) does not follow an -i sound; or if 'ー'
            does not follow a vowel.
    """
    vowels = ('a', 'e', 'i', 'o', 'u', 'y')

    characters = []
    last_kana = None  # Used for ー; っ or ッ; ん or ン
    last_char = None  # Used for small kana combos
    for char in string:
        # Full-width Latin
        if 0xff01 <= ord(char) <= 0xff5e:
            if last_kana == 'sokuon':
                raise ValueError("Sokuon cannot precede Latin characters.")

            # XXX Real Unicode decomposition would be nicer
            char = chr(ord(char) - 0xff01 + 0x21)
            characters.append(char)

            last_kana = None

        # Small vowel
        elif char in _roomaji_small_kana:
            # A small vowel at the very start has nothing to combine with;
            # guard so we fall through to the "treat as full-size" path
            # instead of failing on None + str.
            combo = last_char + char if last_char is not None else None
            if combo in _roomaji_small_kana_combos:
                characters[-1] = _roomaji_small_kana_combos[combo]
            else:
                # If we don't know what it is... act dumb and treat it as a
                # full-size vowel.  Better than bailing, and seems to occur a
                # lot, e.g. ピィ is "pii"
                characters.append(_roomaji_small_kana[char])

            last_kana = _roomaji_small_kana[char]

        # Youon
        elif char in _roomaji_youon:
            if not last_kana or last_kana[-1] != 'i' or last_kana == 'i':
                raise ValueError("Youon must follow an -i sound.")

            # Drop the -i and append the ya/yu/yo sound
            new_sound = _roomaji_youon[char]
            if last_kana in ['chi', 'shi', 'ji']:
                # Strip the y- as well: chi + ya -> cha, not chya
                new_char = last_kana[:-1] + new_sound[1:]
            else:
                new_char = last_kana[:-1] + new_sound

            characters[-1] = new_char
            last_kana = new_char

        # Sokuon
        elif char in (u'っ', u'ッ'):
            # Remember it and double the consonant next time around
            last_kana = 'sokuon'

        # Extended vowel
        elif char == u'ー':
            # `not last_kana` covers a leading 'ー' or one that follows a
            # non-kana character, which would otherwise fail on None[-1].
            if not last_kana or last_kana[-1] not in vowels:
                raise ValueError(u"'ー' must follow by a vowel.")
            characters.append(last_kana[-1])

            last_kana = None

        # Regular ol' kana
        elif char in _roomaji_kana:
            kana = _roomaji_kana[char]

            if last_kana == 'sokuon':
                if kana[0] in vowels:
                    raise ValueError("Sokuon cannot precede a vowel.")

                # Double the leading consonant
                characters.append(kana[0])
            elif last_kana == 'n' and kana[0] in vowels:
                # Separate a syllabic n from a following vowel
                characters.append("'")

            characters.append(kana)

            last_kana = kana

        # Not Japanese?
        else:
            if last_kana == 'sokuon':
                raise ValueError("Sokuon must be followed by another kana.")

            characters.append(char)

            last_kana = None

        last_char = char

    if last_kana == 'sokuon':
        raise ValueError("Sokuon cannot be the last character.")

    # u''.join yields a text string on both Python 2 and 3 without relying on
    # the Python-2-only `unicode` builtin.
    return u''.join(characters)