u'ダ': 'da', u'ヂ': 'ji', u'ヅ': 'dzu', u'デ': 'de', u'ド': 'do',
u'バ': 'ba', u'ビ': 'bi', u'ブ': 'bu', u'ベ': 'be', u'ボ': 'bo',
u'パ': 'pa', u'ピ': 'pi', u'プ': 'pu', u'ペ': 'pe', u'ポ': 'po',
+ u'ヴ': 'vu',
}
# Lookup table from small katakana vowels to the Latin vowel each one is
# romanized as.
# NOTE(review): the name says "youon", but only the small vowels are visible
# in this excerpt.  The surrounding text looks like a diff with elided
# context, so the real table may hold more entries -- confirm against the
# full file before relying on this being the complete mapping.
_roomaji_youon = {
u'ァ': 'a', u'ィ': 'i', u'ゥ': 'u', u'ェ': 'e', u'ォ': 'o',
}
# Lookup table for two-character katakana sequences whose second character is
# a small vowel kana; each key maps to the romaji string for the whole pair.
# The leading "-" / "+" on the lines below are diff markers: the "-" line is
# the entry being removed and the "+" lines are the entries being added.
# NOTE(review): on the 'fa'/'fi'/'fe'/'fo' row the key for 'hu' starts with
# a different kana than its neighbours -- presumably intentional, but confirm.
_roomaji_small_kana_combos = {
- u'ウィ': 'wi',
+ # These are, by the way, fairly arbitrary. "shi xi" to mean "sy" is
+ # particularly weird, but it seems to be what GF intends
+
+ # Simple vowel replacement
+ u'ウィ': 'wi', u'ウゥ': 'wu', u'ウェ': 'we', u'ウォ': 'wo',
+ u'ヴァ': 'va', u'ヴィ': 'vi', u'ヴェ': 've', u'ヴォ': 'vo',
u'チェ': 'che',
u'シェ': 'she',
+ u'ジェ': 'je',
u'テァ': 'tha', u'ティ': 'ti', u'テゥ': 'thu', u'テェ': 'tye', u'テォ': 'tho',
u'デァ': 'dha', u'ディ': 'di', u'デゥ': 'dhu', u'デェ': 'dye', u'デォ': 'dho',
u'ファ': 'fa', u'フィ': 'fi', u'ホゥ': 'hu', u'フェ': 'fe', u'フォ': 'fo',
+
+ # Not so much
+ u'シィ': 'sy',
+ u'ミィ': 'my',
+ u'ビィ': 'by',
+ u'ピィ': 'py',
}
def romanize(string):
last_char = None # Used for small kana combos
for char in string:
# Full-width Latin
- if ord(char) >= 0xff11 and ord(char) <= 0xff5e:
+ if 0xff01 <= ord(char) <= 0xff5e:
if last_kana == 'sokuon':
raise ValueError("Sokuon cannot precede Latin characters.")
- char = chr(ord(char) - 0xff11 + 0x31)
+ # XXX Real Unicode decomposition would be nicer
+ char = chr(ord(char) - 0xff01 + 0x21)
characters.append(char)
last_kana = None
last_kana = new_char
# Sokuon
- #elif char in (u'っ', u'ッ'):
- elif char in (u'ッ',):
+ elif char in (u'っ', u'ッ'):
# Remember it and double the consonant next time around
last_kana = 'sokuon'