G2P: Fixed generated rules

This commit is contained in:
Daniel Wolf 2016-12-20 21:20:53 +01:00
parent 07fe549f42
commit faea06933b
2 changed files with 9 additions and 8 deletions

View File

@@ -5,7 +5,7 @@
//
// get rid of some digraphs
{ wregex(L"ch"), L"ç" },
{ wregex(L"sh"), L"$" },
{ wregex(L"sh"), L"$$" },
{ wregex(L"ph"), L"f" },
{ wregex(L"th"), L"+" },
{ wregex(L"qu"), L"kw" },
@@ -48,16 +48,16 @@
// sSl can simplify
{ wregex(L"(s)t(l[aeiouäëïöüâêîôûùò@]$)"), L"$1$2" },
// affrication of t + front vowel
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñaeiouäëïöüâêîôûùò@])ci([aeiouäëïöüâêîôûùò@])"), L"$1$$2" },
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñaeiouäëïöüâêîôûùò@])ti([aeiouäëïöüâêîôûùò@])"), L"$1$$2" },
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñaeiouäëïöüâêîôûùò@])ci([aeiouäëïöüâêîôûùò@])"), L"$1$$$2" },
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñaeiouäëïöüâêîôûùò@])ti([aeiouäëïöüâêîôûùò@])"), L"$1$$$2" },
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñaeiouäëïöüâêîôûùò@])tu([aeiouäëïöüâêîôûùò@])"), L"$1çu$2" },
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñaeiouäëïöüâêîôûùò@])tu([rl][aeiouäëïöüâêîôûùò@])"), L"$1çu$2" },
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñ])si(o)"), L"$1$$2" },
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñ])si(o)"), L"$1$$$2" },
{ wregex(L"([aeiouäëïöüâêîôûùò@])si(o)"), L"$1j$2" },
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñ])s(ur)"), L"$1$$2" },
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñ])s(ur)"), L"$1$$$2" },
{ wregex(L"([aeiouäëïöüâêîôûùò@])s(ur)"), L"$1j$2" },
{ wregex(L"(k)s(u[aeiouäëïöüâêîôûùò@])"), L"$1$$2" },
{ wregex(L"(k)s(u[rl])"), L"$1$$2" },
{ wregex(L"(k)s(u[aeiouäëïöüâêîôûùò@])"), L"$1$$$2" },
{ wregex(L"(k)s(u[rl])"), L"$1$$$2" },
// intervocalic s
{ wregex(L"([eiou])s([aeiouäëïöüâêîôûùò@])"), L"$1z$2" },
// al to ol (do this before respelling)

View File

@@ -27,6 +27,7 @@ def formatRule(searchValue, replaceValue, contextBegin, contextEnd)
searchValue = Regexp.escape(searchValue)
contextBegin = Regexp.escape(contextBegin)
contextEnd = Regexp.escape(contextEnd)
replaceValue.gsub!(/\$/, '$$')
# Anchors
contextBegin.sub!(/^\\\#/, '^')
@@ -66,7 +67,7 @@ def formatRule(searchValue, replaceValue, contextBegin, contextEnd)
end
# Read rules
lines = File.read('../lib/soundchange/english.sc', :encoding => 'iso-8859-1').split(/\r?\n/)
lines = File.read('../../lib/soundchange/english.sc', :encoding => 'iso-8859-1').split(/\r?\n/)
# Parse character class definitions
characterClassLineCount = 0