G2P: Fixed generated rules
This commit is contained in:
parent
07fe549f42
commit
faea06933b
|
@ -5,7 +5,7 @@
|
||||||
//
|
//
|
||||||
// get rid of some digraphs
|
// get rid of some digraphs
|
||||||
{ wregex(L"ch"), L"ç" },
|
{ wregex(L"ch"), L"ç" },
|
||||||
{ wregex(L"sh"), L"$" },
|
{ wregex(L"sh"), L"$$" },
|
||||||
{ wregex(L"ph"), L"f" },
|
{ wregex(L"ph"), L"f" },
|
||||||
{ wregex(L"th"), L"+" },
|
{ wregex(L"th"), L"+" },
|
||||||
{ wregex(L"qu"), L"kw" },
|
{ wregex(L"qu"), L"kw" },
|
||||||
|
@ -48,16 +48,16 @@
|
||||||
// sSl can simplify
|
// sSl can simplify
|
||||||
{ wregex(L"(s)t(l[aeiouäëïöüâêîôûùò@]$)"), L"$1$2" },
|
{ wregex(L"(s)t(l[aeiouäëïöüâêîôûùò@]$)"), L"$1$2" },
|
||||||
// affrication of t + front vowel
|
// affrication of t + front vowel
|
||||||
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñaeiouäëïöüâêîôûùò@])ci([aeiouäëïöüâêîôûùò@])"), L"$1$$2" },
|
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñaeiouäëïöüâêîôûùò@])ci([aeiouäëïöüâêîôûùò@])"), L"$1$$$2" },
|
||||||
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñaeiouäëïöüâêîôûùò@])ti([aeiouäëïöüâêîôûùò@])"), L"$1$$2" },
|
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñaeiouäëïöüâêîôûùò@])ti([aeiouäëïöüâêîôûùò@])"), L"$1$$$2" },
|
||||||
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñaeiouäëïöüâêîôûùò@])tu([aeiouäëïöüâêîôûùò@])"), L"$1çu$2" },
|
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñaeiouäëïöüâêîôûùò@])tu([aeiouäëïöüâêîôûùò@])"), L"$1çu$2" },
|
||||||
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñaeiouäëïöüâêîôûùò@])tu([rl][aeiouäëïöüâêîôûùò@])"), L"$1çu$2" },
|
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñaeiouäëïöüâêîôûùò@])tu([rl][aeiouäëïöüâêîôûùò@])"), L"$1çu$2" },
|
||||||
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñ])si(o)"), L"$1$$2" },
|
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñ])si(o)"), L"$1$$$2" },
|
||||||
{ wregex(L"([aeiouäëïöüâêîôûùò@])si(o)"), L"$1j$2" },
|
{ wregex(L"([aeiouäëïöüâêîôûùò@])si(o)"), L"$1j$2" },
|
||||||
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñ])s(ur)"), L"$1$$2" },
|
{ wregex(L"([bcdfghjklmnpqrstvwxyzç+$ñ])s(ur)"), L"$1$$$2" },
|
||||||
{ wregex(L"([aeiouäëïöüâêîôûùò@])s(ur)"), L"$1j$2" },
|
{ wregex(L"([aeiouäëïöüâêîôûùò@])s(ur)"), L"$1j$2" },
|
||||||
{ wregex(L"(k)s(u[aeiouäëïöüâêîôûùò@])"), L"$1$$2" },
|
{ wregex(L"(k)s(u[aeiouäëïöüâêîôûùò@])"), L"$1$$$2" },
|
||||||
{ wregex(L"(k)s(u[rl])"), L"$1$$2" },
|
{ wregex(L"(k)s(u[rl])"), L"$1$$$2" },
|
||||||
// intervocalic s
|
// intervocalic s
|
||||||
{ wregex(L"([eiou])s([aeiouäëïöüâêîôûùò@])"), L"$1z$2" },
|
{ wregex(L"([eiou])s([aeiouäëïöüâêîôûùò@])"), L"$1z$2" },
|
||||||
// al to ol (do this before respelling)
|
// al to ol (do this before respelling)
|
||||||
|
|
|
@ -27,6 +27,7 @@ def formatRule(searchValue, replaceValue, contextBegin, contextEnd)
|
||||||
searchValue = Regexp.escape(searchValue)
|
searchValue = Regexp.escape(searchValue)
|
||||||
contextBegin = Regexp.escape(contextBegin)
|
contextBegin = Regexp.escape(contextBegin)
|
||||||
contextEnd = Regexp.escape(contextEnd)
|
contextEnd = Regexp.escape(contextEnd)
|
||||||
|
replaceValue.gsub!(/\$/, '$$')
|
||||||
|
|
||||||
# Anchors
|
# Anchors
|
||||||
contextBegin.sub!(/^\\\#/, '^')
|
contextBegin.sub!(/^\\\#/, '^')
|
||||||
|
@ -66,7 +67,7 @@ def formatRule(searchValue, replaceValue, contextBegin, contextEnd)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Read rules
|
# Read rules
|
||||||
lines = File.read('../lib/soundchange/english.sc', :encoding => 'iso-8859-1').split(/\r?\n/)
|
lines = File.read('../../lib/soundchange/english.sc', :encoding => 'iso-8859-1').split(/\r?\n/)
|
||||||
|
|
||||||
# Parse character class definitions
|
# Parse character class definitions
|
||||||
characterClassLineCount = 0
|
characterClassLineCount = 0
|
||||||
|
|
Loading…
Reference in New Issue