# rhubarb-lip-sync/src/tools/asciiCases.rb
#
# 34 lines
# 1013 B
# Ruby

require 'open-uri'
require 'csv'
# Create mapping from ASCII characters to related Unicode characters.
#
# Downloads the Unicode 8.0.0 character database and, for every code point in
# 0x80..0x24f whose name reads "LATIN CAPITAL|SMALL LETTER <X> ..." with <X> a
# single letter A-Z, files the character under the ASCII letter of the same
# case. The result is a Hash from ASCII letter to an Array of one-character
# UTF-8 strings, sorted by key.
mapping = Hash.new { |hash, key| hash[key] = [] }
url = 'http://www.unicode.org/Public/8.0.0/ucd/UnicodeData.txt'
headers = [:code, :name, :category]

# Capture 1 is the case, capture 2 the single base letter. The `\b` after the
# letter rejects multi-letter names such as "AE"; the negative lookahead
# rejects names that contain a further "LETTER" word.
latin_letter = /^LATIN (CAPITAL|SMALL) LETTER ([A-Z])\b(?!.*\bLETTER\b)/

# URI.open is used instead of Kernel#open: opening URLs through Kernel#open
# was deprecated in Ruby 2.7 and removed in 3.0. The block form also closes
# the downloaded stream once parsing is done.
URI.open(url) do |io|
  CSV.new(io, col_sep: ';', headers: headers).each do |row|
    code = row[:code].hex
    next if code < 0x80   # ASCII itself needs no mapping
    break if code > 0x24f # nothing past this point is of interest; assumes rows are in ascending code order

    match = latin_letter.match(row[:name])
    next unless match

    base_char = match[2]
    base_char = base_char.downcase if match[1] == 'SMALL'
    mapping[base_char] << [code].pack('U')
  end
end

# Order the entries by ASCII letter so the generated output is stable.
mapping = mapping.sort.to_h
# Generate asciiCases.cpp: for each ASCII character, one line of C++ `case`
# labels listing its related Unicode characters, followed by a line returning
# the ASCII character.
File.open('asciiCases.cpp', 'w') do |out|
  out.print "// Generated by #{__FILE__}; don't modify by hand!\n\n"
  mapping.each_pair do |ascii_char, unicode_chars|
    case_labels = unicode_chars.map { |unicode_char| "case U'#{unicode_char}':" }
    out.print "#{case_labels.join(' ')}\n"
    out.print "\treturn '#{ascii_char}';\n"
  end
end