34 lines
1013 B
Ruby
34 lines
1013 B
Ruby
require 'open-uri'
|
|
require 'csv'
|
|
|
|
# Create mapping from ASCII characters to related Unicode characters.
#
# Downloads UnicodeData.txt and scans the code points U+0080..U+024F
# (Latin-1 Supplement through Latin Extended-B). Every character whose name
# starts with "LATIN CAPITAL LETTER <X>" or "LATIN SMALL LETTER <X>", where
# <X> is a single A-Z letter, is collected under its ASCII base letter
# (upper case for CAPITAL, lower case for SMALL).
#
# After this section `mapping` is a plain Hash (no default block) of
# { ascii_letter => [unicode_char, ...] }, ordered by ASCII letter.
mapping = Hash.new { |hash, key| hash[key] = [] }
url = 'http://www.unicode.org/Public/8.0.0/ucd/UnicodeData.txt'
# Only the first three ';'-separated fields are named; the remaining fields of
# each record are not used here.
headers = [:code, :name, :category]
# The trailing \b requires the base letter to be a whole word (so e.g. "AE" is
# rejected), and the negative lookahead rejects names that contain the word
# LETTER a second time (characters composed of more than one letter).
letter_name = /^LATIN (CAPITAL|SMALL) LETTER ([A-Z])\b(?!.*\bLETTER\b)/

# URI.open is used instead of Kernel#open: since Ruby 3.0, open-uri no longer
# makes a bare `open` accept URLs. The block form closes the stream once the
# scan is finished.
URI.open(url) do |io|
  CSV.new(io, col_sep: ';', headers: headers).each do |row|
    code = row[:code].hex
    # Skip the ASCII range itself.
    next if code < 0x80
    # Stop reading at the end of the range of interest.
    # NOTE(review): assumes the file is ordered by ascending code point.
    break if code > 0x24f

    match = letter_name.match(row[:name])
    next unless match

    base_char = match[2]
    base_char = base_char.downcase if match[1] == 'SMALL'
    mapping[base_char] << [code].pack('U')
  end
end

# Sort by ASCII letter so the generated output is stable and ordered.
mapping = mapping.sort.to_h
|
|
|
|
# Generate asciiCases.cpp
#
# Writes a header comment naming this script, then for every entry of
# `mapping` two lines of C++: a line of space-separated `case U'<char>':`
# labels (one per related Unicode character) and a tab-indented
# `return '<ascii char>';` line.
File.open('asciiCases.cpp', 'w') do |out|
  out.print "// Generated by #{__FILE__}; don't modify by hand!\n\n"

  mapping.each_pair do |ascii_char, unicode_chars|
    labels = unicode_chars.map { |uc| "case U'#{uc}':" }
    out.print "#{labels.join(' ')}\n"
    out.print "\treturn '#{ascii_char}';\n"
  end
end
|