require 'open-uri'
require 'csv'

# Generates asciiCases.cpp: a list of C++ `case` labels that map non-ASCII
# Latin letters (code points 0x80..0x24F) to the plain ASCII letter named in
# their Unicode character name, e.g. a character named
# "LATIN SMALL LETTER E WITH ACUTE" is mapped to 'e'.

# Source of the character names (Unicode Character Database, version 8.0.0).
UNICODE_DATA_URL = 'http://www.unicode.org/Public/8.0.0/ucd/UnicodeData.txt'

# Only the first three semicolon-separated fields are given names; the
# remaining fields of each record are not used by this script.
UNICODE_DATA_HEADERS = [:code, :name, :category]

# Matches names of the form "LATIN CAPITAL|SMALL LETTER <X> ...", where <X> is
# a single letter A-Z. The negative lookahead rejects names that contain
# another standalone word "LETTER" further on.
BASE_LETTER_PATTERN = /^LATIN (CAPITAL|SMALL) LETTER ([A-Z])\b(?!.*\bLETTER\b)/

# Create mapping from ASCII characters to related Unicode characters
mapping = Hash.new { |hash, key| hash[key] = [] }

# URI.open is used instead of Kernel#open: since Ruby 3.0, open-uri no longer
# makes Kernel#open fetch URLs. The block form closes the downloaded stream
# when the block exits, including the early exit via `break` below.
URI.open(UNICODE_DATA_URL) do |io|
  CSV.new(io, col_sep: ';', headers: UNICODE_DATA_HEADERS).each do |row|
    code = row[:code].hex
    next if code < 0x80   # skip the ASCII range itself
    break if code > 0x24f # stop reading once past the range of interest

    match = BASE_LETTER_PATTERN.match(row[:name])
    next unless match

    # The name always carries the letter in upper case; lower-case it for
    # characters whose name says SMALL.
    base_char = match[1] == 'SMALL' ? match[2].downcase : match[2]
    mapping[base_char] << [code].pack('U') # code point -> UTF-8 character
  end
end

# Order the entries by ASCII letter so the generated file is stable.
mapping = mapping.sort.to_h

# Generate asciiCases.cpp
File.open('asciiCases.cpp', 'w') do |file|
  file.print "// Generated by #{__FILE__}; don't modify by hand!\n\n"
  mapping.each do |ascii_char, unicode_chars|
    # One line with all case labels for this letter, then the shared return.
    labels = unicode_chars.map { |c| "case U'#{c}':" }.join(' ')
    file.print labels, "\n"
    file.print "\treturn '#{ascii_char}';\n"
  end
end