class TwitterCldr::Resources::UnicodeDataImporter
Constants
- BLOCKS_FILE
- CASEFOLDING_DATA_FILE
- UNICODE_DATA_FILE
Public Instance Methods
execute()
click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 19
#
# Runs the complete import and dumps every result as YAML under output_path.
#
# Files written (paths relative to output_path):
#   blocks.yml         - the value returned by import_blocks
#   blocks/<name>.yml  - one file per key of import_unicode_data's result
#   casefolding.yml    - the value returned by import_casefolding_data
def execute
  blocks = import_blocks
  unicode_data = import_unicode_data(blocks)
  casefolding_data = import_casefolding_data

  STDOUT.write('Writing data to disk... ')

  FileUtils.mkdir_p(output_path)
  File.open(File.join(output_path, 'blocks.yml'), 'w') { |file| YAML.dump(blocks, file) }

  # One YAML file per block, named after the block's key.
  blocks_dir = File.join(output_path, 'blocks')
  FileUtils.mkdir_p(blocks_dir)
  unicode_data.each_pair do |name, code_points|
    File.open(File.join(blocks_dir, "#{name}.yml"), 'w') { |file| YAML.dump(code_points, file) }
  end

  File.open(File.join(output_path, 'casefolding.yml'), 'w') { |file| YAML.dump(casefolding_data, file) }

  puts 'done'
end
Private Instance Methods
block_name(string)
click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 122
#
# Normalizes a block label into an identifier-style string: surrounding
# whitespace is stripped, letters are lowercased, and every remaining
# whitespace character or hyphen becomes an underscore.
#
#   block_name(' Latin-1 Supplement')  # => "latin_1_supplement"
def block_name(string)
  normalized = string.strip.downcase
  # Same character set as the regex class [\s-]: space, tab, CR, LF, FF, VT
  # and a literal hyphen (a trailing "-" in a tr set is not a range).
  normalized.tr(" \t\r\n\f\v-", '_')
end
blocks_file()
click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 114
#
# Path of the blocks source file, as resolved by the :unicode requirement
# for BLOCKS_FILE.
def blocks_file
  unicode_requirement = requirements[:unicode]
  unicode_requirement.source_path_for(BLOCKS_FILE)
end
casefold_data_file()
click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 106
#
# Path of the casefolding source file, as resolved by the :unicode
# requirement for CASEFOLDING_DATA_FILE.
def casefold_data_file
  unicode_requirement = requirements[:unicode]
  unicode_requirement.source_path_for(CASEFOLDING_DATA_FILE)
end
find_block(blocks, code_point)
click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 118
#
# Looks up the first entry of +blocks+ whose range includes +code_point+.
#
# blocks     - enumerable of (name, range) pairs, e.g. the hash built by
#              import_blocks
# code_point - value tested against each range with Range#include?
#
# Returns the matching [name, range] pair, or nil when nothing matches.
def find_block(blocks, code_point)
  blocks.find do |_name, range|
    range.include?(code_point)
  end
end
import_blocks()
click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 53
#
# Reads blocks_file line by line and collects every line of the form
# "<HEX>..<HEX>;<label>" (upper-case hex digits only). Lines that do not
# match are skipped.
#
# Returns a Hash mapping the symbolized, block_name-normalized label to the
# Integer range (first..last) parsed from the two hex fields. Progress is
# reported on standard output.
def import_blocks
  STDOUT.write('Importing blocks... ')

  blocks = {}

  File.open(blocks_file) do |io|
    io.each_line do |line|
      match = line.match(/^([0-9A-F]+)\.\.([0-9A-F]+);(.+)$/)
      next unless match

      first_hex, last_hex, label = match.captures
      blocks[block_name(label).to_sym] = (first_hex.hex..last_hex.hex)
    end
  end

  puts 'done'
  blocks
end
import_casefolding_data()
click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 91
#
# Converts every record returned by parse_file(casefold_data_file) into a
# Hash with the keys, in this order:
#   :source - field 0 parsed as a hexadecimal Integer
#   :target - field 2 split on whitespace, each piece parsed as hex
#   :status - field 1, unchanged
#
# Returns the list of those hashes. Progress is reported on standard output.
def import_casefolding_data
  STDOUT.write('Importing casefolding data... ')

  records = parse_file(casefold_data_file)
  casefolding_data = records.map do |fields|
    source = fields[0].hex
    target = fields[2].split(" ").map(&:hex)

    { source: source, target: target, status: fields[1] }
  end

  puts 'done'
  casefolding_data
end
import_unicode_data(blocks)
click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 76
#
# Groups the records yielded by parse_file(unicode_data_file) by the block
# their code point belongs to.
#
# blocks - collection of (name, range) pairs passed to find_block, e.g. the
#          hash returned by import_blocks
#
# For every record, field 0 is replaced in place by its hexadecimal Integer
# value; the (mutated) record is then stored under
# result[block_name][code_point].
#
# Returns a Hash of block name => { code point => record }. Progress is
# reported on standard output.
#
# Raises RuntimeError when a code point is not covered by any entry of
# +blocks+ (previously this surfaced as a NoMethodError on nil).
def import_unicode_data(blocks)
  STDOUT.write('Importing Unicode data... ')

  # Only the outer level auto-creates entries; the per-block hashes are plain
  # so that a missed lookup returns nil instead of silently inserting {}.
  unicode_data = Hash.new { |hash, block| hash[block] = {} }

  parse_file(unicode_data_file) do |data|
    code_point = data[0].hex
    data[0] = code_point

    # find_block returns a [name, range] pair, or nil when nothing matches.
    block, _range = find_block(blocks, code_point)
    if block.nil?
      raise format('No block found for code point U+%04X', code_point)
    end

    unicode_data[block][code_point] = data
  end

  puts 'done'
  unicode_data
end
output_path()
click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 49
#
# Directory the import results are written to, read from the :output_path
# entry of params. Uses fetch, so the lookup fails loudly instead of
# returning nil when the entry is absent.
def output_path
  params.fetch(:output_path)
end
parse_file(file, &block)
click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 72
#
# Hands +file+ and the optional block straight to
# UnicodeFileParser.parse_standard_file and returns whatever it returns.
#
# Within this class it is called both with a block (import_unicode_data)
# and without one, using the return value (import_casefolding_data).
def parse_file(file, &block)
  UnicodeFileParser.parse_standard_file(file, &block)
end
unicode_data_file()
click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 110
#
# Path of the Unicode data source file, as resolved by the :unicode
# requirement for UNICODE_DATA_FILE.
def unicode_data_file
  unicode_requirement = requirements[:unicode]
  unicode_requirement.source_path_for(UNICODE_DATA_FILE)
end