class TwitterCldr::Resources::UnicodeDataImporter

Constants

BLOCKS_FILE
CASEFOLDING_DATA_FILE
UNICODE_DATA_FILE

Public Instance Methods

execute() click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 19
# Runs the whole import and writes the results to disk as YAML.
#
# The three import steps run first (blocks, then Unicode data keyed by block,
# then casefolding data). Afterwards the following files are written below
# output_path:
#
#   blocks.yml               - the value returned by import_blocks
#   blocks/<block name>.yml  - one file per entry returned by import_unicode_data
#   casefolding.yml          - the value returned by import_casefolding_data
#
# A progress message is printed to standard output around the write phase.
def execute
  block_ranges         = import_blocks
  code_points_by_block = import_unicode_data(block_ranges)
  casefolding          = import_casefolding_data

  # Serializes +payload+ as YAML into the file found at +path_parts+,
  # resolved relative to output_path.
  dump_yaml = lambda do |payload, *path_parts|
    File.open(File.join(output_path, *path_parts), 'w') do |io|
      YAML.dump(payload, io)
    end
  end

  STDOUT.write('Writing data to disk... ')

  FileUtils.mkdir_p(output_path)
  dump_yaml.call(block_ranges, 'blocks.yml')

  # Per-block files live in their own sub-directory.
  FileUtils.mkdir_p(File.join(output_path, 'blocks'))
  code_points_by_block.each do |name, code_points|
    dump_yaml.call(code_points, 'blocks', "#{name}.yml")
  end

  dump_yaml.call(casefolding, 'casefolding.yml')

  puts 'done'
end

Private Instance Methods

block_name(string) click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 122
# Normalizes a raw block name: surrounding whitespace is stripped, the text is
# lower-cased, and every remaining whitespace character or hyphen is replaced
# with an underscore (e.g. " Latin-1 Supplement" becomes "latin_1_supplement").
def block_name(string)
  trimmed    = string.strip
  lowercased = trimmed.downcase
  lowercased.gsub(/[\s-]/, '_')
end
blocks_file() click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 114
# Path of the blocks source file, obtained by asking the :unicode requirement
# for the source path of BLOCKS_FILE.
def blocks_file
  unicode_requirement = requirements[:unicode]
  unicode_requirement.source_path_for(BLOCKS_FILE)
end
casefold_data_file() click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 106
# Path of the casefolding source file, obtained by asking the :unicode
# requirement for the source path of CASEFOLDING_DATA_FILE.
def casefold_data_file
  unicode_requirement = requirements[:unicode]
  unicode_requirement.source_path_for(CASEFOLDING_DATA_FILE)
end
find_block(blocks, code_point) click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 118
# Looks up the block containing +code_point+.
#
# +blocks+ is iterated as (name, range) pairs; the first pair whose range
# includes +code_point+ is returned as a two-element array. Returns nil when
# no range matches.
def find_block(blocks, code_point)
  blocks.each do |entry|
    _name, range = entry
    return entry if range.include?(code_point)
  end

  nil
end
import_blocks() click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 53
# Reads the blocks file and builds a hash of block name (Symbol) => code point
# Range.
#
# Only lines of the form "<HEX>..<HEX>;<name>" are used; every other line is
# skipped. The two hexadecimal bounds become an inclusive Integer range and
# the name is normalized through block_name before being turned into a symbol.
# Progress is reported on standard output.
def import_blocks
  STDOUT.write('Importing blocks... ')

  blocks = File.open(blocks_file) do |input|
    input.each_line.each_with_object({}) do |line, collected|
      match = line.match(/^([0-9A-F]+)\.\.([0-9A-F]+);(.+)$/)
      next if match.nil?

      first_hex, last_hex, raw_name = match.captures
      collected[block_name(raw_name).to_sym] = (first_hex.hex..last_hex.hex)
    end
  end

  puts 'done'
  blocks
end
import_casefolding_data() click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 91
# Parses the casefolding file and converts every parsed row into a hash:
#
#   :source - field 0 interpreted as a hexadecimal number
#   :target - field 2 split on whitespace, each part interpreted as hexadecimal
#   :status - field 1, unchanged
#
# Returns the collection of those hashes. Progress is reported on standard
# output.
def import_casefolding_data
  STDOUT.write('Importing casefolding data... ')

  rows = parse_file(casefold_data_file)
  casefolding_data = rows.map do |fields|
    source_code_point  = fields[0].hex
    target_code_points = fields[2].split(" ").map(&:hex)

    { source: source_code_point, target: target_code_points, status: fields[1] }
  end

  puts 'done'
  casefolding_data
end
import_unicode_data(blocks) click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 76
# Parses the Unicode data file and groups the parsed rows by block.
#
# For every row yielded by parse_file, field 0 is replaced in place with its
# value parsed as hexadecimal (the code point). The row is then stored under
# the name of the block that find_block reports for that code point, keyed by
# the code point itself:
#
#   { block_name => { code_point => row } }
#
# Both levels of the returned hash create missing entries on first access.
# Progress is reported on standard output.
def import_unicode_data(blocks)
  STDOUT.write('Importing Unicode data... ')

  rows_by_block = Hash.new do |outer, name|
    outer[name] = Hash.new { |inner, code_point| inner[code_point] = {} }
  end

  parse_file(unicode_data_file) do |fields|
    code_point = fields[0].hex
    fields[0]  = code_point

    # find_block yields a (name, range) pair; only the name is needed here.
    owning_block = find_block(blocks, code_point).first
    rows_by_block[owning_block][code_point] = fields
  end

  puts 'done'
  rows_by_block
end
output_path() click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 49
# Directory the generated files are written to, read from the :output_path
# entry of params. Uses fetch rather than [] — presumably params is a Hash, in
# which case a missing entry raises KeyError instead of returning nil; confirm
# against wherever params is defined.
def output_path
  params.fetch(:output_path)
end
parse_file(file, &block) click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 72
# Thin wrapper around UnicodeFileParser.parse_standard_file: forwards +file+
# and the optional block unchanged and returns whatever the parser returns.
# Within this class it is called both with a block (rows are handed to the
# block) and without one (the return value is enumerated by the caller).
def parse_file(file, &block)
  UnicodeFileParser.parse_standard_file(file, &block)
end
unicode_data_file() click to toggle source
# File lib/twitter_cldr/resources/unicode_data_importer.rb, line 110
# Path of the Unicode data source file, obtained by asking the :unicode
# requirement for the source path of UNICODE_DATA_FILE.
def unicode_data_file
  unicode_requirement = requirements[:unicode]
  unicode_requirement.source_path_for(UNICODE_DATA_FILE)
end