class TwitterCldr::Resources::SegmentTestsImporter

Constants

CONFORMANCE_FILES
DICTIONARY_BREAK_SAMPLES

Public Instance Methods

execute() click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 84
# Runs the full import, in order: the conformance files, then one
# dictionary break test per sample locale, and finally the combined
# dictionary break test.
def execute
  import_conformance_files
  import_dictionary_break_tests
  import_combined_dictionary_break_test
end

Private Instance Methods

break_iterator() click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 166
# Looks up ICU's BreakIterator class through the :icu requirement and caches
# it in an instance variable so the lookup is performed only once.
def break_iterator
  return @break_iterator if @break_iterator

  icu = requirements[:icu]
  @break_iterator = icu.get_class('com.ibm.icu.text.BreakIterator')
end
conformance_output_path_for(conformance_file) click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 149
# Builds the YAML output path for a conformance file: the file's base name,
# minus its extension, is underscored and placed directly under the
# :output_path param.
def conformance_output_path_for(conformance_file)
  extension = File.extname(conformance_file)
  base_name = File.basename(conformance_file).chomp(extension)
  yaml_name = "#{underscore(base_name)}.yml"
  File.join(params.fetch(:output_path), yaml_name)
end
conformance_source_path_for(conformance_file) click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 145
# Asks the :unicode requirement for the source path of the given
# conformance file.
def conformance_source_path_for(conformance_file)
  unicode_requirement = requirements[:unicode]
  unicode_requirement.source_path_for(conformance_file)
end
create_dictionary_break_test(locale, text_sample) click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 120
# Segments +text_sample+ with ICU's word break iterator for +locale+ and
# returns the data for a dictionary break test.
#
# @param locale [String] locale identifier passed to ULocale.new
# @param text_sample [String] the text to segment
# @return [Hash] with :locale, :text (the unmodified sample) and :segments
#   (the consecutive pieces of the sample between iterator boundaries)
def create_dictionary_break_test(locale, text_sample)
  done = break_iterator.const_get(:DONE)
  iter = break_iterator.get_word_instance(ulocale_class.new(locale))
  iter.set_text(text_sample)

  # ICU reports boundaries as UTF-16 code unit offsets, whereas indexing a
  # Ruby string counts characters. The two only agree for BMP-only text, so
  # slice a UTF-16 copy (2 bytes per code unit) and convert each piece back.
  utf16_sample = text_sample.encode(Encoding::UTF_16LE)
  start = iter.first
  segments = []

  until (stop = iter.next) == done
    segment = utf16_sample.byteslice(start * 2, (stop - start) * 2)
    segments << segment.encode(text_sample.encoding)
    start = stop
  end

  {
    locale: locale,
    text: text_sample,
    segments: segments
  }
end
dictionary_test_output_path_for(locale) click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 154
# Returns the path of the YAML file for +locale+ inside the
# 'dictionary_tests' directory under the :output_path param.
def dictionary_test_output_path_for(locale)
  output_root = params.fetch(:output_path)
  yaml_name = "#{locale}.yml"
  File.join(output_root, 'dictionary_tests', yaml_name)
end
dump_dictionary_break_test(name, data) click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 139
# Serializes +data+ as YAML into the dictionary test file for +name+,
# creating the containing directory first if necessary.
def dump_dictionary_break_test(name, data)
  target_path = dictionary_test_output_path_for(name)
  target_dir = File.dirname(target_path)
  FileUtils.mkdir_p(target_dir)
  yaml = YAML.dump(data)
  File.write(target_path, yaml)
end
import_combined_dictionary_break_test() click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 114
# Joins every dictionary break sample with single spaces, segments the
# result using the 'en' locale, and dumps it under the name 'combined'.
def import_combined_dictionary_break_test
  combined_sample = DICTIONARY_BREAK_SAMPLES.values.join(' ')
  dump_dictionary_break_test(
    'combined',
    create_dictionary_break_test('en', combined_sample)
  )
end
import_conformance_file(conformance_file) click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 98
# Parses one conformance file and writes the first field of every parsed
# entry to the corresponding YAML output file.
def import_conformance_file(conformance_file)
  source_path = conformance_source_path_for(conformance_file)
  # The original flow ensures the source directory exists before parsing.
  FileUtils.mkdir_p(File.dirname(source_path))

  parsed_entries = UnicodeFileParser.parse_standard_file(source_path)
  first_fields = parsed_entries.map { |fields| fields.first }

  target_path = conformance_output_path_for(conformance_file)
  FileUtils.mkdir_p(File.dirname(target_path))
  File.write(target_path, YAML.dump(first_fields))
end
import_conformance_files() click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 92
# Imports every file listed in CONFORMANCE_FILES, one at a time.
def import_conformance_files
  CONFORMANCE_FILES.each { |conformance_file| import_conformance_file(conformance_file) }
end
import_dictionary_break_tests() click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 107
# Creates and dumps one dictionary break test per entry in
# DICTIONARY_BREAK_SAMPLES. The locale key is stringified for segmentation
# but passed through unchanged as the dump name.
def import_dictionary_break_tests
  DICTIONARY_BREAK_SAMPLES.each_pair do |locale_key, sample|
    dump_dictionary_break_test(
      locale_key,
      create_dictionary_break_test(locale_key.to_s, sample)
    )
  end
end
ulocale_class() click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 162
# Looks up ICU's ULocale class through the :icu requirement and caches it
# in an instance variable so the lookup is performed only once.
def ulocale_class
  return @ulocale_class if @ulocale_class

  icu = requirements[:icu]
  @ulocale_class = icu.get_class('com.ibm.icu.util.ULocale')
end
underscore(str) click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 158
# Converts a CamelCase name to snake_case: an underscore is inserted between
# any character and an uppercase letter that follows it, then the whole
# string is lowercased. Matches do not overlap, so in a run of capitals only
# every other boundary receives an underscore.
def underscore(str)
  separated = str.gsub(/(.)([A-Z])/) do
    "#{Regexp.last_match(1)}_#{Regexp.last_match(2)}"
  end
  separated.downcase
end