class TwitterCldr::Resources::SegmentTestsImporter
Constants
- CONFORMANCE_FILES
- DICTIONARY_BREAK_SAMPLES
Public Instance Methods
execute()
click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 84
# Entry point of the importer. Runs the three import steps one after
# another: the conformance files, the per-locale dictionary break tests,
# and finally the combined dictionary break test.
def execute
  import_conformance_files
  import_dictionary_break_tests
  import_combined_dictionary_break_test
end
Private Instance Methods
break_iterator()
click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 166
# Looks up the 'com.ibm.icu.text.BreakIterator' class through the :icu
# requirement and caches it in @break_iterator, so the lookup happens
# only while the cached value is nil or false.
def break_iterator
  return @break_iterator if @break_iterator

  @break_iterator = requirements[:icu].get_class('com.ibm.icu.text.BreakIterator')
end
conformance_output_path_for(conformance_file)
click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 149
# Builds the YAML output path for a conformance file: the file's base
# name without its extension is passed through #underscore, suffixed
# with ".yml" and joined onto params[:output_path].
#
# @param conformance_file [String] path or name of the conformance file
# @return [String] path of the YAML file to write
def conformance_output_path_for(conformance_file)
  extension = File.extname(conformance_file)
  stem = File.basename(conformance_file).chomp(extension)
  file_name = "#{underscore(stem)}.yml"

  File.join(params.fetch(:output_path), file_name)
end
conformance_source_path_for(conformance_file)
click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 145
# Asks the :unicode requirement for the source path of the given
# conformance file and returns whatever it answers.
#
# @param conformance_file [String] conformance file identifier
def conformance_source_path_for(conformance_file)
  unicode_requirement = requirements[:unicode]
  unicode_requirement.source_path_for(conformance_file)
end
create_dictionary_break_test(locale, text_sample)
click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 120
# Splits text_sample into segments using a word break iterator obtained
# from #break_iterator for the given locale, and returns the test data.
#
# NOTE(review): the boundaries returned by the iterator are used directly
# as String#[] indices. If the iterator reports UTF-16 offsets, this only
# lines up for text without supplementary-plane characters -- confirm
# against the samples in DICTIONARY_BREAK_SAMPLES.
#
# @param locale [String] locale identifier handed to the ULocale class
# @param text_sample [String] text to segment
# @return [Hash] hash with :locale, :text and :segments keys
def create_dictionary_break_test(locale, text_sample)
  done = break_iterator.const_get(:DONE)
  iter = break_iterator.get_word_instance(ulocale_class.new(locale))
  iter.set_text(text_sample)

  segments = []
  left = iter.first
  right = iter.next

  # Walk boundary to boundary until the iterator signals DONE.
  until right == done
    segments << text_sample[left...right]
    left = right
    right = iter.next
  end

  { locale: locale, text: text_sample, segments: segments }
end
dictionary_test_output_path_for(locale)
click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 154
# Returns the YAML path for a dictionary break test:
# <params[:output_path]>/dictionary_tests/<locale>.yml
#
# @param locale [#to_s] locale (or test name) used as the file's base name
# @return [String]
def dictionary_test_output_path_for(locale)
  output_root = params.fetch(:output_path)
  File.join(output_root, 'dictionary_tests', "#{locale}.yml")
end
dump_dictionary_break_test(name, data)
click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 139
# Serializes data as YAML into the dictionary test file for name,
# creating the containing directory first if necessary.
#
# @param name [#to_s] test name, resolved to a path by
#   #dictionary_test_output_path_for
# @param data [Object] object to dump as YAML
# @return [Integer] result of File.write
def dump_dictionary_break_test(name, data)
  target_path = dictionary_test_output_path_for(name)
  target_dir = File.dirname(target_path)

  FileUtils.mkdir_p(target_dir)
  File.write(target_path, YAML.dump(data))
end
import_combined_dictionary_break_test()
click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 114
# Joins every sample in DICTIONARY_BREAK_SAMPLES with single spaces,
# segments the combined text using the 'en' locale, and dumps the result
# under the name 'combined'.
def import_combined_dictionary_break_test
  combined_text = DICTIONARY_BREAK_SAMPLES.values.join(' ')
  combined_test = create_dictionary_break_test('en', combined_text)

  dump_dictionary_break_test('combined', combined_test)
end
import_conformance_file(conformance_file)
click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 98
# Imports a single conformance file: parses the source file with
# UnicodeFileParser.parse_standard_file, keeps the first element of each
# parsed entry, and writes the resulting list as YAML to the output path.
# Both the source directory and the output directory are created if they
# do not exist yet.
#
# @param conformance_file [String] conformance file identifier
# @return [Integer] result of File.write
def import_conformance_file(conformance_file)
  source_path = conformance_source_path_for(conformance_file)
  FileUtils.mkdir_p(File.dirname(source_path))

  parsed_entries = UnicodeFileParser.parse_standard_file(source_path)
  test_cases = parsed_entries.map { |entry| entry.first }

  output_path = conformance_output_path_for(conformance_file)
  FileUtils.mkdir_p(File.dirname(output_path))
  File.write(output_path, YAML.dump(test_cases))
end
import_conformance_files()
click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 92
# Imports every file listed in CONFORMANCE_FILES, one at a time.
def import_conformance_files
  CONFORMANCE_FILES.each { |conformance_file| import_conformance_file(conformance_file) }
end
import_dictionary_break_tests()
click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 107
# For each locale/sample pair in DICTIONARY_BREAK_SAMPLES, builds the
# dictionary break test data (passing the locale as a string) and dumps
# it under the locale's name.
def import_dictionary_break_tests
  DICTIONARY_BREAK_SAMPLES.each_pair do |locale, text_sample|
    dump_dictionary_break_test(locale, create_dictionary_break_test(locale.to_s, text_sample))
  end
end
ulocale_class()
click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 162
# Looks up the 'com.ibm.icu.util.ULocale' class through the :icu
# requirement and caches it in @ulocale_class, so the lookup happens
# only while the cached value is nil or false.
def ulocale_class
  return @ulocale_class if @ulocale_class

  @ulocale_class = requirements[:icu].get_class('com.ibm.icu.util.ULocale')
end
underscore(str)
click to toggle source
# File lib/twitter_cldr/resources/segment_tests_importer.rb, line 158
# Converts a CamelCase string to lower-case with underscores by inserting
# "_" between any character and an upper-case ASCII letter that follows
# it, then downcasing the whole string. Matches do not overlap, so in a
# run of capitals only every other boundary receives an underscore.
#
# @param str [String]
# @return [String]
def underscore(str)
  separated = str.gsub(/(.)([A-Z])/) do
    "#{Regexp.last_match(1)}_#{Regexp.last_match(2)}"
  end

  separated.downcase
end