class TwitterCldr::Resources::SegmentDictionariesImporter
Constants
- DICTIONARY_FILES
- URL_TEMPLATE
Public Instance Methods
execute()
click to toggle source
# File lib/twitter_cldr/resources/segment_dictionaries_importer.rb, line 27
# Entry point of the importer: makes sure the output directory exists
# (creating missing parent directories as needed) and then imports every
# dictionary file named in DICTIONARY_FILES, in order.
def execute
  FileUtils.mkdir_p(output_path)

  DICTIONARY_FILES.each { |dictionary_file| import_dictionary_file(dictionary_file) }
end
Private Instance Methods
import_dictionary_file(dictionary_file)
click to toggle source
# File lib/twitter_cldr/resources/segment_dictionaries_importer.rb, line 37
# Downloads one dictionary file, loads its entries into a trie and writes the
# marshalled trie to the path given by output_path_for.
#
# Each non-comment line of the source is expected to look like
# "<characters>\t<frequency>"; the frequency column is optional and defaults
# to 0. Lines starting with '#' (after leading separator/control characters
# are stripped) are comments, and blank lines are ignored.
#
# @param dictionary_file [String] dictionary path, passed to url_for and
#   output_path_for
# @return [Integer] number of bytes written to the dump file
def import_dictionary_file(dictionary_file)
  # Block form so the underlying handle (possibly a Tempfile) is closed as
  # soon as the body has been read, instead of lingering until GC.
  source = URI.open(url_for(dictionary_file), &:read)
  trie = TwitterCldr::Utils::Trie.new

  # Leading characters in the Unicode Z (separator) and C (other/control)
  # categories are dropped from every line before it is inspected.
  space_regexp = TwitterCldr::Shared::UnicodeRegex.compile('\A[[:Z:][:C:]]+').to_regexp

  source.split("\n").each do |line|
    line.sub!(space_regexp, '')

    # An empty line would split into no fields and leave `characters` nil.
    next if line.empty? || line.start_with?('#')

    characters, frequency = line.split("\t")

    # The trie is keyed by Unicode code points rather than by the raw string.
    trie.add(characters.unpack('U*'), frequency ? frequency.to_i : 0)
  end

  # Marshal output is binary; binwrite avoids text-mode newline translation
  # and encoding conversion that would corrupt the dump.
  dump_path = output_path_for(dictionary_file)
  File.binwrite(dump_path, Marshal.dump(trie))
end
output_path()
click to toggle source
# File lib/twitter_cldr/resources/segment_dictionaries_importer.rb, line 70
# Directory the dump files are written into, read from the importer's params.
# Uses fetch, so a missing :output_path key raises instead of returning nil.
def output_path
  params.fetch(:output_path)
end
output_path_for(dictionary_file)
click to toggle source
# File lib/twitter_cldr/resources/segment_dictionaries_importer.rb, line 65
# Builds the destination path for a dictionary's dump: the file's base name
# with its extension removed, suffixed with ".dump", inside output_path.
#
# @param dictionary_file [String] dictionary path
# @return [String] path of the dump file for that dictionary
def output_path_for(dictionary_file)
  extension = File.extname(dictionary_file)
  stem = File.basename(dictionary_file).chomp(extension)

  File.join(output_path, stem + '.dump')
end
url_for(dictionary_file)
click to toggle source
# File lib/twitter_cldr/resources/segment_dictionaries_importer.rb, line 58
# Fills URL_TEMPLATE in for the given dictionary file.
#
# The :icu_version placeholder receives "release-" followed by
# Versions.icu_version with every '.' replaced by '-'; the :path placeholder
# receives dictionary_file unchanged.
#
# @param dictionary_file [String] dictionary path substituted for :path
# @return [String] the formatted URL
def url_for(dictionary_file)
  release_tag = "release-#{Versions.icu_version.tr('.', '-')}"

  format(URL_TEMPLATE, icu_version: release_tag, path: dictionary_file)
end