class TwitterCldr::Resources::TailoringImporter
Constants
- EMPTY_TAILORING_DATA
- IGNORED_TAGS
- LAST_BYTE_MASK
- LEVEL_RULE_REGEXP
- LOCALES_MAP
- SIMPLE_RULES
- SUPPORTED_RULES
Private Instance Methods
collation_element_iterator_class()
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 140
# Returns the class loaded for 'com.ibm.icu.text.CollationElementIterator'.
# The lookup goes through +get_class+ once; the result is cached in an
# instance variable and reused on later calls.
def collation_element_iterator_class
  @collation_element_iterator_class ||= begin
    java_class_name = 'com.ibm.icu.text.CollationElementIterator'
    get_class(java_class_name)
  end
end
collator_class()
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 132
# Returns the class loaded for 'com.ibm.icu.text.Collator', resolving it via
# +get_class+ on first use and caching it afterwards.
def collator_class
  @collator_class ||= begin
    java_class_name = 'com.ibm.icu.text.Collator'
    get_class(java_class_name)
  end
end
dump(locale, data)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 64
# Serializes +data+ as YAML into the resource file for +locale+
# (see +resource_file_path+), overwriting any existing file.
def dump(locale, data)
  target_path = resource_file_path(locale)

  File.open(target_path, 'w') do |io|
    YAML.dump(data, io)
  end
end
execute()
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 45
# Entry point of the importer: creates the output directory named by
# params[:output_path] (including missing parents), then imports every
# locale listed in params[:locales], one at a time.
def execute
  FileUtils.mkdir_p(params[:output_path])

  params[:locales].each do |locale|
    import_locale(locale)
  end
end
get_class(name)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 128
# Looks up the class called +name+ by delegating to the +get_class+ method
# of the :icu requirement.
def get_class(name)
  icu_requirement = requirements[:icu]
  icu_requirement.get_class(name)
end
get_code_points(string)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 234
# Normalizes +string+ with TwitterCldr::Normalization.normalize and returns
# the code points of the normalized text as produced by
# TwitterCldr::Utils::CodePoints.from_string.
def get_code_points(string)
  normalized = TwitterCldr::Normalization.normalize(string)
  TwitterCldr::Utils::CodePoints.from_string(normalized)
end
get_collation_elements(collator, string)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 212
# Walks the collation element iterator that +collator+ creates for +string+
# and returns an array of [primary, secondary, tertiary] weight triples, one
# per collation element, in iteration order.
#
# Each raw element is split with shifts and LAST_BYTE_MASK (presumably 0xFF,
# i.e. one byte per field -- confirm against the constant's definition):
# the fields at shifts 24 and 16 make up the primary weight, shift 8 is the
# secondary weight and the lowest field is the tertiary weight.
def get_collation_elements(collator, string)
  iterator = collator.get_collation_element_iterator(string)
  elements = []

  # Iteration stops when the iterator yields the NULLORDER sentinel.
  until (raw = iterator.next) == collation_element_iterator_class::NULLORDER
    high_part = (raw >> 24) & LAST_BYTE_MASK
    low_part = (raw >> 16) & LAST_BYTE_MASK

    # When the lower primary field is zero only the upper one is kept;
    # otherwise both are combined into a single two-field value.
    primary = low_part.zero? ? high_part : (high_part << 8) + low_part
    secondary = (raw >> 8) & LAST_BYTE_MASK
    tertiary = raw & LAST_BYTE_MASK

    elements << [primary, secondary, tertiary]
  end

  elements
end
get_collation_rules(collations, collation_type)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 119
# Returns the first <collation> child of +collations+ whose type attribute
# equals +collation_type+, or nil when there is none. A nil/false
# +collation_type+ selects the 'standard' collation.
def get_collation_rules(collations, collation_type)
  type = collation_type || 'standard'
  collations.at_xpath("collation[@type=\"#{type}\"]")
end
get_collation_xml(locale)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 115
# Opens the collation XML file for +locale+ (see +locale_file_path+) and
# returns it parsed by Nokogiri. The file handle is closed when the block
# finishes.
def get_collation_xml(locale)
  xml_path = locale_file_path(locale)

  File.open(xml_path) do |io|
    Nokogiri::XML(io)
  end
end
get_default_collation_type(collations)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 123
# Returns the value of the type attribute of the <default> child of
# +collations+, or a falsy value when no such child exists.
def get_default_collation_type(collations)
  default_node = collations.at_xpath('default[@type]')
  return default_node unless default_node

  default_node.attr('type')
end
import_locale(locale)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 50
# Imports the tailoring resource for a single +locale+ and reports progress
# on standard output.
#
# When no collation file exists for the locale, EMPTY_TAILORING_DATA is
# written instead. An ImportError raised while importing is reported as an
# "Error: ..." line rather than propagated.
def import_locale(locale)
  print "Importing %8s\t--\t" % locale

  unless tailoring_present?(locale)
    dump(locale, EMPTY_TAILORING_DATA)
    puts "Missing (generated empty tailoring resource)."
    return
  end

  dump(locale, tailoring_data(locale))
  puts "Done."
rescue ImportError => error
  puts "Error: #{error.message}"
end
locale_file_path(locale)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 76
# Returns the path of the collation XML file for +locale+:
# <common_path of the :cldr requirement>/collation/<translated locale>.xml
def locale_file_path(locale)
  collation_dir = File.join(requirements[:cldr].common_path, 'collation')
  File.join(collation_dir, "#{translated_locale(locale)}.xml")
end
parse_collator_options(data)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 195
# Extracts collator options from the collation node +data+ (may be nil).
#
# Only the caseFirst attribute of a <settings> child is read; it is stored
# as a symbol under 'case_first'. The hash is passed through
# TwitterCldr::Utils.deep_symbolize_keys before being returned, and is
# empty when +data+ is nil or has no such setting.
def parse_collator_options(data)
  options = {}

  settings_node = data && data.at_xpath('settings[@caseFirst]')
  options['case_first'] = settings_node.attr('caseFirst').to_sym if settings_node

  TwitterCldr::Utils.deep_symbolize_keys(options)
end
parse_suppressed_contractions(data)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 190
# Returns the contents of the <suppress_contractions> child of +data+ as one
# string: the node text is used to build a UnicodeSet, which is converted to
# an array whose members are joined together. Returns '' when +data+ is nil
# or has no such child.
def parse_suppressed_contractions(data)
  node = data && data.at_xpath('suppress_contractions')
  return '' unless node

  unicode_set = unicode_set_class.new(node.text)
  unicode_set_class.to_array(unicode_set).to_a.join
end
parse_tailorings(data, locale)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 144
# Converts the <rules> child of the collation node +data+ into tailored
# table text, one entry per line (see +table_entry_for_rule+). Returns ''
# when +data+ is nil or has no <rules> child.
#
# Every child of <rules> is first checked by +validate_tailoring_rule+, then:
# * a tag matching LEVEL_RULE_REGEXP yields one entry for its whole text when
#   the regexp's second capture group is empty, otherwise one entry per
#   character of its text;
# * an <x> tag yields one entry per SIMPLE_RULES child, each prefixed with the
#   text of the most recent <context> child; <comment> and <extend> children
#   are skipped and anything else raises ImportError;
# * any other tag must be listed in IGNORED_TAGS, otherwise ImportError is
#   raised.
def parse_tailorings(data, locale)
  rules = data && data.at_xpath('rules')
  return '' unless rules

  collator = collator_class.get_instance(Java::JavaUtil::Locale.new(locale.to_s))

  entries = rules.children.map do |child|
    validate_tailoring_rule(child)

    level_match = LEVEL_RULE_REGEXP.match(child.name)

    if level_match
      if level_match[2].empty?
        table_entry_for_rule(collator, child.text)
      else
        child.text.chars.map { |char| table_entry_for_rule(collator, char) }
      end
    elsif child.name == 'x'
      context = ''

      child.children.each_with_object([]) do |node, x_entries|
        tag = node.name

        if SIMPLE_RULES.include?(tag)
          x_entries << table_entry_for_rule(collator, context + node.text)
        elsif tag == 'context'
          # Remembered and prepended to the simple rules that follow.
          context = node.text
        elsif tag != 'comment' && tag != 'extend'
          raise ImportError, "Rule '#{tag}' inside <x></x> is not supported."
        end
      end
    else
      # Ignored tags contribute nil, which is dropped by compact below.
      raise ImportError, "Tag '#{child.name}' is not supported." unless IGNORED_TAGS.include?(child.name)
    end
  end

  entries.flatten.compact.join("\n")
end
resource_file_path(locale)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 82
# Returns the path of the YAML resource written for +locale+:
# <params[:output_path]>/<locale>.yml
def resource_file_path(locale)
  file_name = "#{locale}.yml"
  File.join(params[:output_path], file_name)
end
table_entry_for_rule(collator, tailored_value)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 180
# Formats one tailored table line for +tailored_value+:
#
#   "<code points>; [<p>, <s>, <t>][<p>, <s>, <t>]..."
#
# Code points (from +get_code_points+) are upper-case hex, left-padded with
# zeros to at least four digits and separated by spaces. Each collation
# element (from +get_collation_elements+) is rendered as its weights in
# upper-case hex, separated by ', ' and wrapped in square brackets.
def table_entry_for_rule(collator, tailored_value)
  hex_code_points = get_code_points(tailored_value).map do |code_point|
    code_point.to_s(16).upcase.rjust(4, '0')
  end

  hex_elements = get_collation_elements(collator, tailored_value).map do |weights|
    weights.map { |weight| weight.to_s(16).upcase }.join(', ')
  end

  "#{hex_code_points.join(' ')}; [#{hex_elements.join('][')}]"
end
tailoring_data(locale)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 86
# Builds the tailoring resource for +locale+ from its collation XML.
#
# Resolution steps:
# 1. If <collations> carries an alias for "//ldml/collations", the data of
#    the aliased locale is returned instead.
# 2. Otherwise the rules of the default collation type ('standard' when no
#    default is declared) are looked up.
# 3. When the locale has no such rules, the same collation type is looked up
#    in the collation file of the locale's language (e.g., from zh-Hant to
#    zh), provided that file exists.
#
# Returns a hash with the keys :collator_options, :tailored_table and
# :suppressed_contractions. When no rules are found at all, each parser
# receives nil.
#
# Raises ImportError when the collation file has no <collations> element or
# when an aliased locale has no collation file, so that +import_locale+
# reports the problem for this locale instead of the whole run aborting on
# a NoMethodError or Errno::ENOENT.
def tailoring_data(locale)
  doc = get_collation_xml(locale)
  collations = doc.at_xpath('//collations')
  raise ImportError, "No <collations> element found for locale '#{locale}'." unless collations

  collation_alias = collations.at_xpath('alias[@path="//ldml/collations"]')
  aliased_locale = collation_alias && collation_alias.attr('source')

  if aliased_locale
    unless tailoring_present?(aliased_locale)
      raise ImportError, "Collation data for aliased locale '#{aliased_locale}' is missing."
    end

    return tailoring_data(aliased_locale)
  end

  collation_type = get_default_collation_type(collations)
  collation_rules = get_collation_rules(collations, collation_type)

  unless collation_rules
    language_node = doc.at_xpath('//identity/language')
    language_type = language_node && language_node.attr('type')

    # try to fall back to language collation (e.g., from zh-Hant to zh) with the same collation type;
    # skipped when the language is unknown, is the locale itself, or has no collation file
    if language_type && language_type != locale.to_s && tailoring_present?(language_type)
      language_collations = get_collation_xml(language_type).at_xpath('//collations')
      collation_rules = get_collation_rules(language_collations, collation_type) if language_collations
    end
  end

  {
    collator_options: parse_collator_options(collation_rules),
    tailored_table: parse_tailorings(collation_rules, locale),
    suppressed_contractions: parse_suppressed_contractions(collation_rules)
  }
end
tailoring_present?(locale)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 68
# Tells whether a regular file exists at the collation XML path of +locale+
# (see +locale_file_path+).
def tailoring_present?(locale)
  xml_path = locale_file_path(locale)
  File.file?(xml_path)
end
translated_locale(locale)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 72
# Returns the entry of LOCALES_MAP for +locale+, or +locale+ itself when the
# map has no entry for it.
def translated_locale(locale)
  LOCALES_MAP.fetch(locale) { locale }
end
unicode_set_class()
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 136
# Returns the class loaded for 'com.ibm.icu.text.UnicodeSet', resolving it
# via +get_class+ on first use and caching it afterwards.
def unicode_set_class
  @unicode_set_class ||= begin
    java_class_name = 'com.ibm.icu.text.UnicodeSet'
    get_class(java_class_name)
  end
end
validate_tailoring_rule(rule)
click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 206
# Checks that the tag name of +rule+ is acceptable: names listed in
# IGNORED_TAGS or SUPPORTED_RULES pass (returning nil); any other name
# raises ImportError.
def validate_tailoring_rule(rule)
  return if IGNORED_TAGS.include?(rule.name) || SUPPORTED_RULES.include?(rule.name)

  raise ImportError, "Rule '#{rule.name}' is not supported."
end