class TwitterCldr::Resources::TailoringImporter

Constants

EMPTY_TAILORING_DATA
IGNORED_TAGS
LAST_BYTE_MASK
LEVEL_RULE_REGEXP
LOCALES_MAP
SIMPLE_RULES
SUPPORTED_RULES

Private Instance Methods

collation_element_iterator_class() click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 140
# Returns the class that `get_class` resolves for
# 'com.ibm.icu.text.CollationElementIterator', caching it in an instance
# variable so the lookup happens only once.
def collation_element_iterator_class
  return @collation_element_iterator_class if @collation_element_iterator_class

  @collation_element_iterator_class = get_class('com.ibm.icu.text.CollationElementIterator')
end
collator_class() click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 132
# Returns the class that `get_class` resolves for
# 'com.ibm.icu.text.Collator', caching it after the first lookup.
def collator_class
  return @collator_class if @collator_class

  @collator_class = get_class('com.ibm.icu.text.Collator')
end
dump(locale, data) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 64
# Serializes +data+ as YAML into the resource file for +locale+
# (path given by `resource_file_path`), overwriting any existing file.
def dump(locale, data)
  target_path = resource_file_path(locale)

  File.open(target_path, 'w') do |io|
    YAML.dump(data, io)
  end
end
execute() click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 45
# Entry point of the importer: makes sure the output directory from
# params[:output_path] exists, then imports every locale listed in
# params[:locales] one by one.
def execute
  output_dir = params[:output_path]
  FileUtils.mkdir_p(output_dir)

  params[:locales].each do |locale|
    import_locale(locale)
  end
end
get_class(name) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 128
# Looks up the class called +name+ by delegating to the `get_class`
# method of the :icu entry in `requirements`.
def get_class(name)
  icu_requirement = requirements[:icu]
  icu_requirement.get_class(name)
end
get_code_points(string) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 234
# Normalizes +string+ with TwitterCldr::Normalization.normalize and returns
# whatever TwitterCldr::Utils::CodePoints.from_string yields for the
# normalized text.
def get_code_points(string)
  normalized = TwitterCldr::Normalization.normalize(string)
  TwitterCldr::Utils::CodePoints.from_string(normalized)
end
get_collation_elements(collator, string) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 212
# Walks the collation element iterator that +collator+ produces for +string+
# and unpacks every element into a [primary, secondary, tertiary] triple.
#
# Each element is read as a packed integer: bits 24-31 and 16-23 form the
# primary weight, bits 8-15 the secondary weight and bits 0-7 the tertiary
# weight (each masked with LAST_BYTE_MASK, presumably 0xFF). Iteration stops
# at the iterator class's NULLORDER sentinel.
#
# Returns an array of three-element integer arrays (empty when the iterator
# yields NULLORDER immediately).
def get_collation_elements(collator, string)
  iterator = collator.get_collation_element_iterator(string)
  elements = []

  element = iterator.next
  until element == collation_element_iterator_class::NULLORDER
    high_byte = (element >> 24) & LAST_BYTE_MASK
    low_byte  = (element >> 16) & LAST_BYTE_MASK

    # The primary weight stays a single byte when its second byte is zero.
    primary = high_byte
    primary = (high_byte << 8) + low_byte unless low_byte.zero?

    secondary = (element >> 8) & LAST_BYTE_MASK
    tertiary  = element & LAST_BYTE_MASK

    elements.push([primary, secondary, tertiary])
    element = iterator.next
  end

  elements
end
get_collation_rules(collations, collation_type) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 119
# Finds the <collation> child of +collations+ whose type attribute equals
# +collation_type+, using 'standard' when no type is given.
# Returns whatever `at_xpath` returns for that query.
def get_collation_rules(collations, collation_type)
  type = collation_type || 'standard'
  collations.at_xpath(%Q(collation[@type="#{type}"]))
end
get_collation_xml(locale) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 115
# Opens the collation file for +locale+ (see `locale_file_path`) and returns
# the result of parsing it with Nokogiri::XML. The file handle is closed
# when the block exits.
def get_collation_xml(locale)
  source_path = locale_file_path(locale)

  File.open(source_path) do |io|
    Nokogiri::XML(io)
  end
end
get_default_collation_type(collations) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 123
# Reads the type attribute of the <default type="..."> child of
# +collations+. Returns the falsy lookup result itself (nil for a missing
# node) when there is no such child.
def get_default_collation_type(collations)
  node = collations.at_xpath('default[@type]')
  return node unless node

  node.attr('type')
end
import_locale(locale) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 50
# Imports the tailoring resource for a single +locale+, reporting progress
# on standard output.
#
# When a collation file exists for the locale its tailoring data is dumped;
# otherwise EMPTY_TAILORING_DATA is dumped instead. An ImportError raised
# along the way is reported as "Error: <message>" and nothing is dumped.
def import_locale(locale)
  print "Importing %8s\t--\t" % locale

  present = tailoring_present?(locale)
  dump(locale, present ? tailoring_data(locale) : EMPTY_TAILORING_DATA)
  puts(present ? "Done." : "Missing (generated empty tailoring resource).")
rescue ImportError => e
  puts "Error: #{e.message}"
end
locale_file_path(locale) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 76
# Builds the path of the collation XML file for +locale+:
# <common_path of the :cldr requirement>/collation/<translated locale>.xml
def locale_file_path(locale)
  common_dir = requirements[:cldr].common_path
  file_name  = "#{translated_locale(locale)}.xml"

  File.join(common_dir, 'collation', file_name)
end
parse_collator_options(data) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 195
# Extracts collator options from the collation node +data+ (may be nil).
#
# Only the caseFirst attribute of a <settings> child is read; when present
# it is stored under 'case_first' as a symbol. The resulting hash is passed
# through TwitterCldr::Utils.deep_symbolize_keys before being returned.
def parse_collator_options(data)
  setting = data && data.at_xpath('settings[@caseFirst]')
  options = setting ? { 'case_first' => setting.attr('caseFirst').to_sym } : {}

  TwitterCldr::Utils.deep_symbolize_keys(options)
end
parse_suppressed_contractions(data) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 190
# Returns the suppressed contractions of the collation node +data+ as one
# string: the text of its <suppress_contractions> child is turned into a
# unicode set, expanded to an array and joined. Returns '' when +data+ is
# nil or has no such child.
def parse_suppressed_contractions(data)
  node = data && data.at_xpath('suppress_contractions')
  return '' unless node

  unicode_set = unicode_set_class.new(node.text)
  unicode_set_class.to_array(unicode_set).to_a.join
end
parse_tailorings(data, locale) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 144
# Converts the <rules> child of the collation node +data+ into a tailoring
# table: one "code points; [collation elements]" line per tailored value,
# joined with newlines. Returns '' when +data+ is nil or has no <rules>.
#
# Every child of <rules> is validated first, then handled by tag name:
# * names matching LEVEL_RULE_REGEXP produce one entry for the whole text
#   when the regexp's second group is empty, otherwise one entry per
#   character of the text;
# * <x> produces an entry for each child listed in SIMPLE_RULES, prefixed
#   with the text of the most recent <context> child; <comment> and
#   <extend> children are skipped;
# * tags in IGNORED_TAGS contribute nothing.
#
# Raises ImportError for any other tag, or any other child of <x>.
def parse_tailorings(data, locale)
  rules = data && data.at_xpath('rules')
  return '' unless rules

  collator = collator_class.get_instance(Java::JavaUtil::Locale.new(locale.to_s))

  entries = rules.children.map do |rule|
    validate_tailoring_rule(rule)

    level_match = LEVEL_RULE_REGEXP.match(rule.name)

    if level_match
      if level_match[2].empty?
        table_entry_for_rule(collator, rule.text)
      else
        rule.text.chars.map { |char| table_entry_for_rule(collator, char) }
      end
    elsif rule.name == 'x'
      context = ''

      rule.children.each_with_object([]) do |part, collected|
        part_name = part.name

        if SIMPLE_RULES.include?(part_name)
          collected << table_entry_for_rule(collator, context + part.text)
        elsif part_name == 'context'
          context = part.text
        elsif !%w[comment extend].include?(part_name)
          raise ImportError, "Rule '#{part_name}' inside <x></x> is not supported."
        end
      end
    else
      next if IGNORED_TAGS.include?(rule.name)

      raise ImportError, "Tag '#{rule.name}' is not supported."
    end
  end

  # Per-character and <x> rules yield nested arrays; ignored tags yield nil.
  entries.flatten.compact.join("\n")
end
resource_file_path(locale) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 82
# Returns the path of the YAML resource for +locale+:
# <params[:output_path]>/<locale>.yml
def resource_file_path(locale)
  file_name = "#{locale}.yml"
  File.join(params[:output_path], file_name)
end
table_entry_for_rule(collator, tailored_value) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 180
# Formats one tailoring table line for +tailored_value+:
#
#   "<code points>; [<p>, <s>, <t>][<p>, <s>, <t>]..."
#
# Code points are upper-case hex, zero-padded to at least four digits and
# separated by spaces. Every collation element obtained from +collator+ is
# rendered as its upper-case hex levels separated by ', ' inside brackets.
def table_entry_for_rule(collator, tailored_value)
  hex_code_points = get_code_points(tailored_value).map do |code_point|
    code_point.to_s(16).upcase.rjust(4, '0')
  end

  element_groups = get_collation_elements(collator, tailored_value).map do |levels|
    levels.map { |level| level.to_s(16).upcase }.join(', ')
  end

  "#{hex_code_points.join(' ')}; [#{element_groups.join('][')}]"
end
tailoring_data(locale) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 86
# Builds the tailoring resource hash for +locale+ from its collation XML.
#
# Resolution order:
# 1. If <collations> carries an <alias path="//ldml/collations">, the data of
#    the alias source locale is returned instead.
# 2. Otherwise the rules of the default collation type are used
#    (see `get_default_collation_type` / `get_collation_rules`).
# 3. If the locale has no rules for that type, the file of the language named
#    in <identity>/<language> is consulted for the same collation type.
#
# When no rules are found at all, the parse helpers receive nil and the
# returned hash describes an empty tailoring.
#
# Returns a hash with :collator_options, :tailored_table and
# :suppressed_contractions.
#
# Raises ImportError when the locale's document has no <collations> element,
# so that the failure is reported per locale by the caller instead of
# surfacing as a NoMethodError on nil.
def tailoring_data(locale)
  document   = get_collation_xml(locale)
  collations = document.at_xpath('//collations')

  raise ImportError, "No <collations> element found for locale '#{locale}'." unless collations

  collation_alias = collations.at_xpath('alias[@path="//ldml/collations"]')
  aliased_locale  = collation_alias && collation_alias.attr('source')

  return tailoring_data(aliased_locale) if aliased_locale

  collation_type  = get_default_collation_type(collations)
  collation_rules = get_collation_rules(collations, collation_type)

  unless collation_rules
    language_node = document.at_xpath('//identity/language')
    language_type = language_node && language_node.attr('type')

    # try to fall back to language collation (e.g., from zh-Hant to zh) with the same collation type
    if language_type && language_type != locale.to_s
      fallback_collations = get_collation_xml(language_type).at_xpath('//collations')
      collation_rules = get_collation_rules(fallback_collations, collation_type) if fallback_collations
    end
  end

  {
    collator_options:        parse_collator_options(collation_rules),
    tailored_table:          parse_tailorings(collation_rules, locale),
    suppressed_contractions: parse_suppressed_contractions(collation_rules)
  }
end
tailoring_present?(locale) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 68
# Tells whether a regular file exists at the collation file path for
# +locale+ (see `locale_file_path`).
def tailoring_present?(locale)
  source_path = locale_file_path(locale)
  File.file?(source_path)
end
translated_locale(locale) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 72
# Maps +locale+ through LOCALES_MAP, returning +locale+ itself when the map
# has no entry for it.
def translated_locale(locale)
  LOCALES_MAP.fetch(locale) { locale }
end
unicode_set_class() click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 136
# Returns the class that `get_class` resolves for
# 'com.ibm.icu.text.UnicodeSet', caching it after the first lookup.
def unicode_set_class
  return @unicode_set_class if @unicode_set_class

  @unicode_set_class = get_class('com.ibm.icu.text.UnicodeSet')
end
validate_tailoring_rule(rule) click to toggle source
# File lib/twitter_cldr/resources/tailoring_importer.rb, line 206
# Ensures the tag name of +rule+ is either ignorable (IGNORED_TAGS) or a
# supported rule (SUPPORTED_RULES). Returns nil when it is; raises
# ImportError otherwise.
def validate_tailoring_rule(rule)
  name = rule.name
  return if IGNORED_TAGS.include?(name) || SUPPORTED_RULES.include?(name)

  raise ImportError, "Rule '#{name}' is not supported."
end