class TwitterCldr::Shared::Hyphenator

Constants

BASE_RESOURCE_PATH
DEFAULT_LEFT_HYPHEN_MIN
DEFAULT_NO_HYPHEN
DEFAULT_RIGHT_HYPHEN_MIN

Attributes

locale[R]
options[R]
rules[R]
trie[R]

Public Class Methods

get(locale) click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 19
# Returns the hyphenator for the given locale, building it on first use and
# serving it from the class-level cache afterwards.
#
# locale - the requested locale; it is resolved to a supported hyphenation
#          locale via find_supported_locale.
#
# Returns the cached or newly constructed hyphenator instance.
# Raises UnsupportedLocaleError when no supported locale can be resolved.
def get(locale)
  # Keep the caller's value in `locale` so the error message below can
  # report what was actually requested (the lookup result is nil on failure).
  supported_locale = find_supported_locale(locale)

  unless supported_locale
    raise UnsupportedLocaleError,
      "'#{locale}' is not a supported hyphenation locale"
  end

  # Instances are cached under the resolved locale, not the requested one.
  cache[supported_locale] ||= begin
    resource = resource_for(supported_locale)
    new(resource[:rules], supported_locale, resource[:options])
  end
end
new(rules, locale, options) click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 71
# Stores the hyphenation rules, locale and options, then builds the lookup
# trie from the rules.
#
# rules   - collection of rule strings handed to build_trie_from.
# locale  - locale this hyphenator was created for.
# options - options object, later queried with #fetch by the
#           *_hyphen_min / no_hyphen readers.
def initialize(rules, locale, options)
  @rules, @locale, @options = rules, locale, options
  @trie = build_trie_from(rules)
end
supported_locale?(locale) click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 33
# Reports whether a hyphenation resource can be resolved for the locale.
#
# Returns true when find_supported_locale yields a match, false otherwise.
def supported_locale?(locale)
  find_supported_locale(locale) ? true : false
end
supported_locales() click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 37
# Lists the locales that have a hyphenation resource file.
#
# The names are the basenames (without the '.yml' extension) of every
# '*.yml' file found under the absolute path of BASE_RESOURCE_PATH. The
# result is memoized after the first call.
def supported_locales
  return @supported_locales if @supported_locales

  resource_dir = TwitterCldr.absolute_resource_path(
    File.join(BASE_RESOURCE_PATH)
  )
  pattern = File.join(resource_dir, '*.yml')

  @supported_locales = Dir.glob(pattern).map do |path|
    File.basename(path).chomp('.yml')
  end
end

Private Class Methods

cache() click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 60
# Lazily created hash that `get` uses to store hyphenator instances per
# locale.
def cache
  @cache = {} unless @cache
  @cache
end
find_supported_locale(locale) click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 50
# Resolves the requested locale to one that has a hyphenation resource.
#
# The locale is stringified, parsed and maximized; its '-'-joined
# permutations are then probed in order, and the first one for which a
# resource exists under BASE_RESOURCE_PATH is returned (nil if none does).
def find_supported_locale(locale)
  candidates = Locale.parse(locale.to_s).maximize.permutations('-')

  candidates.find do |candidate|
    TwitterCldr.resource_exists?(*BASE_RESOURCE_PATH, candidate)
  end
end
resource_for(locale) click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 64
# Loads the hyphenation resource for the locale, i.e. the resource located
# at BASE_RESOURCE_PATH followed by the locale name.
def resource_for(locale)
  resource_path = [*BASE_RESOURCE_PATH, locale]
  TwitterCldr.get_resource(*resource_path)
end

Public Instance Methods

each_chunk(text) { |tap { last_pos = pos }| ... } click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 83
# Yields the pieces of +text+ that lie between consecutive hyphenation
# positions (as reported by each_position), followed by whatever remains
# after the last position. Returns an enumerator when no block is given.
def each_chunk(text)
  return to_enum(__method__, text) unless block_given?

  chunk_start = 0

  each_position(text) do |break_pos|
    chunk = text[chunk_start...break_pos]
    # Advance before yielding so the next chunk starts at this break.
    chunk_start = break_pos
    yield chunk
  end

  # Emit the tail that follows the final break (or the whole text when
  # there were no breaks at all).
  yield text[chunk_start..text.size] if chunk_start < text.size
end
each_position(text) { |idx| ... } click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 99
# Yields every index at which +text+ may be hyphenated. Returns an
# enumerator when no block is given.
#
# The text is wrapped in '.' markers before the break weights are computed;
# an index is yielded when its weight is odd and it lies inside the window
# bounded by left_hyphen_min and right_hyphen_min.
def each_position(text)
  return to_enum(__method__, text) unless block_given?

  padded = ".#{text}."
  weights = break_weights_for(padded)

  first_allowed = left_hyphen_min
  stop = padded.size - right_hyphen_min - 2

  (first_allowed...stop).each do |position|
    next unless weights[position].odd?

    yield position
  end
end
hyphenate(text, hyphen = "\u00AD") click to toggle source

0x00AD is a soft hyphen

# File lib/twitter_cldr/shared/hyphenator.rb, line 79
# Joins the chunks of +text+ produced by each_chunk with +hyphen+.
#
# hyphen - separator inserted between chunks; defaults to U+00AD, the soft
#          hyphen.
def hyphenate(text, hyphen = "\u00AD")
  pieces = []
  each_chunk(text) { |piece| pieces << piece }
  pieces.join(hyphen)
end

Private Instance Methods

break_weights_for(text) click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 117
# Computes the break weights for +text+.
#
# Starting from every character index, the trie is walked along the
# following characters; each node carrying a value (a matching rule)
# contributes its weights through update_break_weights. The finished array
# is then passed through remove_illegal_hyphens, whose result is returned.
def break_weights_for(text)
  weights = Array.new(text.size - 1, 0)

  text.size.times do |start|
    node = trie.root
    cursor = start

    # Descend until the trie has no child for the next character
    # (text[cursor] is nil once the cursor runs past the end of the text).
    while node
      node = node.child(text[cursor])
      cursor += 1

      update_break_weights(node.value, weights, start) if node && node.has_value?
    end
  end

  remove_illegal_hyphens(weights, text)
end
build_trie_from(rules) click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 179
# Builds the rule trie: each rule is stored as the value under a key made
# of the rule's characters with all digits stripped out.
def build_trie_from(rules)
  rules.each_with_object(TwitterCldr::Utils::Trie.new) do |rule, trie|
    letters_only = rule.gsub(/\d/, '')
    trie.add(letters_only.each_char, rule)
  end
end
left_hyphen_min() click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 165
# Lower bound used by each_position for hyphenation indices. Read from the
# :lefthyphenmin option (falling back to DEFAULT_LEFT_HYPHEN_MIN), converted
# with to_i and memoized.
def left_hyphen_min
  return @left_hyphen_min if @left_hyphen_min

  configured = options.fetch(:lefthyphenmin, DEFAULT_LEFT_HYPHEN_MIN)
  @left_hyphen_min = configured.to_i
end
no_hyphen() click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 175
# Collection of characters after which remove_illegal_hyphens suppresses a
# break. Read from the :nohyphen option (falling back to DEFAULT_NO_HYPHEN)
# and memoized.
def no_hyphen
  return @no_hyphen if @no_hyphen

  @no_hyphen = options.fetch(:nohyphen, DEFAULT_NO_HYPHEN)
end
remove_illegal_hyphens(break_weights, text) click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 157
# Returns a copy of +break_weights+ in which a weight is reset to 0 whenever
# the character of +text+ one index before it is listed in no_hyphen. The
# weight at index 0 is always kept as is.
def remove_illegal_hyphens(break_weights, text)
  break_weights.each_with_index.map do |weight, position|
    if position.positive? && no_hyphen.include?(text[position - 1])
      0
    else
      weight
    end
  end
end
right_hyphen_min() click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 170
# Value subtracted from the padded text size in each_position to compute the
# upper bound for hyphenation indices. Read from the :righthyphenmin option
# (falling back to DEFAULT_RIGHT_HYPHEN_MIN), converted with to_i and
# memoized.
def right_hyphen_min
  return @right_hyphen_min if @right_hyphen_min

  configured = options.fetch(:righthyphenmin, DEFAULT_RIGHT_HYPHEN_MIN)
  @right_hyphen_min = configured.to_i
end
update_break_weights(pattern, break_weights, start_idx) click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 137
# Merges the weights encoded in +pattern+ into +break_weights+ (in place).
#
# pattern       - rule string mixing characters and single-digit weights.
# break_weights - array of integer weights; each affected slot keeps the
#                 larger of its current value and the pattern's digit.
# start_idx     - index in the text at which the pattern's characters begin.
#
# A digit that follows +n+ non-digit characters of the pattern targets slot
# start_idx + n - 1. Digits whose slot lies before the start of the array
# are skipped; processing stops at the first digit whose slot lies past the
# end of the array.
def update_break_weights(pattern, break_weights, start_idx)
  pattern_idx = 0

  pattern.each_char do |segment|
    unless segment =~ /\d/
      pattern_idx += 1
      next
    end

    idx = (start_idx + pattern_idx) - 1
    break if idx >= break_weights.size
    # A negative index would wrap around and overwrite a weight at the end
    # of the array, so ignore it; later digits may still be in range.
    next if idx < 0

    break_weights[idx] = [break_weights[idx], segment.to_i].max
  end
end