class TwitterCldr::Shared::Hyphenator
Constants
- BASE_RESOURCE_PATH
- DEFAULT_LEFT_HYPHEN_MIN
- DEFAULT_NO_HYPHEN
- DEFAULT_RIGHT_HYPHEN_MIN
Attributes
locale[R]
options[R]
rules[R]
trie[R]
Public Class Methods
get(locale)
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 19
#
# Returns the hyphenator for +locale+, building it on first use and reusing
# the cached instance on later calls.
#
# @param locale [#to_s] the requested locale; it is resolved to a supported
#   hyphenation locale via find_supported_locale
# @return [Hyphenator] the (possibly cached) hyphenator for the resolved locale
# @raise [UnsupportedLocaleError] when no supported locale matches
def get(locale)
  # Keep the caller's value in +locale+: the lookup result is nil on failure,
  # and the error message must name the locale that was actually requested.
  supported_locale = find_supported_locale(locale)

  unless supported_locale
    raise UnsupportedLocaleError, "'#{locale}' is not a supported hyphenation locale"
  end

  # Instances are cached under the resolved locale, not the requested one.
  cache[supported_locale] ||= begin
    resource = resource_for(supported_locale)
    new(resource[:rules], supported_locale, resource[:options])
  end
end
new(rules, locale, options)
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 71
#
# Stores the rules, locale and options on the instance, then builds the
# lookup trie from the rules.
#
# @param rules [Enumerable<String>] hyphenation rules (iterated by build_trie_from)
# @param locale the locale this hyphenator was built for
# @param options [Hash] hyphenation options (read with +fetch+ by the *_min helpers)
def initialize(rules, locale, options)
  @rules, @locale, @options = rules, locale, options
  @trie = build_trie_from(@rules)
end
supported_locale?(locale)
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 33
#
# Tells whether a hyphenation locale can be resolved for +locale+.
#
# @param locale [#to_s] the locale to check
# @return [Boolean] true when find_supported_locale returns a match
def supported_locale?(locale)
  find_supported_locale(locale) ? true : false
end
supported_locales()
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 37
#
# Lists the locales that have a hyphenation resource file. The list is
# derived from the +*.yml+ file names in the resource directory and is
# memoized after the first call.
#
# @return [Array<String>] file base names with the +.yml+ suffix removed
def supported_locales
  return @supported_locales if @supported_locales

  resource_dir = TwitterCldr.absolute_resource_path(
    File.join(BASE_RESOURCE_PATH)
  )
  yml_pattern = File.join(resource_dir, '*.yml')

  @supported_locales = Dir.glob(yml_pattern).map do |path|
    File.basename(path).chomp('.yml')
  end
end
Private Class Methods
cache()
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 60
#
# Lazily created hash used to memoize hyphenator instances.
#
# @return [Hash] the same hash object on every call
def cache
  @cache = {} unless @cache
  @cache
end
find_supported_locale(locale)
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 50
#
# Resolves +locale+ to the first candidate for which a hyphenation resource
# exists. Candidates come from maximizing the parsed locale and taking its
# '-'-joined permutations (presumably ordered most-specific first; confirm
# against Locale#permutations).
#
# @param locale [#to_s] the requested locale
# @return the first candidate with an existing resource, or nil if none has one
def find_supported_locale(locale)
  candidates = Locale.parse(locale.to_s).maximize.permutations('-')

  candidates.find do |candidate|
    TwitterCldr.resource_exists?(*BASE_RESOURCE_PATH, candidate)
  end
end
resource_for(locale)
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 64
#
# Loads the hyphenation resource for +locale+ from under BASE_RESOURCE_PATH.
#
# @param locale the resolved hyphenation locale
# @return the resource returned by TwitterCldr.get_resource (callers read
#   its +:rules+ and +:options+ entries)
def resource_for(locale)
  resource_path = [*BASE_RESOURCE_PATH, locale]
  TwitterCldr.get_resource(*resource_path)
end
Public Instance Methods
each_chunk(text) { |tap { last_pos = pos }| ... }
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 83
#
# Splits +text+ at every position reported by each_position and yields the
# pieces in order, including the trailing remainder when it is non-empty.
# Without a block, returns an enumerator over the same pieces.
#
# @param text [String] the text to split
# @yieldparam chunk [String] the next piece of +text+
# @return [Enumerator] when no block is given
def each_chunk(text)
  return to_enum(__method__, text) unless block_given?

  chunk_start = 0

  each_position(text) do |break_at|
    chunk = text[chunk_start...break_at]
    # Advance the cursor before yielding, so it is already updated when the
    # caller's block runs.
    chunk_start = break_at
    yield chunk
  end

  # Whatever follows the last break position is the final chunk.
  yield text[chunk_start..text.size] if chunk_start < text.size
end
each_position(text) { |idx| ... }
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 99
#
# Yields every index whose computed break weight is odd, restricted to the
# window allowed by the left/right minimums. Without a block, returns an
# enumerator over the same indices.
#
# @param text [String] the text to analyze
# @yieldparam idx [Integer] a permitted break index (each_chunk uses these
#   as offsets into the original, unpadded text)
# @return [Enumerator] when no block is given
def each_position(text)
  return to_enum(__method__, text) unless block_given?

  # Weights are computed on the text wrapped in '.' markers.
  padded = ".#{text}."
  weights = break_weights_for(padded)

  first_idx = left_hyphen_min
  end_idx = padded.size - right_hyphen_min - 2

  (first_idx...end_idx).each do |idx|
    yield idx if weights[idx].odd?
  end
end
hyphenate(text, hyphen = "\u00AD")
click to toggle source
U+00AD (0x00AD) is the soft hyphen character; it is the default value of the hyphen argument.
# File lib/twitter_cldr/shared/hyphenator.rb, line 79
#
# Joins the chunks of +text+ with +hyphen+ between them.
#
# @param text [String] the text to hyphenate
# @param hyphen [String] separator placed between chunks; defaults to
#   U+00AD, the soft hyphen
# @return [String] the chunks of +text+ joined by +hyphen+
def hyphenate(text, hyphen = "\u00AD")
  chunks = each_chunk(text).to_a
  chunks.join(hyphen)
end
Private Instance Methods
break_weights_for(text)
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 117
#
# Computes the break weights for +text+. Starting from every character
# offset, the trie is walked along the text; each node that carries a value
# (a rule) contributes its weights via update_break_weights. The result is
# then passed through remove_illegal_hyphens.
#
# @param text [String] the text to analyze (each_position passes it wrapped
#   in '.' markers)
# @return [Array<Integer>] break weights, one slot per text.size - 1
def break_weights_for(text)
  weights = Array.new(text.size - 1, 0)

  text.size.times do |start_idx|
    node = trie.root
    cursor = start_idx

    # Follow the text through the trie until no child matches.
    while node
      node = node.child(text[cursor])
      cursor += 1

      update_break_weights(node.value, weights, start_idx) if node && node.has_value?
    end
  end

  remove_illegal_hyphens(weights, text)
end
build_trie_from(rules)
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 179
#
# Builds a trie in which each rule is stored under its own characters with
# the digits removed, so lookups can follow plain text while the stored
# value keeps the digits.
#
# @param rules [Enumerable<String>] hyphenation rules
# @return [TwitterCldr::Utils::Trie] the populated trie
def build_trie_from(rules)
  rule_trie = TwitterCldr::Utils::Trie.new

  rules.each do |rule|
    key = rule.gsub(/\d/, '')
    rule_trie.add(key.each_char, rule)
  end

  rule_trie
end
left_hyphen_min()
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 165
#
# The +:lefthyphenmin+ option as an integer, falling back to
# DEFAULT_LEFT_HYPHEN_MIN when the option is absent. Memoized.
#
# @return [Integer]
def left_hyphen_min
  return @left_hyphen_min if @left_hyphen_min

  @left_hyphen_min = options.fetch(:lefthyphenmin, DEFAULT_LEFT_HYPHEN_MIN).to_i
end
no_hyphen()
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 175
#
# The +:nohyphen+ option, falling back to DEFAULT_NO_HYPHEN when the option
# is absent. Memoized. remove_illegal_hyphens queries it with +include?+.
#
# @return the configured no-hyphen collection
def no_hyphen
  return @no_hyphen if @no_hyphen

  @no_hyphen = options.fetch(:nohyphen, DEFAULT_NO_HYPHEN)
end
remove_illegal_hyphens(break_weights, text)
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 157
#
# Returns a copy of +break_weights+ in which a slot is zeroed whenever the
# character at the preceding index of +text+ is listed in no_hyphen. The
# first slot is always kept as is.
#
# @param break_weights [Array<Integer>] computed break weights
# @param text [String] the text the weights were computed for
# @return [Array<Integer>] a new array; the input is not modified
def remove_illegal_hyphens(break_weights, text)
  break_weights.each_with_index.map do |weight, idx|
    if idx > 0 && no_hyphen.include?(text[idx - 1])
      0
    else
      weight
    end
  end
end
right_hyphen_min()
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 170
#
# The +:righthyphenmin+ option as an integer, falling back to
# DEFAULT_RIGHT_HYPHEN_MIN when the option is absent. Memoized.
#
# @return [Integer]
def right_hyphen_min
  return @right_hyphen_min if @right_hyphen_min

  @right_hyphen_min = options.fetch(:righthyphenmin, DEFAULT_RIGHT_HYPHEN_MIN).to_i
end
update_break_weights(pattern, break_weights, start_idx)
click to toggle source
# File lib/twitter_cldr/shared/hyphenator.rb, line 137
#
# Merges the weights encoded in +pattern+ into +break_weights+, in place.
# Each digit in the pattern is a weight; non-digit characters advance the
# position. A slot keeps the larger of its current value and the new weight.
#
# @param pattern [String] a rule made of characters interleaved with digits
# @param break_weights [Array<Integer>] weights to update (mutated)
# @param start_idx [Integer] offset in the text where the pattern matched
def update_break_weights(pattern, break_weights, start_idx)
  chars_seen = 0

  pattern.each_char do |segment|
    unless segment =~ /\d/
      chars_seen += 1
      next
    end

    idx = start_idx + chars_seen - 1

    # Past the end of the weights: nothing further in this pattern can apply.
    break if idx >= break_weights.size

    # A digit before the very first character of the text has no slot.
    # Without this guard, Ruby's negative indexing would overwrite the
    # last slot instead.
    next if idx < 0

    break_weights[idx] = [break_weights[idx], segment.to_i].max
  end
end