class TwitterCldr::Segmentation::Dictionary

Attributes

trie[R]

Public Class Methods

burmese() click to toggle source
# File lib/twitter_cldr/segmentation/dictionary.rb, line 11
# Convenience accessor for the dictionary named 'burmese'.
# Delegates to get, which loads the dictionary on first use and caches it.
def burmese
  get('burmese')
end
cj() click to toggle source
# File lib/twitter_cldr/segmentation/dictionary.rb, line 15
# Convenience accessor for the dictionary named 'cj'.
# Delegates to get, which loads the dictionary on first use and caches it.
# NOTE(review): 'cj' presumably stands for Chinese/Japanese -- confirm.
def cj
  get('cj')
end
get(name) click to toggle source
# File lib/twitter_cldr/segmentation/dictionary.rb, line 31
# Returns the dictionary registered under +name+, building it on first use.
#
# On a cache miss the "#{name}dict.dump" resource is fetched from
# shared/segments/dictionaries via TwitterCldr.get_resource, wrapped in a new
# instance, and stored in dictionary_cache. Subsequent calls with the same
# name return that same instance without touching the resource again.
#
# name - value interpolated into the dump file name (e.g. 'thai'); it is also
#        used verbatim as the cache key.
#
# Returns the cached or freshly created instance.
def get(name)
  cache = dictionary_cache
  cached = cache[name]
  return cached if cached

  dump_file = "#{name}dict.dump"
  loaded_trie = TwitterCldr.get_resource(
    'shared', 'segments', 'dictionaries', dump_file
  )

  cache[name] = new(loaded_trie)
end
khmer() click to toggle source
# File lib/twitter_cldr/segmentation/dictionary.rb, line 19
# Convenience accessor for the dictionary named 'khmer'.
# Delegates to get, which loads the dictionary on first use and caches it.
def khmer
  get('khmer')
end
lao() click to toggle source
# File lib/twitter_cldr/segmentation/dictionary.rb, line 23
# Convenience accessor for the dictionary named 'lao'.
# Delegates to get, which loads the dictionary on first use and caches it.
def lao
  get('lao')
end
new(trie) click to toggle source
# File lib/twitter_cldr/segmentation/dictionary.rb, line 50
# Wraps the given trie in a dictionary instance.
#
# trie - the object stored in @trie and exposed through the trie reader;
#        matches expects it to respond to root (whose result responds to
#        child).
def initialize(trie)
  @trie = trie
end
thai() click to toggle source
# File lib/twitter_cldr/segmentation/dictionary.rb, line 27
# Convenience accessor for the dictionary named 'thai'.
# Delegates to get, which loads the dictionary on first use and caches it.
def thai
  get('thai')
end

Private Class Methods

dictionary_cache() click to toggle source
# File lib/twitter_cldr/segmentation/dictionary.rb, line 43
# Returns the hash used by get to memoize dictionaries by name.
# The hash is created empty on first access and kept in @dictionary_cache.
def dictionary_cache
  @dictionary_cache ||= {}
end

Public Instance Methods

matches(cursor, max_search_length, limit) click to toggle source
# File lib/twitter_cldr/segmentation/dictionary.rb, line 54
# Walks the trie along the code points read from +cursor+ and collects every
# value found on the way.
#
# cursor            - object responding to length, position and
#                     codepoint(index = <default>); the first code point is
#                     read with the no-argument form, later ones at
#                     cursor.position + <code points consumed so far>.
# max_search_length - the walk stops once this many code points have been
#                     consumed.
# limit             - maximum number of values to record; the walk itself
#                     continues after the limit is reached.
#
# Returns the bare Integer 0 (not an array) when cursor.length is zero.
# Otherwise returns [count, values, lengths, num_chars] where values holds
# the recorded node values, lengths holds the number of code points consumed
# at each recorded value, count is values.size, and num_chars is the final
# consumed counter. Note that num_chars is incremented after each descent
# attempt, so it also counts the code point whose lookup returned nil and
# ended the walk.
def matches(cursor, max_search_length, limit)
  return 0 if cursor.length == 0

  found_values = []
  found_lengths = []
  consumed = 1
  node = trie.root.child(cursor.codepoint)

  until node.nil?
    # Record the value only while under the limit; keep walking either way.
    if node.has_value? && found_values.size < limit
      found_values.push(node.value)
      found_lengths.push(consumed)
    end

    break if consumed >= max_search_length

    next_index = cursor.position + consumed
    node = node.child(cursor.codepoint(next_index))
    consumed += 1
  end

  [found_values.size, found_values, found_lengths, consumed]
end