class TwitterCldr::Segmentation::PossibleWord

Constants

POSSIBLE_WORD_LIST_MAX

Maximum size of the candidate list, bounded by the maximum number of words in the dictionary that can form a nested sequence.

Public Class Methods

new() click to toggle source
# File lib/twitter_cldr/segmentation/possible_word.rb, line 13
# Builds an empty candidate list that is not yet tied to any text position.
#
# Every instance variable used by this class is declared here so that
# readers such as #longest_prefix never touch an uninitialized variable
# when they are called before #candidates has run.
def initialize
  @lengths = []   # candidate lengths, as returned by the dictionary
  @count = nil    # number of candidates found; set by #candidates
  # -1 presumably cannot equal a real cursor position, which forces the
  # first #candidates call to query the dictionary -- TODO confirm.
  @offset = -1
  @prefix = nil   # longest dictionary prefix; set by #candidates
  @current = nil  # index of the candidate currently under consideration
  @mark = nil     # index of the candidate marked as preferred
end

Public Instance Methods

accept_marked(cursor) click to toggle source

select the currently marked candidate, point after it in the text, and invalidate self

# File lib/twitter_cldr/segmentation/possible_word.rb, line 46
# Commits to the marked candidate: moves +cursor+ to the stored offset plus
# the marked candidate's length, then returns that length.
#
# cursor - object responding to #position=
def accept_marked(cursor)
  chosen_length = @lengths[@mark]
  cursor.position = @offset + chosen_length
  chosen_length
end
back_up(cursor) click to toggle source

Back up from the current candidate to the next shorter one. Returns true if a shorter candidate exists, in which case the cursor is positioned just after it; otherwise returns false.

# File lib/twitter_cldr/segmentation/possible_word.rb, line 53
# Steps from the current candidate to the previous entry in the length list.
#
# Returns false, leaving everything untouched, when the current index is
# already 0 or below. Otherwise decrements the current index, moves
# +cursor+ to the stored offset plus that candidate's length, and returns
# true.
def back_up(cursor)
  return false unless @current > 0

  @current -= 1
  cursor.position = @offset + @lengths[@current]
  true
end
candidates(cursor, dictionary, end_pos) click to toggle source

Fill the list of candidates if needed, select the longest one, and return the number of candidates found.

# File lib/twitter_cldr/segmentation/possible_word.rb, line 20
# Looks up the dictionary words that start at the cursor, reusing the
# previous lookup when the cursor is still at the offset seen last time.
#
# cursor     - object responding to #position and #position=
# dictionary - object responding to #matches(cursor, max_length, limit)
# end_pos    - upper bound of the text range; the lookup is limited to
#              end_pos minus the starting position
#
# On return the last (longest) candidate is both current and marked, and
# the cursor sits just after it when at least one candidate was found.
#
# Returns the number of candidates found.
def candidates(cursor, dictionary, end_pos)
  origin = cursor.position

  unless origin == @offset
    @offset = origin
    span = end_pos - origin
    @count, _, @lengths, @prefix = dictionary.matches(cursor, span, POSSIBLE_WORD_LIST_MAX)

    # The dictionary leaves the cursor after the longest prefix rather than
    # the longest word, so rewind when nothing matched.
    cursor.position = origin if @count <= 0
  end

  cursor.position = origin + @lengths[@count - 1] if @count > 0

  @mark = @current = @count - 1
  @count
end
longest_prefix() click to toggle source

return the longest prefix this candidate location shares with a dictionary word

# File lib/twitter_cldr/segmentation/possible_word.rb, line 64
# Reader for the prefix value stored by the most recent dictionary lookup
# in #candidates (the fourth element of the dictionary's result).
#
# Returns whatever was stored, or nil when nothing has been stored yet.
def longest_prefix
  @prefix
end
mark_current() click to toggle source

mark the current candidate as the one we like

# File lib/twitter_cldr/segmentation/possible_word.rb, line 69
# Records the candidate currently under consideration as the preferred one
# by copying the current index into the mark.
#
# Returns the index that was marked.
def mark_current
  @mark = @current
end