module Text::PorterStemming

Constants

C
CC
MEQ1
MGR0
MGR1
STEP_2_LIST
STEP_3_LIST
SUFFIX_1_REGEXP
SUFFIX_2_REGEXP
V
VOWEL_IN_STEM
VV

Public Class Methods

stem(word)
# File lib/text/porter_stemming.rb, line 82
# Returns the Porter stem of +word+ as a new String.
#
# The argument is converted with +to_str+ and then copied, so neither the
# argument nor any string it hands out is modified. Inputs shorter than three
# characters are returned (as a copy) without further processing. The input
# is not downcased, and the literal suffix patterns in this method are all
# lowercase.
#
# word:: a String, or any object that responds to +to_str+.
#
# Relies on the module constants MGR0, MGR1, MEQ1, VOWEL_IN_STEM, CC, V,
# SUFFIX_1_REGEXP, SUFFIX_2_REGEXP, STEP_2_LIST and STEP_3_LIST.
# NOTE(review): MGR0 / MGR1 / MEQ1 presumably test Porter's "measure"
# (m > 0, m > 1, m == 1) of a stem -- confirm against their definitions.
def self.stem(word)

  # Convert first, then copy. Copying the argument before calling to_str only
  # makes a shallow copy of a wrapper object, so to_str could return a string
  # that is still shared with the caller; the in-place edits below
  # ([]=, <<, chop!) would then modify (or fail on) the caller's data.
  word = word.to_str.dup

  return word if word.length < 3

  # Map an initial y to Y so that the patterns never treat it as a vowel.
  # Remember whether we did, so that a Y supplied by the caller is left alone
  # when the mapping is undone at the end.
  mapped_initial_y = (word[0] == ?y)
  word[0] = 'Y' if mapped_initial_y

  # Step 1a: "sses" -> "ss", "ies" -> "i", and a final "s" that does not
  # follow another "s" is dropped.
  if word =~ /(ss|i)es$/
    word = $` + $1
  elsif word =~ /([^s])s$/
    word = $` + $1
  end

  # Step 1b: "eed" -> "ee" when the stem matches MGR0; otherwise strip
  # "ed" / "ing" when the stem matches VOWEL_IN_STEM, then tidy the result.
  if word =~ /eed$/
    word.chop! if $` =~ MGR0
  elsif word =~ /(ed|ing)$/
    stem = $`
    if stem =~ VOWEL_IN_STEM
      word = stem
      case word
        when /(at|bl|iz)$/             then word << "e"    # restore a final e
        when /([^aeiouylsz])\1$/       then word.chop!     # undouble the last letter
        when /^#{CC}#{V}[^aeiouwxy]$/o then word << "e"    # whole word is CC + V + consonant
      end
    end
  end

  # Step 1c: a final "y" becomes "i" when the stem matches VOWEL_IN_STEM.
  if word =~ /y$/
    stem = $`
    word = stem + "i" if stem =~ VOWEL_IN_STEM
  end

  # Step 2: replace the suffix captured by SUFFIX_1_REGEXP with its
  # STEP_2_LIST entry when the stem matches MGR0.
  if word =~ SUFFIX_1_REGEXP
    stem = $`
    suffix = $1
    if stem =~ MGR0
      word = stem + STEP_2_LIST[suffix]
    end
  end

  # Step 3: same scheme with the suffixes listed here and STEP_3_LIST.
  if word =~ /(icate|ative|alize|iciti|ical|ful|ness)$/
    stem = $`
    suffix = $1
    if stem =~ MGR0
      word = stem + STEP_3_LIST[suffix]
    end
  end

  # Step 4: drop a SUFFIX_2_REGEXP suffix, or "ion" after "s" / "t", when
  # what remains matches MGR1.
  if word =~ SUFFIX_2_REGEXP
    stem = $`
    if stem =~ MGR1
      word = stem
    end
  elsif word =~ /(s|t)(ion)$/
    stem = $` + $1   # the s / t belongs to the stem; only "ion" is removed
    if stem =~ MGR1
      word = stem
    end
  end

  # Step 5: drop a final "e" when the stem matches MGR1, or matches MEQ1
  # without being of the CC + V + consonant shape tested in step 1b.
  if word =~ /e$/
    stem = $`
    if (stem =~ MGR1) ||
        (stem =~ MEQ1 && stem !~ /^#{CC}#{V}[^aeiouwxy]$/o)
      word = stem
    end
  end

  # A final "ll" is reduced to "l" when the word matches MGR1.
  if word =~ /ll$/ && word =~ MGR1
    word.chop!
  end

  # Undo the initial y -> Y mapping, but only if this method applied it.
  word[0] = 'y' if mapped_initial_y && word[0] == ?Y

  word
end