module Text::PorterStemming
Constants
- C
- CC
- MEQ1
- MGR0
- MGR1
- STEP_2_LIST
- STEP_3_LIST
- SUFFIX_1_REGEXP
- SUFFIX_2_REGEXP
- V
- VOWEL_IN_STEM
- VV
Public Class Methods
stem(word)
click to toggle source
# File lib/text/porter_stemming.rb, line 82
#
# Returns the stem of +word+, produced by applying the suffix-rewriting
# steps below (1a, 1b, 2, 3, 4, 5) in order.
#
# word - a String, or any object that responds to +to_str+.
#
# Returns a new String. The argument is never modified: the method works on
# a private copy of the converted string. Words shorter than three
# characters are returned (as a copy) without any stemming.
#
# Raises NoMethodError if +word+ does not respond to +to_str+.
#
# NOTE(review): the constants used here (MGR0, MGR1, MEQ1, VOWEL_IN_STEM,
# CC, V, SUFFIX_1_REGEXP, SUFFIX_2_REGEXP, STEP_2_LIST, STEP_3_LIST) are
# defined elsewhere in the module; the comments below describe only how
# this method uses them.
def self.stem(word)
  # Convert first, then copy. Copying first (word.dup.to_str) is only a
  # shallow copy for string-like wrappers, so the String handed back by
  # to_str could still be the caller's own data and the in-place edits
  # below (word[0]=, chop!) would leak into it.
  word = word.to_str.dup

  return word if word.length < 3

  # now map initial y to Y so that the patterns never treat it as vowel
  word[0] = 'Y' if word[0] == ?y

  # Step 1a: "sses" -> "ss", "ies" -> "i", and drop a final "s" that does
  # not follow another "s".
  if word =~ /(ss|i)es$/
    word = $` + $1
  elsif word =~ /([^s])s$/
    word = $` + $1
  end

  # Step 1b
  if word =~ /eed$/
    # "eed" -> "ee", only when the part before "eed" matches MGR0.
    word.chop! if $` =~ MGR0
  elsif word =~ /(ed|ing)$/
    stem = $`
    if stem =~ VOWEL_IN_STEM
      # Drop the "ed"/"ing", then tidy up the end of what is left.
      word = stem
      case word
      when /(at|bl|iz)$/ then word << "e"
      when /([^aeiouylsz])\1$/ then word.chop! # undouble the final letter
      when /^#{CC}#{V}[^aeiouwxy]$/o then word << "e"
      end
    end
  end

  # Final "y" -> "i" when the part before it matches VOWEL_IN_STEM.
  if word =~ /y$/
    stem = $`
    word = stem + "i" if stem =~ VOWEL_IN_STEM
  end

  # Step 2: replace a SUFFIX_1_REGEXP suffix with its STEP_2_LIST entry.
  if word =~ SUFFIX_1_REGEXP
    stem = $`
    suffix = $1 # saved now: the MGR0 match below overwrites $1
    word = stem + STEP_2_LIST[suffix] if stem =~ MGR0
  end

  # Step 3: replace one of these suffixes with its STEP_3_LIST entry.
  if word =~ /(icate|ative|alize|iciti|ical|ful|ness)$/
    stem = $`
    suffix = $1 # saved now: the MGR0 match below overwrites $1
    word = stem + STEP_3_LIST[suffix] if stem =~ MGR0
  end

  # Step 4: strip a SUFFIX_2_REGEXP suffix, or an "ion" that follows "s"
  # or "t", when the remaining stem matches MGR1.
  if word =~ SUFFIX_2_REGEXP
    stem = $`
    word = stem if stem =~ MGR1
  elsif word =~ /(s|t)(ion)$/
    stem = $` + $1 # the "s"/"t" stays part of the stem
    word = stem if stem =~ MGR1
  end

  # Step 5: drop a final "e" ...
  if word =~ /e$/
    stem = $`
    if (stem =~ MGR1) ||
       (stem =~ MEQ1 && stem !~ /^#{CC}#{V}[^aeiouwxy]$/o)
      word = stem
    end
  end

  # ... and reduce a final "ll" to "l" when the word matches MGR1.
  word.chop! if word =~ /ll$/ && word =~ MGR1

  # and turn initial Y back to y
  word[0] = 'y' if word[0] == ?Y

  word
end