class AnyStyle::Feature::Words
Constants
- TITLE_WORDS
Attributes
dictionary[R]
Public Class Methods
new(dictionary:, **opts)
click to toggle source
Calls superclass method
AnyStyle::Feature::new
# File lib/anystyle/feature/words.rb
#
# Sets up the feature with the dictionary it will consult.
#
# dictionary:: stored in @dictionary (the class listing exposes it as a
#              read-only +dictionary+ attribute).
# opts::       any remaining keyword options; passed through unchanged to
#              the superclass initializer.
def initialize(dictionary:, **opts)
  super(**opts)
  @dictionary = dictionary
end
Public Instance Methods
classify(word)
click to toggle source
# File lib/anystyle/feature/words.rb
#
# Classifies +word+ according to its numeric content.
#
# Returns one of:
# :number::  the whole string is made of digits, is one of the short
#            roman-numeral forms the pattern accepts (ii, iii, vii, viii,
#            xii, xiii, v, x, iv, ix; case-insensitive), or is empty.
# :numeric:: the string contains at least one digit somewhere.
# :none::    +word+ is nil.
# :alpha::   anything else.
def classify(word)
  case word
  # \A/\z anchor the whole string. The line anchors ^/$ would let a single
  # line of a multi-line string decide the result (e.g. "12\nfoo").
  # The trailing empty alternative is kept on purpose so that "" still
  # maps to :number, as it always has.
  when /\A(\d+|[vx]?iii?|i?[vx]|)\z/i
    :number
  when /\d/
    :numeric
  when nil
    :none
  else
    :alpha
  end
end
observe(token, **opts)
click to toggle source
# File lib/anystyle/feature/words.rb
#
# Builds the word-level feature vector for +token+.
#
# The returned array holds, in order:
# 1. the number of non-empty canonized words,
# 2. the mean word length (integer division),
# 3. the median word length (integer division for even counts),
# 4. the number of places where two or more whitespace characters
#    separate two non-whitespace characters,
# 5. the classification of the first word (see #classify),
# 6. the number of digit runs (optionally with a decimal part),
# 7. the ratio of words found in TITLE_WORDS,
# 8. one ratio per entry returned by dictionary.tag_counts.
#
# +opts+ is accepted but not used here.
def observe(token, **opts)
  terms = token.scan(/\S+/).map { |raw| canonize(raw) }.reject(&:empty?)
  total = terms.length

  gap_count    = token.scan(/\S\s\s+\S/).length
  number_count = token.scan(/\d+(\.\d+)?/).length
  title_hits   = terms.count { |term| TITLE_WORDS.include?(term) }
  tag_totals   = dictionary.tag_counts(terms)

  if total.zero?
    mean = 0
    median = 0
  else
    sizes = terms.map(&:length).sort
    mid = total / 2
    mean = sizes.inject(:+) / total
    # Even count: average the two central lengths; odd count: take the middle.
    median = total.even? ? (sizes[mid - 1] + sizes[mid]) / 2 : sizes[mid]
  end

  tag_ratios = tag_totals.map { |hits| ratio(hits, total) }

  [
    total,
    mean,
    median,
    gap_count,
    classify(terms[0]),
    number_count,
    ratio(title_hits, total),
    *tag_ratios
  ]
end