class AnyStyle::Feature::Words

Constants

TITLE_WORDS

Attributes

dictionary[R]

Public Class Methods

new(dictionary:, **opts)
Calls superclass method AnyStyle::Feature::new
   # File lib/anystyle/feature/words.rb
# Sets up the feature with the dictionary it will consult.
#
# dictionary - required keyword; stored unchanged in @dictionary.
# opts       - every other keyword option, passed straight on to the
#              superclass initializer.
#
# The superclass initializer runs first; the dictionary is assigned only
# after it returns.
def initialize(dictionary:, **opts)
  super(**opts)
  @dictionary = dictionary
end

Public Instance Methods

classify(word)
   # File lib/anystyle/feature/words.rb
# Sorts a single word into one of four coarse categories.
#
# word - the word to inspect, or nil when there is none.
#
# Returns:
#   :none    - word is nil
#   :number  - word is all digits, is one of the roman-numeral forms the
#              pattern accepts (ii, iii, iv, v, vii, viii, ix, x, xii,
#              xiii; any letter case), or is the empty string
#   :numeric - word contains at least one digit but is not a :number
#   :alpha   - anything else
def classify(word)
  return :none if word.nil?

  case word
  when /^(\d+|[vx]?iii?|i?[vx]|)$/i then :number
  when /\d/ then :numeric
  else :alpha
  end
end
observe(token, **opts)
   # File lib/anystyle/feature/words.rb
# Builds the word-level feature vector for +token+.
#
# token - the text to analyse; it is split on whitespace, each piece is
#         passed through +canonize+ (defined outside this listing) and
#         pieces that come back empty are dropped.
# opts  - accepted but not used by this method.
#
# Returns an Array holding, in order:
#   1. the number of words
#   2. the mean word length (integer division; 0 when there are no words)
#   3. the median word length (integer division; 0 when there are no words)
#   4. the number of places where two or more whitespace characters sit
#      between non-whitespace characters
#   5. the class of the first word as reported by +classify+
#   6. the number of digit runs, each with an optional decimal part
#   7. +ratio+ of words found in TITLE_WORDS to the word count
#   8+ one +ratio+ per entry returned by +dictionary.tag_counts+
#
# NOTE(review): +ratio+ and +canonize+ are not visible here; presumably
# +ratio+ copes with a zero word count -- confirm against the superclass.
def observe(token, **opts)
  words = token.scan(/\S+/).map { |word| canonize word }.reject(&:empty?)
  total = words.length

  spacer_count = token.scan(/\S\s\s+\S/).length
  # The capture group makes scan return nested arrays, but there is still
  # exactly one entry per match, so the length is the match count.
  number_count = token.scan(/\d+(\.\d+)?/).length
  title_count = words.count { |word| TITLE_WORDS.include?(word) }
  counts = dictionary.tag_counts(words)

  if total.zero?
    avg = 0
    med = 0
  else
    sizes = words.map(&:length).sort
    middle = sizes.length / 2
    avg = sizes.sum / sizes.length
    # Odd count: the single middle element; even count: the integer mean
    # of the two elements around the middle.
    med =
      if sizes.length.odd?
        sizes[middle]
      else
        (sizes[middle - 1] + sizes[middle]) / 2
      end
  end

  [
    total,
    avg,
    med,
    spacer_count,
    classify(words.first),
    number_count,
    ratio(title_count, total),
    *counts.map { |cnt| ratio(cnt, total) }
  ]
end