class AnyStyle::Feature::Line

Public Instance Methods

classify(chars)
   # File lib/anystyle/feature/line.rb
# Assigns a coarse category symbol to a line of text.
#
# Leading whitespace is removed first, then the patterns below are tried
# in order; the first one that matches decides the result, so the order
# of the checks is significant.
#
# @param chars [String] the text of the line
# @return [Symbol] one of :toc, :list, :title, :cap, :num, :copyright,
#   :http, or :none when no pattern matches
def classify(chars)
  text = chars.lstrip

  # Dot leaders / runs of ellipses, or a letter followed by a wide gap and a trailing number.
  return :toc if text.match?(/\.\s*\.\s*\.\s*\.|……+/) || text.match?(/\p{L}\s{5,}\d+$/)

  # A leading number (optionally bracketed and/or dotted) followed by a word.
  return :list if text.match?(/^[\[\(]?\d+\.?[\]\)]?\s+\p{L}+/)

  # An uppercase letter, one or more dotted numbers, then a word.
  return :title if text.match?(/^(\p{Lu}\.?)\s*(\d+\.)+\s+\p{L}+/)

  # Table / figure / equation / graph keywords, optionally preceded by one word.
  return :cap if text.match?(/^(\w+\s)?(tab(le|elle|\.)|fig(ure|\.)|equation|graph|abb(ildung)?)/i)

  # A bare number (optionally wrapped in dashes) or only the letters i, v, x.
  return :num if text.match?(/^\p{Pd}?\d+\p{Pd}?$/) || text.match?(/^[ivx]+$/i)

  return :copyright if text.match?(/copyright|©|rights reserved/i)
  return :http if text.match?(/https?:\/\//i)

  :none
end
observe(token, page:, seq:, **opts)
   # File lib/anystyle/feature/line.rb
 # Builds the array of features observed for one line.
 #
 # The returned array holds, in order: the letter count, the line width,
 # four ratios (uppercase/letters, letters/width, whitespace/width,
 # punctuation/width), the line width relative to +page.width+, the
 # category from +classify+, and two +page_ratio+ values derived from
 # +seq.line_counts+ and +seq.nnum_counts+ against the number of pages.
 #
 # NOTE(review): +display_chars+, +count+, +ratio+ and +nnum+ are defined
 # outside this listing; their exact semantics should be confirmed there.
 #
 # @param token the line being observed (passed to +display_chars+)
 # @param page an object responding to +width+
 # @param seq an object responding to +line_counts+, +nnum_counts+ and +pages+
 # @param opts additional keyword arguments; accepted but not used here
 # @return [Array] the ten features described above
 def observe(token, page:, seq:, **opts)
   chars = display_chars(token)
   width = chars.length

   letters  = count(chars, /\p{L}/)
   capitals = count(chars, /\p{Lu}/)
   marks    = count(chars, /[\p{Pd}:.,&\(\)"'”„’‚´«「『‘“`»」』]/)
   spaces   = count(chars, /\s/)

   page_count = seq.pages.length

   [
     letters,
     width,
     ratio(capitals, letters),
     ratio(letters, width),
     ratio(spaces, width),
     ratio(marks, width),
     ratio(width, page.width),
     classify(chars),
     page_ratio(seq.line_counts[chars], page_count),
     page_ratio(seq.nnum_counts[nnum(chars)], page_count)
   ]
 end
page_ratio(a, b)
   # File lib/anystyle/feature/line.rb
# Buckets the quotient a / b into a small discrete value.
#
# @param a [#to_f] the numerator; converted with +to_f+, so +nil+ counts as 0
# @param b [Numeric] the denominator
# @return [String, Integer] '=' when the quotient is exactly 1, '+' when
#   it is greater than 1, otherwise the quotient times ten, rounded to
#   the nearest integer. Returns 0 when the quotient is undefined
#   (zero divided by zero).
def page_ratio(a, b)
  r = a.to_f / b

  # 0.0 / 0 is NaN; rounding NaN raises FloatDomainError, so treat it as 0.
  return 0 if r.nan?
  return '=' if r == 1
  return '+' if r > 1

  (r * 10).round
end