class HttpSpell::SpellChecker

Public Class Methods

new(personal_dictionary_path = nil, tracing: false) click to toggle source
# File lib/httpspell/spellchecker.rb, line 3
# Sets up the checker.
#
# @param personal_dictionary_path [String, nil] optional path that is turned
#   into a "-p <path>" command-line fragment; when it is nil or false no
#   fragment is stored at all
# @param tracing [Boolean] stored as-is in @tracing
def initialize(personal_dictionary_path = nil, tracing: false)
  @tracing = tracing
  return unless personal_dictionary_path

  @personal_dictionary_arg = "-p #{personal_dictionary_path}"
end

Public Instance Methods

check(doc, lang) click to toggle source
# File lib/httpspell/spellchecker.rb, line 8
# Spell-checks an HTML document and returns the words hunspell reports.
#
# The document is converted to plain text by pandoc and the result is piped
# into hunspell running in list mode (-l).
#
# @param doc [String] the HTML document to check
# @param lang [String] language identifier (e.g. "en-US"); converted to a
#   hunspell dictionary name via #translate
# @return [Array<String>] the unique whitespace-separated tokens that
#   hunspell wrote to its standard output
def check(doc, lang)
  # Required locally because the file's require list is not visible from here.
  require 'shellwords'

  # The command strings below may be interpreted by a shell, and the language
  # tag presumably originates from the document being checked, so it must be
  # escaped before interpolation. This also turns an empty tag into '' so that
  # -d cannot swallow the following option.
  dictionary = Shellwords.escape(translate(lang))

  commands = [
    'pandoc --from html --to plain',
    "hunspell -d #{dictionary} #{@personal_dictionary_arg} -i UTF-8 -l",
  ]

  if @tracing
    warn "Piping the HTML document into the following chain of commands:"
    warn commands
  end

  Open3.pipeline_rw(*commands) do |stdin, stdout, _wait_thrs|
    stdin.puts(doc)
    # Closing stdin signals end-of-input so the pipeline can finish.
    stdin.close
    stdout.read.split.uniq
  end
end

Private Instance Methods

translate(lang) click to toggle source

The W3C [recommends](https://www.w3.org/International/questions/qa-html-language-declarations) specifying the language using identifiers as per [RFC 5646](https://tools.ietf.org/html/rfc5646), which uses dashes. Hunspell, however, uses underscores. This method translates RFC-style identifiers to hunspell-style ones.

# File lib/httpspell/spellchecker.rb, line 32
# Converts a dash-separated language identifier (RFC 5646 style, e.g.
# "en-US") into the underscore-separated form (e.g. "en_US").
#
# @param lang [String] language identifier
# @return [String] a new string with every "-" replaced by "_"
def translate(lang)
  rfc_separator = '-'
  hunspell_separator = '_'
  lang.tr(rfc_separator, hunspell_separator)
end