class Clausewitz::Spelling::Checker

Constants

DEFAULT_SUGGESTION_COUNT

Public Class Methods

new(opts = {}) click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 19
# Builds a checker from an options hash. Every key is optional.
#
# opts - Hash read with these keys:
#        :custom_dict_root - wrapped in a Pathname when truthy
#        :custom_dicts     - falls back to an empty Array
#        :dialect_map      - falls back to an empty Hash
#        :suggestion_count - falls back to DEFAULT_SUGGESTION_COUNT
#        :verbose          - stored unchanged
#        :commit_range     - stored unchanged
#
# Also starts with an empty per-word result cache and an empty table of
# loaded dictionaries.
def initialize(opts = {})
  root = opts[:custom_dict_root]

  @custom_dict_root = root ? Pathname.new(root) : root
  @verbose          = opts[:verbose]
  @commit_range     = opts[:commit_range]
  @custom_dicts     = opts[:custom_dicts] || []
  @dialect_map      = opts[:dialect_map] || {}
  @suggestion_count = opts[:suggestion_count] || DEFAULT_SUGGESTION_COUNT

  @check_cache  = {}
  @loaded_dicts = {}
end

Public Instance Methods

check_file(filepath) click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 52
# Spell-checks every language section of one localisation file.
#
# filepath - String or Pathname of the file to check.
#
# Returns an InvalidFilepathResult when path validation raises, an
# UnparseableFileResult when parsing raises, and otherwise a FileResults
# holding one result per language found in the file. When a commit range
# was configured, only keys that `git diff` reports as added or changed in
# that range are checked; all other keys are handed to check_entries as
# ignored.
#
# Raises ArgumentError when loading a dictionary fails for any reason other
# than the dictionary being missing.
def check_file(filepath)
  $stderr.puts "Checking file '#{filepath}'..." if @verbose
  begin
    filepath = Pathname.new(filepath)
    validate_filepath!(filepath)
  rescue => e
    return InvalidFilepathResult.new(filepath, e)
  end

  # NOTE(review): this only announces the skip; execution continues and the
  # directory is handed to the parser below. Confirm whether an early return
  # is intended here.
  $stderr.puts "Skipping directory '#{filepath}'..." if filepath.directory?

  begin
    contents = Clausewitz::Localisation.parse_file(filepath)
  rescue Clausewitz::Localisation::UnparseableFileError => e
    return UnparseableFileResult.new(filepath, e.errors)
  rescue => e
    return UnparseableFileResult.new(filepath, e)
  end

  changed_keys = Set.new
  if @commit_range
    # Argument-vector form: no shell is involved, so quotes or other
    # metacharacters in the range or the path cannot alter the command.
    diff = begin
      IO.popen(['git', 'diff', '-U0', @commit_range.to_s, '--', filepath.to_s],
               err: File::NULL, &:read)
    rescue SystemCallError
      # git could not be started; treat it as "no changes", which is what
      # the empty output of a failed command amounts to.
      ''
    end
    # Scrub so that a diff containing bytes that are not valid UTF-8 cannot
    # make the regular expressions below raise.
    diff = diff.to_s.force_encoding('UTF-8').scrub

    diff.each_line do |line|
      next unless line =~ /^\+ /
      match = /\+  ([\w\d.'_-]+):([0-9]+)? \"/.match(line)
      next unless match
      # Keys may carry a numeric version suffix ("key:0"); keep it when present.
      changed_keys.add(match[2] ? "#{match[1]}:#{match[2]}" : match[1])
    end
  end

  checks = contents.map do |lang_name, entries|
    ignore =
      if @commit_range && entries
        entries.keys.reject { |key| changed_keys.include?(key) }
      else
        []
      end

    lc = language_config(lang_name)
    begin
      # load_dictionary! files dictionaries under the config's name, so the
      # "already loaded" test has to use that same key.
      load_dictionary!(lc) unless @loaded_dicts.key?(lc.name)
    rescue ArgumentError => e
      # Only a missing dictionary is an expected, reportable condition;
      # anything else must not be silently turned into a nil result.
      raise unless e.message =~ /unable to find the dictionary/
      MissingLangResult.new(lc.clausewitz_name)
    else
      check_entries(entries, lc, ignore)
    end
  end
  FileResults.new(filepath, checks)
end
load_dictionary!(config) click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 32
# Loads the Hunspell dictionary for one language and layers the configured
# custom dictionaries on top of it.
#
# config - language configuration object; this method reads its #name and
#          #full_name and calls #select_dialect when the dialect map has an
#          entry for config.name.
#
# Custom dictionaries are looked up under the custom dictionary root as
# "<full_name>_<custom dict>.dic"; files that do not exist are reported on
# stderr and skipped.
#
# Returns the loaded dictionary, which is also stored in @loaded_dicts under
# config.name.
def load_dictionary!(config)
  config.select_dialect(@dialect_map[config.name]) if @dialect_map.key?(config.name)

  hunspell = FFI::Hunspell.dict(config.full_name)

  @custom_dicts.each do |suffix|
    stem     = @custom_dict_root.join("#{config.full_name}_#{suffix}")
    dic_file = Pathname.new("#{stem}.dic")

    unless dic_file.exist?
      $stderr.puts("Could not load dictionary '#{dic_file}', skipping...")
      next
    end

    hunspell.add_dic(dic_file.to_s)
  end

  @loaded_dicts[config.name] = hunspell
end

Private Instance Methods

check_entries(entries, lc, ignore = []) click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 111
# Checks all entries of one language section.
#
# entries - Hash of localisation key => text, or nil. A 'spellcheck_ignore'
#           entry is removed from the hash and read as a comma-separated
#           list of keys to skip (surrounding whitespace is ignored); the
#           special value 'all' skips the whole language.
# lc      - language configuration; #clausewitz_name labels the results.
# ignore  - additional keys to skip (defaults to none).
#
# Returns an IgnoredLangResult when the directive lists 'all', otherwise a
# LangResults with one result per entry (empty when entries is nil).
def check_entries(entries, lc, ignore = [])
  directive = entries&.delete('spellcheck_ignore')
  directive_keys = directive ? directive.split(',').map(&:strip) : []

  # Only the directive can switch a language off; a localisation key that
  # merely happens to be called 'all' and arrives via `ignore` must not.
  return IgnoredLangResult.new(lc.clausewitz_name) if directive_keys.include?('all')
  return LangResults.new(lc.clausewitz_name, []) unless entries

  # A Set keeps the per-entry lookup cheap even when `ignore` holds nearly
  # every key of the file.
  skipped = Set.new(directive_keys)
  skipped.merge(ignore)
  skipped << 'spellcheck_ignore'

  checks = entries.map do |key, entry|
    if skipped.include?(key)
      IgnoredEntryResult.new(key)
    else
      check_entry(key, entry, lc)
    end
  end
  LangResults.new(lc.clausewitz_name, checks)
end
check_entry(key, entry, lc) click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 130
# Spell-checks the text of a single localisation entry.
#
# key   - localisation key, used to label the result.
# entry - the entry text, or nil. The string is not modified.
# lc    - language configuration; #base selects the tokenizer language.
#
# Returns a NullEntryResult when entry is nil, otherwise an EntryResults
# holding the non-nil results of check_word for each word of the text.
def check_entry(key, entry, lc)
  return NullEntryResult.new(key) unless entry

  # Non-destructive substitutions: the caller's string stays untouched and
  # frozen strings are accepted.
  text = entry
    # We don't want to pay attention to scripted localisation, so strip it
    # out first. The match is lazy so that ordinary words sitting between
    # two bracketed scripts are kept and checked.
    # TODO: Look into supporting escaped square brackets as part of the
    #       string.
    .gsub(/\[.+?\]/, '')
    .gsub(/\$([A-Z]|\||\d|=)+\$/, '')
    # Remove other localisation bits we don't care about. The optional
    # "|...$" tail is lazy for the same reason as the brackets above.
    .gsub(/§(%|\*|=|\d|[A-Za-z]|\+|-|!)/, '')
    .gsub(/(£|\$)\w+(\|.+?\$)?/, '')
    # Literal backslash sequences found in the text, not control characters.
    .gsub('\n', ' ')
    .gsub('\"', '"')

  opts = {
    language: lc.base.to_sym,
    punctuation: :none,
    downcase: false
  }
  words = PragmaticTokenizer::Tokenizer.new(opts).tokenize(text)
  words = words.flat_map { |word| word.split('—') }

  # A single full stop at the very end of a word is dropped; a word with
  # several full stops (e.g. U.S.A.) is left alone. A trailing colon is
  # dropped as well.
  words.map! do |word|
    if word =~ /[[:alpha:]]\.$/ && word.count('.') == 1
      word.sub(/\.$/, '')
    elsif word =~ /:$/
      word.sub(/:$/, '')
    else
      word
    end
  end

  checks = words.map do |word|
    check = check_word(word, lc)
    # The cache is keyed by the [word, lc] pair, the same key check_word
    # looks up.
    cache_key = [word, lc]
    @check_cache[cache_key] = check unless @check_cache.key?(cache_key)
    check
  end.compact
  EntryResults.new(key, checks)
end
check_word(word, lc) click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 198
# Checks one word against the dictionary loaded for the given language.
#
# word - the token to check.
# lc   - language configuration; #name selects the loaded dictionary.
#
# Returns the cached result when one exists for [word, lc]. Otherwise
# returns nil for tokens recognised as a number, plural number, ordinal,
# percentage, initial, psalm reference or tag, and for words the dictionary
# accepts; returns a MisspelledWordResult carrying at most
# @suggestion_count suggestions for anything else.
def check_word(word, lc)
  cache_key = [word, lc]
  return @check_cache[cache_key] if @check_cache.key?(cache_key)

  exempt = is_number?(word) ||
           is_plural_number?(word) ||
           is_ordinal?(word) ||
           is_percentage?(word) ||
           is_initial?(word) ||
           is_psalm?(word) ||
           is_tag?(word)
  return if exempt

  dictionary = @loaded_dicts[lc.name]
  return if dictionary.check?(word)

  MisspelledWordResult.new(word, dictionary.suggest(word).take(@suggestion_count))
end
is_icon?(word) click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 228
# Tests whether a word looks like an icon reference: a pound sign at the
# start of a line followed by at least one word character.
#
# Returns the match index (truthy) or nil.
def is_icon?(word)
  icon_pattern = /^£\w+/
  word =~ icon_pattern
end
is_initial?(word) click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 224
# Tests whether a word is a single capital ASCII letter followed by a full
# stop, such as the "J." in a name.
#
# Returns the match index (truthy) or nil.
def is_initial?(word)
  initial_pattern = /^[A-Z]\.$/
  word =~ initial_pattern
end
is_number?(word) click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 232
# Tests whether Kernel#Float accepts the word as a number.
#
# Returns true when the conversion succeeds and false when it raises any
# StandardError (for instance on non-numeric text or nil).
def is_number?(word)
  Float(word)
  true
rescue StandardError
  false
end
is_ordinal?(word) click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 236
# Tests whether a word contains digits immediately followed by an ordinal
# suffix (th, st, nd or rd). The pattern is not anchored, so it may match
# anywhere inside the word.
#
# Returns the match index (truthy) or nil.
def is_ordinal?(word)
  ordinal_pattern = /[0-9]+(th|st|nd|rd)/
  word =~ ordinal_pattern
end
is_percentage?(word) click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 240
# Tests whether a word contains a percentage, written either as a number
# followed by '%' or as '%' followed by a number. The number may carry a
# leading sign and a decimal part. Neither pattern is anchored.
#
# Returns the match index of the first pattern that matches (truthy), or
# nil when neither does.
def is_percentage?(word)
  number_then_sign = word =~ /(-|\+)?[0-9]+(\.[0-9]+)?%/
  return number_then_sign if number_then_sign

  word =~ /%(-|\+)?[0-9]+(\.[0-9]+)?/
end
is_plural_number?(word) click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 216
# Tests whether a word contains digits followed by an apostrophe and "s",
# as in "1980's". The pattern is not anchored.
#
# Returns the match index (truthy) or nil.
def is_plural_number?(word)
  plural_number_pattern = /\d+'s/
  word =~ plural_number_pattern
end
is_psalm?(word) click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 220
# Tests whether a word has the shape "<digits>:<digits>", like a chapter and
# verse reference.
#
# Returns the match index (truthy) or nil.
def is_psalm?(word)
  chapter_verse_pattern = /^\d+:\d+$/
  word =~ chapter_verse_pattern
end
is_tag?(word) click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 245
# Tests whether a word contains an '@' followed by at least three word
# characters. The pattern is not anchored.
#
# Returns the match index (truthy) or nil.
def is_tag?(word)
  tag_pattern = /@\w\w\w/
  word =~ tag_pattern
end
language_config(language_name) click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 249
# Looks up the configuration for a language in Localisation::LANG_MAP.
#
# language_name - language name; "l_" is prepended when the name does not
#                 already start a line with it.
#
# Returns the value stored alongside the matching key.
# Raises RuntimeError when no key matches.
def language_config(language_name)
  wanted = language_name =~ /^l_/ ? language_name : "l_#{language_name}"

  pair = Localisation::LANG_MAP.find { |config_key, _| wanted == config_key }
  raise("Unknown language '#{wanted}'!") unless pair

  pair.last
end
validate_filepath!(filepath) click to toggle source

Make sure a file to be checked is actually present and readable.

# File lib/clausewitz/spelling/checker.rb, line 259
# Make sure a file to be checked is actually present and readable.
#
# filepath - Pathname (or any object answering #exist? and #readable?).
#
# Returns nil when the path passes both checks.
# Raises RuntimeError when the path does not exist or cannot be read, so
# that a caller wrapping this call in a rescue can report the failure
# rather than carrying on with a bad path.
def validate_filepath!(filepath)
  raise "No such file '#{filepath}'!" unless filepath.exist?
  raise "Cannot read '#{filepath}'!" unless filepath.readable?
end