class Clausewitz::Spelling::Checker
Constants
- DEFAULT_SUGGESTION_COUNT
Public Class Methods
new(opts = {})
click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 19
# Builds a checker from an options hash.
#
# @param opts [Hash] recognised keys:
#   :custom_dict_root  - directory joined with custom dictionary names
#                        (wrapped in a Pathname when given)
#   :custom_dicts      - custom dictionary names (defaults to [])
#   :dialect_map       - language name => dialect (defaults to {})
#   :suggestion_count  - suggestions kept per misspelling
#                        (defaults to DEFAULT_SUGGESTION_COUNT)
#   :verbose           - when truthy, progress is written to $stderr
#   :commit_range      - when set, only keys changed in this git range are checked
def initialize(opts = {})
  root = opts[:custom_dict_root]
  @custom_dict_root = root ? Pathname.new(root) : root

  @custom_dicts     = opts[:custom_dicts] || []
  @dialect_map      = opts[:dialect_map] || {}
  @suggestion_count = opts[:suggestion_count] || DEFAULT_SUGGESTION_COUNT

  @verbose      = opts[:verbose]
  @commit_range = opts[:commit_range]

  # Per-instance caches: word check results and loaded dictionaries.
  @check_cache  = {}
  @loaded_dicts = {}
end
Public Instance Methods
check_file(filepath)
click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 52
# Spell-checks every language section of one localisation file.
#
# When a commit range is configured, only entries whose keys appear on added
# lines of `git diff` for that range are checked; every other key of the file
# is handed to check_entries as ignored.
#
# @param filepath [String, Pathname] path of the localisation file
# @return [InvalidFilepathResult] if wrapping or validating the path raises
# @return [UnparseableFileResult] if parsing the file raises
# @return [FileResults] the per-language results otherwise
# @raise [ArgumentError] if loading a dictionary fails for a reason other
#   than the dictionary being missing
def check_file(filepath)
  $stderr.puts "Checking file '#{filepath}'..." if @verbose

  begin
    filepath = Pathname.new(filepath)
    validate_filepath!(filepath)
  rescue => e
    return InvalidFilepathResult.new(filepath, e)
  end

  # NOTE(review): this only reports the directory; execution continues and the
  # parse below is still attempted. Confirm whether an early return is wanted.
  $stderr.puts "Skipping directory '#{filepath}'..." if filepath.directory?

  begin
    contents = Clausewitz::Localisation.parse_file(filepath)
  rescue Clausewitz::Localisation::UnparseableFileError => e
    return UnparseableFileResult.new(filepath, e.errors)
  rescue => e
    return UnparseableFileResult.new(filepath, e)
  end

  changed_keys = Set.new
  if @commit_range
    # Argument-vector form: no shell is involved, so quotes or other
    # metacharacters in the range or the path cannot alter the command.
    diff = IO.popen(
      ['git', 'diff', '-U0', @commit_range.to_s, filepath.to_s],
      err: File::NULL, &:read
    )
    diff = diff.force_encoding('UTF-8')
    diff.each_line do |line|
      # Only added lines are of interest.
      next unless line =~ /^\+ /
      match = /\+ ([\w\d.'_-]+):([0-9]+)? \"/.match(line)
      next unless match
      # Keys may carry a numeric version suffix ("key:0").
      changed_keys.add(match[2] ? match[1] + ':' + match[2] : match[1])
    end
  end

  checks = contents.map do |lang_name, entries|
    ignore = nil
    if @commit_range
      ignore = entries&.keys&.reject { |key| changed_keys.include?(key) }
    end
    ignore ||= []

    lc = language_config(lang_name)
    begin
      # Dictionaries are stored and looked up under the config's name, so the
      # "already loaded" test has to use that same key.
      load_dictionary!(lc) unless @loaded_dicts.key?(lc.name)
    rescue ArgumentError => e
      # Only a missing dictionary is an expected outcome; anything else is
      # re-raised instead of leaving a nil in the results.
      raise unless e.message =~ /unable to find the dictionary/
      MissingLangResult.new(lc.clausewitz_name)
    else
      check_entries(entries, lc, ignore)
    end
  end

  FileResults.new(filepath, checks)
end
load_dictionary!(config)
click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 32
# Loads the Hunspell dictionary for a language configuration, adds every
# configured custom dictionary that exists on disk, and stores the result in
# the loaded-dictionary table under the configuration's name.
#
# @param config [Object] language configuration; must respond to name,
#   full_name and select_dialect
# @return [Object] the dictionary object that was stored
def load_dictionary!(config)
  language = config.name
  config.select_dialect(@dialect_map[language]) if @dialect_map.key?(language)

  hunspell = FFI::Hunspell.dict(config.full_name)

  @custom_dicts.each do |custom_dict|
    base = @custom_dict_root.join("#{config.full_name}_#{custom_dict}")
    dic_file = Pathname.new("#{base}.dic")

    # A missing custom dictionary is reported and skipped.
    unless dic_file.exist?
      $stderr.puts("Could not load dictionary '#{dic_file}', skipping...")
      next
    end

    hunspell.add_dic(dic_file.to_s)
  end

  @loaded_dicts[config.name] = hunspell
end
Private Instance Methods
check_entries(entries, lc, ignore = [])
click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 111
# Checks every entry of one language section.
#
# The section may carry a 'spellcheck_ignore' entry: a comma-separated list of
# keys to skip, or 'all' to skip the whole language. That entry is removed
# from +entries+ before checking.
#
# @param entries [Hash, nil] key => text for one language
# @param lc [Object] language configuration; must respond to clausewitz_name
# @param ignore [Array<String>] additional keys to skip
# @return [IgnoredLangResult] if the section declares 'all' as ignored
# @return [LangResults] one result per entry otherwise (empty when +entries+
#   is nil)
def check_entries(entries, lc, ignore = [])
  spellcheck_ignore = entries&.delete('spellcheck_ignore')
  # Tolerate "a, b" as well as "a,b".
  declared = spellcheck_ignore ? spellcheck_ignore.split(',').map(&:strip) : []

  # 'all' is a switch of the file's own ignore list. Caller-supplied keys are
  # deliberately not consulted here, so an entry that merely happens to be
  # named "all" cannot silence the whole language.
  return IgnoredLangResult.new(lc.clausewitz_name) if declared.include?('all')
  return LangResults.new(lc.clausewitz_name, []) unless entries

  # Hash lookup keeps the per-entry test constant-time even when +ignore+
  # lists nearly every key of the file.
  ignored = {}
  declared.each { |key| ignored[key] = true }
  ignore.each { |key| ignored[key] = true }
  ignored['spellcheck_ignore'] = true

  checks = entries.map do |key, entry|
    ignored.key?(key) ? IgnoredEntryResult.new(key) : check_entry(key, entry, lc)
  end
  LangResults.new(lc.clausewitz_name, checks)
end
check_entry(key, entry, lc)
click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 130
# Spell-checks the text of a single localisation entry.
#
# Scripted and formatting markup is stripped, the remainder is tokenized, and
# each token is passed to check_word. Results are recorded in the check cache
# under [word, lc]. The caller's string is left untouched.
#
# @param key [String] the entry's key
# @param entry [String, nil] the entry's text
# @param lc [Object] language configuration; must respond to base
# @return [NullEntryResult] if +entry+ is nil or false
# @return [EntryResults] the non-nil word results otherwise
def check_entry(key, entry, lc)
  return NullEntryResult.new(key) unless entry

  # We don't want to pay attention to scripted localisation, so we'll strip
  # it out before we start. The match is lazy so that prose sitting between
  # two bracketed segments on the same line is kept, and the non-bang gsub
  # gives us a private copy to edit (the input may be frozen or shared).
  # TODO: Look into supporting escaped square brackets as part of the
  #       string.
  text = entry.gsub(/\[.+?\]/, '')
  text.gsub!(/\$([A-Z]|\||\d|=)+\$/, '')

  # Remove other localisation bits we don't care about.
  text.gsub!(/§(%|\*|=|\d|[A-Za-z]|\+|-|!)/, '')
  text.gsub!(/(£|\$)\w+(\|.+\$)?/, '')

  ## We should also remove punctuation that is never part of words, like
  ## exclamation points, commas, semi-colons, and question marks.
  ## We should be using proper apostrophes for possessives in our loc.
  #entry.gsub!(/(!|;|\?|"|“|”|…|:|\(|\))/, '')

  ## If a word has one full stop at the end with no other full stops
  ## elsewhere in the word, it's probably an acronym or initialism like
  ## U.S.A. and so we should avoid stripping it. Otherwise, it's probably
  ## the end of a sentence and can be ignored.
  #words = entry.split(/\s|—/)
  #words.map! do |word|
  #  word.sub!(/^'/, '')
  #  word.sub!(/'?,?'?$/, '')
  #  if word.end_with?('...')
  #    word.sub(/\.\.\.$/, '')
  #  elsif word =~ /[[:alpha:]]\.$/ && word.chars.count('.') == 1
  #    word.sub(/\.$/, '')
  #  elsif word =~ /\d\.$/ && word.chars.count('.') <= 2
  #    word.sub(/\.$/, '')
  #  else
  #    word
  #  end
  #end.join(" ")

  opts = {
    language: lc.base.to_sym,
    punctuation: :none,
    downcase: false
  }

  # These are single-quoted on purpose: they match the literal two-character
  # sequences backslash-n and backslash-quote found in the text.
  text.gsub!('\n', ' ')
  text.gsub!('\"', '"')

  words = PragmaticTokenizer::Tokenizer.new(opts).tokenize(text)
  words = words.map { |word| word.split('—') }.flatten(1)
  words.map! do |word|
    if word =~ /[[:alpha:]]\.$/ && word.chars.count('.') == 1
      # A single trailing full stop after a letter is dropped.
      word.sub(/\.$/, '')
    elsif word =~ /:$/
      word.sub(/:$/, '')
    else
      word
    end
  end

  checks = words.map do |word|
    check = check_word(word, lc)
    # Same key shape that check_word reads from the cache.
    @check_cache[[word, lc]] = check unless @check_cache.key?([word, lc])
    check
  end.compact

  EntryResults.new(key, checks)
end
check_word(word, lc)
click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 198
# Checks one token against the dictionary loaded for the language.
#
# A cached result for [word, lc] is returned as-is. Tokens recognised by the
# number, plural-number, ordinal, percentage, initial, psalm or tag
# predicates are not checked.
#
# @param word [String] the token to check
# @param lc [Object] language configuration; must respond to name
# @return [MisspelledWordResult, nil] nil when the token is skipped or the
#   dictionary accepts it
def check_word(word, lc)
  cache_key = [word, lc]
  return @check_cache[cache_key] if @check_cache.key?(cache_key)

  skip = is_number?(word) ||
         is_plural_number?(word) ||
         is_ordinal?(word) ||
         is_percentage?(word) ||
         is_initial?(word) ||
         is_psalm?(word) ||
         is_tag?(word)
  return if skip

  dictionary = @loaded_dicts[lc.name]
  return if dictionary.check?(word)

  MisspelledWordResult.new(word, dictionary.suggest(word).take(@suggestion_count))
end
is_icon?(word)
click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 228
# Tests whether a line of +word+ begins with a pound sign followed by word
# characters.
#
# @param word [String] the token to test
# @return [Integer, nil] index of the match, or nil when there is none
def is_icon?(word)
  icon_pattern = /^£\w+/
  word =~ icon_pattern
end
is_initial?(word)
click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 224
# Tests whether a line of +word+ is exactly one capital letter (A-Z) followed
# by a full stop.
#
# @param word [String] the token to test
# @return [Integer, nil] index of the match, or nil when there is none
def is_initial?(word)
  initial_pattern = /^[A-Z]\.$/
  word =~ initial_pattern
end
is_number?(word)
click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 232
# Tests whether +word+ can be converted by Kernel#Float.
#
# Only the conversion failures are rescued (ArgumentError for text that is
# not a number, TypeError for values such as nil), so unrelated errors are
# no longer swallowed.
#
# @param word [String, nil] the token to test
# @return [Boolean] true when the conversion succeeds, false otherwise
def is_number?(word)
  Float(word)
  true
rescue ArgumentError, TypeError
  false
end
is_ordinal?(word)
click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 236
# Tests whether +word+ contains digits directly followed by "th", "st", "nd"
# or "rd". The pattern is not anchored, so the match may sit anywhere in the
# string.
#
# @param word [String] the token to test
# @return [Integer, nil] index of the match, or nil when there is none
def is_ordinal?(word)
  ordinal_pattern = /[0-9]+(th|st|nd|rd)/
  word =~ ordinal_pattern
end
is_percentage?(word)
click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 240
# Tests whether +word+ contains a number (optional sign, optional decimal
# part) with a percent sign directly after it or directly before it. The
# trailing-sign form is tried first.
#
# @param word [String] the token to test
# @return [Integer, nil] index of the first form that matches, or nil
def is_percentage?(word)
  trailing_sign = word =~ /(-|\+)?[0-9]+(\.[0-9]+)?%/
  return trailing_sign if trailing_sign

  word =~ /%(-|\+)?[0-9]+(\.[0-9]+)?/
end
is_plural_number?(word)
click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 216
# Tests whether +word+ contains digits directly followed by "'s". The pattern
# is not anchored, so the match may sit anywhere in the string.
#
# @param word [String] the token to test
# @return [Integer, nil] index of the match, or nil when there is none
def is_plural_number?(word)
  plural_pattern = /\d+'s/
  word =~ plural_pattern
end
is_psalm?(word)
click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 220
# Tests whether a line of +word+ consists solely of digits, a colon, and
# digits (for example "3:16").
#
# @param word [String] the token to test
# @return [Integer, nil] index of the match, or nil when there is none
def is_psalm?(word)
  verse_pattern = /^\d+:\d+$/
  word =~ verse_pattern
end
is_tag?(word)
click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 245
# Tests whether +word+ contains an at sign directly followed by three word
# characters. The pattern is not anchored, so the match may sit anywhere in
# the string.
#
# @param word [String] the token to test
# @return [Integer, nil] index of the match, or nil when there is none
def is_tag?(word)
  tag_pattern = /@\w\w\w/
  word =~ tag_pattern
end
language_config(language_name)
click to toggle source
# File lib/clausewitz/spelling/checker.rb, line 249
# Looks up the configuration for a language in Localisation::LANG_MAP.
#
# The name is prefixed with "l_" first unless a line of it already starts
# with that prefix.
#
# @param language_name [String] language name, with or without "l_"
# @return [Object] the value stored for the matching LANG_MAP key
# @raise [RuntimeError] if no LANG_MAP key equals the prefixed name
def language_config(language_name)
  wanted = language_name =~ /^l_/ ? language_name : "l_#{language_name}"

  pair = Localisation::LANG_MAP.find { |config_key, _config| wanted == config_key }
  return pair.last if pair

  fail("Unknown language '#{wanted}'!")
end
validate_filepath!(filepath)
click to toggle source
Make sure a file to be checked is actually present and readable.
# File lib/clausewitz/spelling/checker.rb, line 259
# Ensures the file to be checked exists and can be read.
#
# Failures are raised rather than printed, so that a caller wrapping this
# call in a rescue can turn them into a result.
#
# @param filepath [Pathname] the path to validate
# @return [nil] when the path exists and is readable
# @raise [RuntimeError] if the path does not exist or is not readable
def validate_filepath!(filepath)
  raise "No such file '#{filepath}'!" unless filepath.exist?
  raise "Cannot read '#{filepath}'!" unless filepath.readable?
end