class Analyzers::Utils::HumanLanguageDetector

NOTE: The implementation decisions are based on the results of benchmarks/language_detector.rb.

Public Class Methods

new() click to toggle source
# File lib/crypto-toolbox/analyzers/utils/human_language_detector.rb, line 6
# Creates the two checker objects that the detection methods delegate to.
def initialize
  # Spell checker; spell_valid? calls human_language? on it.
  @spell_checker = ::Analyzers::Utils::SpellChecker.new
  # ASCII detector; ascii_valid? calls ascii_lingual? on it.
  @ascii_checker = ::Analyzers::Utils::AsciiLanguageDetector.new
end

Public Instance Methods

human_language?(buffer) click to toggle source
# File lib/crypto-toolbox/analyzers/utils/human_language_detector.rb, line 22
# Tells whether a single buffer passes both the ASCII check and the
# spell check.
#
# The ASCII check runs first; when it fails, its (falsy) result is returned
# as-is and the spell check is not run at all.
#
# @param buffer the buffer to examine; handed to ascii_valid? and spell_valid?
# @return the falsy ASCII result, or otherwise whatever spell_valid? returns
def human_language?(buffer)
  ascii_result = ascii_valid?(buffer)
  return ascii_result unless ascii_result

  spell_valid?(buffer)
end
human_language_entries(buffers,spellcheck: true ) click to toggle source

NOTE: We don't call the human_language? method here, so that processing is faster and the code is more idiomatic.

# File lib/crypto-toolbox/analyzers/utils/human_language_detector.rb, line 13
# Filters +buffers+ down to the entries that look like human language.
#
# The ASCII check is always applied first. When +spellcheck+ is enabled, the
# spell check is then applied only to the buffers that passed the ASCII
# check, so the result agrees with human_language? (ascii AND spell).
#
# @param buffers a collection responding to +select+; each entry is passed to
#   ascii_valid? and, when spellcheck is enabled, to spell_valid?
# @param spellcheck [Boolean] when true (the default), entries must also
#   pass spell_valid?
# @return the entries that passed every enabled check, as produced by +select+
def human_language_entries(buffers, spellcheck: true)
  ascii_passed = buffers.select { |b| ascii_valid?(b) }
  return ascii_passed unless spellcheck

  # Spell-check the ASCII-filtered entries, not the raw input: selecting from
  # +buffers+ here would discard the result of the ASCII check entirely.
  ascii_passed.select { |b| spell_valid?(b) }
end

Private Instance Methods

ascii_valid?(buf) click to toggle source
# File lib/crypto-toolbox/analyzers/utils/human_language_detector.rb, line 28
# Delegates the ASCII check for one buffer to the ASCII checker.
#
# @param buf the buffer to check; passed through unchanged to ascii_lingual?
# @return whatever ascii_lingual? returns (presumably a boolean — confirm
#   against AsciiLanguageDetector)
def ascii_valid?(buf)
  @ascii_checker.ascii_lingual?(buf)
end
spell_valid?(buf) click to toggle source
# File lib/crypto-toolbox/analyzers/utils/human_language_detector.rb, line 32
# Delegates the spell check for one buffer to the spell checker.
#
# @param buf an object responding to +str+; the value of +str+ is what gets
#   spell-checked
# @return whatever the spell checker's human_language? returns
def spell_valid?(buf)
  text = buf.str
  @spell_checker.human_language?(text)
end