class CharDet::Latin1Prober

Public Class Methods

new() click to toggle source
Calls superclass method CharDet::CharSetProber::new
# File lib/rchardet/latin1prober.rb, line 95
# Builds a new prober: runs the superclass initializer, then calls +reset+
# so the prober's own counters start from a clean state.
def initialize
  super
  reset
end

Public Instance Methods

feed(aBuf) click to toggle source
# File lib/rchardet/latin1prober.rb, line 110
# Feeds a chunk of data to the prober and updates its frequency counters.
#
# The buffer is first passed through +filter_with_english_letters+. Each
# byte of the result is mapped to a character class via
# +Latin1_CharToClass+, and the (previous class, current class) pair is
# looked up in +Latin1ClassModel+. A looked-up value of 0 sets the state to
# +ENotMe+ and stops processing the chunk; any other value is used as the
# index of the +@freqCounter+ slot to increment.
#
# aBuf:: the string to analyse
#
# Returns whatever +get_state+ reports once the chunk has been processed.
def feed(aBuf)
  filter_with_english_letters(aBuf).each_byte do |byte|
    # each_byte already yields the Integer byte value, so it can index the
    # class table directly; no chr/bytes round trip (and its per-byte
    # String/Array allocations) is needed.
    char_class = Latin1_CharToClass[byte]
    freq = Latin1ClassModel[(@lastCharClass * CLASS_NUM) + char_class]
    if freq == 0
      @state = ENotMe
      break
    end
    @freqCounter[freq] += 1
    @lastCharClass = char_class
  end

  get_state
end
get_charset_name() click to toggle source
# File lib/rchardet/latin1prober.rb, line 106
# Name of the charset this prober reports.
def get_charset_name
  "windows-1252"
end
get_confidence() click to toggle source
# File lib/rchardet/latin1prober.rb, line 127
# Returns the confidence (a Float) derived from the frequency counters.
#
# When +get_state+ reports +ENotMe+ the result is a fixed 0.01. Otherwise
# the result is the share of counts held in +@freqCounter[3]+ minus 20 times
# the share held in +@freqCounter[1]+, floored at 0.0 and then halved. With
# no counts recorded at all the result is 0.0.
def get_confidence
  return 0.01 if get_state == ENotMe

  # Seeded with 0 so an empty counter array sums to 0 instead of nil.
  total = @freqCounter.inject(0) { |acc, count| acc + count }
  return 0.0 if total < 0.01

  # The counters are Integers, so the numerator is made a Float before
  # dividing; a plain Integer / Integer would truncate the share of
  # @freqCounter[3] to 0 whenever any other slot was non-zero.
  confidence = (@freqCounter[3] - (@freqCounter[1] * 20.0)) / total
  confidence = 0.0 if confidence < 0.0

  # Lower the confidence of latin1 so that other, more accurate detectors
  # can take priority.
  confidence * 0.5
end
reset() click to toggle source
Calls superclass method CharDet::CharSetProber#reset
# File lib/rchardet/latin1prober.rb, line 100
# Puts the prober back into its starting state: the last seen character
# class becomes +OTH+, the frequency counters are replaced by a fresh array
# of +FREQ_CAT_NUM+ zeros, and the superclass +reset+ is then invoked.
def reset
  @lastCharClass = OTH
  @freqCounter = Array.new(FREQ_CAT_NUM, 0)
  super
end