class CharDet::Latin1Prober
Public Class Methods
new()
click to toggle source
Calls superclass method
CharDet::CharSetProber::new
# File lib/rchardet/latin1prober.rb, line 95
# Builds the prober: runs the superclass initializer first, then puts this
# instance into its starting state via #reset.
def initialize
  super
  reset
end
Public Instance Methods
feed(aBuf)
click to toggle source
# File lib/rchardet/latin1prober.rb, line 110
# Consumes a chunk of input and updates the frequency counters.
#
# The buffer is first passed through filter_with_english_letters. Each
# remaining byte is mapped to a character class (Latin1_CharToClass), and the
# (previous class, current class) pair is looked up in Latin1ClassModel to get
# a frequency category. A category of 0 marks the prober as ENotMe and stops
# processing the rest of the buffer; any other category is tallied in
# @freqCounter.
#
# aBuf - String of bytes to analyse.
#
# Returns whatever get_state reports after the chunk has been processed.
def feed(aBuf)
  aBuf = filter_with_english_letters(aBuf)
  aBuf.each_byte do |byte|
    # each_byte already yields the Integer byte value, so it can index the
    # class table directly (no need to build a one-char String and unpack it).
    char_class = Latin1_CharToClass[byte]
    freq = Latin1ClassModel[(@lastCharClass * CLASS_NUM) + char_class]
    if freq == 0
      @state = ENotMe
      break
    end
    @freqCounter[freq] += 1
    @lastCharClass = char_class
  end
  get_state
end
get_charset_name()
click to toggle source
# File lib/rchardet/latin1prober.rb, line 106
# Returns the name of the charset this prober reports, always "windows-1252".
def get_charset_name
  "windows-1252"
end
get_confidence()
click to toggle source
# File lib/rchardet/latin1prober.rb, line 127
# Computes how confident the prober is that the input is windows-1252.
#
# Returns 0.01 when the prober has already ruled itself out (ENotMe), and 0.0
# when nothing has been counted yet. Otherwise the score is the share of
# category-3 observations minus a 20x penalty for category-1 observations,
# clamped at 0.0 and then halved.
#
# NOTE(review): categories 3 and 1 presumably mean "very likely" and
# "very unlikely" sequences, as in the upstream chardet model; confirm
# against the Latin1ClassModel definition.
#
# Returns a Float.
def get_confidence
  return 0.01 if get_state == ENotMe

  # Seeded with 0 so an empty counter array yields 0 instead of nil.
  total = @freqCounter.inject(0) { |sum, count| sum + count }
  return 0.0 if total < 0.01

  # The counters are Integers: multiplying by 20.0 first forces the whole
  # expression into floating point, so the category-3 ratio is not truncated
  # to 0 by integer division.
  confidence = (@freqCounter[3] - (@freqCounter[1] * 20.0)) / total
  confidence = 0.0 if confidence < 0.0

  # lower the confidence of latin1 so that other more accurate detector
  # can take priority.
  confidence * 0.5
end
reset()
click to toggle source
Calls superclass method
CharDet::CharSetProber#reset
# File lib/rchardet/latin1prober.rb, line 100
# Restores the prober to its starting state: the previous character class is
# set back to OTH, every frequency counter is zeroed, and then the superclass
# reset runs.
def reset
  @lastCharClass = OTH
  @freqCounter = Array.new(FREQ_CAT_NUM, 0)
  super
end