module Analects::Encoding
Constants
- BIG5
- GB
Public Instance Methods
from_big5(str)
click to toggle source
# File lib/analects/encoding.rb, line 17
# Transcode +str+ to UTF-8, treating its bytes as the encoding held in the
# BIG5 constant. Thin wrapper around recode.
#
# @param str [String] the string to decode
# @return [String] whatever recode returns for (BIG5, str)
def from_big5(str)
  recode(BIG5, str)
end
from_gb(str)
click to toggle source
# File lib/analects/encoding.rb, line 13
# Transcode +str+ to UTF-8, treating its bytes as the encoding held in the
# GB constant. Thin wrapper around recode.
#
# @param str [String] the string to decode
# @return [String] whatever recode returns for (GB, str)
def from_gb(str)
  recode(GB, str)
end
ratings(str)
click to toggle source
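Crude way to guess which encoding a string is in: each candidate encoding is given a score, and the candidates are returned with the highest score first.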
# File lib/analects/encoding.rb, line 33
# Crude way to guess which encoding +str+ is in.
#
# Every candidate encoding returned by valid_cjk is used to decode +str+;
# each decoded code point then adds (6 - idx) to that candidate's score for
# every entry of Analects::Models::Zi.codepoint_ranges (at position idx)
# that includes it. Code points matching no range add nothing.
#
# @param str [String] the string whose encoding is being guessed
# @return [Array<Array>] [encoding, score] pairs, highest score first
def ratings(str)
  # Loop-invariant: look the ranges up once instead of once per code point.
  ranges = Analects::Models::Zi.codepoint_ranges

  scored = valid_cjk(str).map do |enc|
    # Seed both folds with 0 so an empty string (or an empty range list)
    # scores 0 rather than nil.
    score = recode(enc, str).codepoints.inject(0) do |total, point|
      total + ranges.each_with_index.inject(0) do |subtotal, (range, idx)|
        range.include?(point) ? subtotal + (6 - idx) : subtotal
      end
    end
    [enc, score]
  end

  scored.sort_by(&:last).reverse
end
recode(enc, str)
click to toggle source
# File lib/analects/encoding.rb, line 9
# Reinterpret the bytes of +str+ as being in +enc+ and transcode them to UTF-8.
#
# The encoding tag is changed on a copy, so the caller's string keeps its
# own encoding and frozen strings are accepted.
#
# @param enc [Encoding, String] the encoding the bytes of +str+ are assumed to be in
# @param str [String] the string to decode; left untouched
# @return [String] a new UTF-8 encoded string
# @raise [Encoding::InvalidByteSequenceError] if the bytes are not valid in +enc+
# @raise [Encoding::UndefinedConversionError] if a character has no UTF-8 mapping
def recode(enc, str)
  str.dup.force_encoding(enc).encode('UTF-8')
end
valid_cjk(str)
click to toggle source
# File lib/analects/encoding.rb, line 21
# List the candidate encodings (GB, then BIG5) under which +str+ can be
# passed through recode without raising a conversion error.
#
# @param str [String] the string to probe
# @return [Array] the subset of [GB, BIG5], in that order, for which recode
#   succeeded; empty when neither does
def valid_cjk(str)
  [GB, BIG5].select do |candidate|
    begin
      recode(candidate, str)
      true
    rescue ::Encoding::UndefinedConversionError, ::Encoding::InvalidByteSequenceError
      # The bytes do not decode under this candidate; leave it out.
      false
    end
  end
end