module TingYun::Support::Serialize::EncodingNormalizer::EncodingNormalizer
Public Class Methods
normalize(str)
click to toggle source
# File lib/ting_yun/support/serialize/encoding_normalizer.rb, line 48
#
# Returns a version of +str+ that is labelled either UTF-8 or ISO-8859-1.
#
# * Valid UTF-8 and valid ISO-8859-1 strings are returned as-is (the very
#   same object, no copy).
# * Binary (ASCII-8BIT) strings and strings whose bytes are invalid for
#   their declared encoding are copied and relabelled as ISO-8859-1; the
#   bytes themselves are left untouched.
# * Any other valid string is copied and transcoded to UTF-8; if the
#   transcoding raises, the copy is relabelled as ISO-8859-1 instead.
#
# The argument is never mutated: every change is applied to a +dup+.
#
# @param str [String] the string to normalize
# @return [String] +str+ itself, or a normalized copy of it
def self.normalize(str)
  encoding = str.encoding
  valid = str.valid_encoding?

  # Already in one of the two accepted encodings and well-formed.
  return str if valid && (encoding == Encoding::UTF_8 || encoding == Encoding::ISO_8859_1)

  normalized = str.dup

  if encoding == Encoding::ASCII_8BIT || !valid
    # If the encoding is not valid, or it's ASCII-8BIT, we know conversion to
    # UTF-8 is likely to fail, so treat it as ISO-8859-1 (byte-preserving).
    normalized.force_encoding(Encoding::ISO_8859_1)
  else
    # Encoding is valid and non-binary, so it might be cleanly convertible
    # to UTF-8. Give it a try and fall back to ISO-8859-1 if it fails.
    begin
      normalized.encode!(Encoding::UTF_8)
    rescue
      normalized.force_encoding(Encoding::ISO_8859_1)
    end
  end

  normalized
end