module DR::Encoding

Public Instance Methods

fix_utf8(s=nil) click to toggle source

If a mostly UTF-8 string has some Latin-1 characters mixed in, replace the invalid bytes.

# File lib/dr/base/encoding.rb, line 8
# Repair a string whose bytes are not all valid in its encoding.
#
# Each invalid byte sequence is replaced by its lowercase hex dump wrapped
# in angle brackets (e.g. "<e9>"). When String#scrub is unavailable, the
# work is delegated to DR::Encoding.to_utf8 instead.
#
# @param s [String, nil] string to repair; nil means "use self"
#   (the case where this module has been included)
# @return [String] the repaired string
def fix_utf8(s=nil)
  text = s.nil? ? self : s

  # String#scrub only exists from Ruby 2.1 onwards
  # cf http://ruby-doc.org/core-2.1.0/String.html#method-i-scrub
  return DR::Encoding.to_utf8(text) unless String.method_defined?(:scrub)

  text.scrub do |bytes|
    hex = bytes.unpack('H*').first
    "<#{hex}>"
  end
end
to_utf8(s=nil,from:nil) click to toggle source
# File lib/dr/base/encoding.rb, line 19
# Return a copy of a string transcoded to UTF-8.
#
# @param s [String, nil] string to convert; nil means "use self"
#   (the case where this module has been included)
# @param from [Encoding, String, nil] source encoding; defaults to
#   s.encoding when nil
# @return [String] the converted string
def to_utf8(s=nil,from:nil)
  s=self if s.nil? #if we are included
  from=s.encoding if from.nil?
  if String.method_defined?(:encode)
    #Ruby 1.9
    # Invalid and undefined sequences are replaced instead of raising.
    # NOTE(review): with :undef => :replace in effect the :fallback proc
    # appears never to be consulted -- confirm against String#encode docs.
    s.encode('UTF-8',from, :invalid => :replace, :undef => :replace,
             :fallback => Proc.new { |bytes| '<'+bytes.unpack('H*')[0]+'>' }
            )
  else
    #Ruby 1.8
    # Iconv.new takes the *target* encoding first, then the source one;
    # to_s lets an Encoding object be passed as well as a name.
    ic = Iconv.new('UTF-8//IGNORE', from.to_s)
    ic.iconv(s)
  end
end
to_utf8!(s=nil,from:nil) click to toggle source

In-place variant of to_utf8; assumes Ruby >= 1.9 (no Iconv fallback here).

# File lib/dr/base/encoding.rb, line 35
# Transcode a string to UTF-8 in place.
#
# Relies on String#encode!, so Ruby >= 1.9 is assumed.
#
# @param s [String, nil] string to convert; nil means "use self"
#   (the case where this module has been included)
# @param from [Encoding, String, nil] source encoding; defaults to the
#   string's current encoding when nil
# @return [String] the same string object, now converted
def to_utf8!(s=nil,from:nil)
  target = s.nil? ? self : s
  source_encoding = from.nil? ? target.encoding : from
  hex_fallback = Proc.new { |bytes| '<'+bytes.unpack('H*')[0]+'>' }

  target.encode!(
    'UTF-8', source_encoding,
    :invalid => :replace,
    :undef => :replace,
    :fallback => hex_fallback
  )
end