module JavaProperties::Encoding::Unicode

Module to encode and decode unicode chars. This code is highly influenced by Florian Frank's JSON gem. @see github.com/flori/json/

Constants

EMPTY_8BIT_STRING

@private

MAP

@private

Public Class Methods

decode!(text) click to toggle source

Decodes all unicode chars from escape sequences in place. @param text [String] @return [String] The decoded text for chaining

# File lib/java-properties/encoding/unicode.rb, line 46
# Replaces every run of "\uXXXX" escape sequences in +text+ with the
# characters they stand for, modifying +text+ in place.
#
# Adjacent escapes are decoded together as one UTF-16BE byte string, so a
# surrogate pair written as two escapes becomes a single character.
#
# @param text [String] string containing "\uXXXX" / "\UXXXX" escapes
# @return [String] the same +text+ object, now tagged as UTF-8
def self.decode!(text)
  decoded = text.gsub(%r((?:\\[uU](?:[A-Fa-f\d]{4}))+)) do |run|
    # EMPTY_8BIT_STRING is defined outside this method; presumably an empty
    # binary string used as a byte buffer.
    utf16 = EMPTY_8BIT_STRING.dup
    # Each escape contributes two bytes: the high and low half of the unit.
    run.downcase.scan(/\\u(\h{2})(\h{2})/) do |high, low|
      utf16 << high.to_i(16) << low.to_i(16)
    end
    utf16.encode("utf-8", "utf-16be")
  end
  decoded.force_encoding(::Encoding::UTF_8)

  text.replace(decoded)
end
encode!(text) click to toggle source

Encodes all unicode chars into escape sequences in place. @param text [String] @return [String] The encoded text for chaining

# File lib/java-properties/encoding/unicode.rb, line 67
# Rewrites +text+ in place so that multi-byte UTF-8 characters become
# "\uXXXX" escape sequences (one escape per UTF-16BE code unit).
#
# The work is done on a binary-tagged copy. A first pass sends '"', '\'
# and the bytes 0x00-0x1f through MAP (defined outside this method),
# leaving the byte untouched when MAP has no entry for it. A second pass
# escapes every run of multi-byte sequences.
#
# @param text [String] the string to escape
# @return [String] the same +text+ object, now tagged as UTF-8
# @raise [RuntimeError] when a byte matched by the "invalid" alternative
#   of the pattern is found on its own
def self.encode!(text)
  binary = text.dup.force_encoding(::Encoding::ASCII_8BIT)
  binary.gsub!(/["\\\x0-\x1f]/n) { |char| MAP[char] || char }
  binary.gsub!(/(
    (?:
     [\xc2-\xdf][\x80-\xbf]    |
     [\xe0-\xef][\x80-\xbf]{2} |
     [\xf0-\xf4][\x80-\xbf]{3}
    )+ |
    [\x80-\xc1\xf5-\xff]       # invalid
  )/nx) do |sequence|
    # A valid match is at least two bytes long; one byte means the
    # "invalid" alternative matched.
    raise "Invalid utf8 byte: '#{sequence}'" if sequence.size == 1

    hex_units = sequence.encode("utf-16be", "utf-8").unpack('H*').first
    hex_units.force_encoding(::Encoding::ASCII_8BIT)
    # Prefix every group of four hex digits (one UTF-16 unit) with "\u".
    hex_units.gsub!(/.{4}/n, '\\\\u\&')
    hex_units.force_encoding(::Encoding::UTF_8)
  end
  text.replace(binary.force_encoding(::Encoding::UTF_8))
end

Private Class Methods

hex(codepoint) click to toggle source
# File lib/java-properties/encoding/unicode.rb, line 96
# Renders +codepoint+ in lowercase hexadecimal, left-padded with zeros to
# an even number of digits and to at least four digits.
#
# @param codepoint [Integer] the number to format
# @return [String] the zero-padded hexadecimal representation
def self.hex(codepoint)
  digits = codepoint.to_s(16)
  width = digits.size < 4 ? 4 : digits.size
  # Round an odd width up so the digits always form whole bytes.
  width += 1 if width.odd?
  digits.rjust(width, '0')
end
unicode(code) click to toggle source
# File lib/java-properties/encoding/unicode.rb, line 92
# Builds the single-character UTF-8 string for the given codepoint.
#
# @param code [Integer] codepoint to convert (anything responding to
#   +chr+ with an encoding argument)
# @return [String] a UTF-8 encoded string
def self.unicode(code)
  utf8 = ::Encoding::UTF_8
  code.chr(utf8)
end