class Translatomatic::EscapedUnicode

Encodes Unicode characters as \uXXXX escape sequences and decodes such sequences back into characters. This code is heavily influenced by Florian Frank's JSON gem. @see github.com/jnbt/java-properties @see github.com/flori/json/

Constants

EMPTY_8BIT_STRING

@private

ESCAPE_REGEX

@private

MAP

@private

Public Class Methods

escape(text) click to toggle source

Encodes all unicode chars into escape sequences @param text [String] @return [String] The encoded text

# File lib/translatomatic/escaped_unicode.rb, line 22
# Escapes +text+, working on a binary-tagged copy so the caller's string is
# left untouched.
#
# Two passes are made over the copy:
# 1. every double quote, backslash or byte in 0x00-0x1f is replaced by its
#    MAP entry when one exists (otherwise it is kept as is);
# 2. every ESCAPE_REGEX match is handed to escape_char.
#
# @param text [String] the text to escape
# @return [String] the escaped copy, tagged as UTF-8
def escape(text)
  escaped = text.dup.force_encoding(::Encoding::ASCII_8BIT)
  escaped.gsub!(/["\\\x0-\x1f]/n) { |char| MAP[char] || char }
  escaped.gsub!(ESCAPE_REGEX) { |sequence| escape_char(sequence) }
  # force_encoding returns its receiver, so this is also the return value.
  escaped.force_encoding(::Encoding::UTF_8)
end
unescape(text) click to toggle source

Decodes all unicode chars from escape sequences @param text [String] @return [String] The decoded text

# File lib/translatomatic/escaped_unicode.rb, line 11
# Replaces every run of consecutive \uXXXX / \UXXXX escapes (four hex digits
# each) in +text+ with whatever unescape_char returns for that run.
#
# @param text [String] text that may contain unicode escape sequences
# @return [String] a new string, tagged as UTF-8
def unescape(text)
  escape_run = /(?:\\[uU](?:[A-Fa-f\d]{4}))+/
  decoded = text.gsub(escape_run) { |run| unescape_char(run) }
  # force_encoding returns its receiver, so this is also the return value.
  decoded.force_encoding(::Encoding::UTF_8)
end

Private Class Methods

escape_char(c) click to toggle source
# File lib/translatomatic/escaped_unicode.rb, line 46
# Turns one matched byte sequence into \uXXXX escapes.
#
# The bytes are read as UTF-8 and transcoded to UTF-16BE; the result is
# hex-dumped and every group of four hex digits gets a "\u" prefix.
#
# @param c [String] the matched bytes
# @return [String] the escape sequence(s), tagged as UTF-8
# @raise [RuntimeError] when +c+ has a size of exactly 1; the message comes
#   from t('unicode.invalid_byte', ...) (presumably a String - confirm)
def escape_char(c)
  raise(t('unicode.invalid_byte', byte: c)) if c.size == 1

  hex_digits = c.encode('utf-16be', 'utf-8').unpack('H*').first
  hex_digits.force_encoding(::Encoding::ASCII_8BIT)
  # In the replacement, "\\\\" yields one literal backslash and "\&" the match.
  escaped = hex_digits.gsub(/.{4}/n, '\\\\u\&')
  escaped.force_encoding(::Encoding::UTF_8)
end
unescape_char(c) click to toggle source
# File lib/translatomatic/escaped_unicode.rb, line 33
# Decodes a run of consecutive \uXXXX / \UXXXX escapes into UTF-8 text.
#
# Each escape contributes one 16-bit UTF-16BE code unit, so a surrogate pair
# written as two escapes becomes a single character.
#
# The argument is never modified (the previous version lower-cased it in
# place, which altered the caller's string and failed on frozen input).
# Decoding stops at the first position that is not a well-formed escape.
#
# @param c [String] one or more adjacent escapes, e.g. '\u00e9' or
#   '\ud83d\ude00'
# @return [String] the decoded characters, encoded as UTF-8
# @raise [Encoding::InvalidByteSequenceError] if the code units are not valid
#   UTF-16 (for example a lone surrogate)
def unescape_char(c)
  # \G pins every match to the end of the previous one, so only a leading,
  # gap-free run of escapes is consumed.
  code_units = c.scan(/\G\\[uU]([A-Fa-f\d]{4})/).flatten.map { |hex| hex.to_i(16) }
  # 'n*' packs each code unit as a big-endian 16-bit integer, i.e. UTF-16BE.
  code_units.pack('n*').encode('utf-8', 'utf-16be')
end