class Translatomatic::EscapedUnicode
Class to encode and decode Unicode characters as escape sequences. This code is heavily influenced by Florian Frank's JSON gem. @see github.com/jnbt/java-properties @see github.com/flori/json/
Constants
- EMPTY_8BIT_STRING
@private
- ESCAPE_REGEX
@private
- MAP
@private
Public Class Methods
escape(text)
click to toggle source
Encodes all Unicode characters into escape sequences. @param text [String] @return [String] The encoded text
# File lib/translatomatic/escaped_unicode.rb, line 22
# Rewrites +text+ using escape sequences; the argument itself is not modified.
#
# A copy of the text is tagged as binary so that both substitutions work on
# raw bytes. Double quotes, backslashes and control bytes (0x00-0x1f) are
# looked up in MAP (a byte without an entry is kept as is), then every
# ESCAPE_REGEX match is handed to escape_char. The copy is tagged as UTF-8
# again before it is returned.
#
# @param text [String] the text to escape
# @return [String] a new string, tagged as UTF-8
def escape(text)
  escaped = text.dup.force_encoding(::Encoding::ASCII_8BIT)

  # gsub! returns nil when nothing matched, so these calls are never chained.
  escaped.gsub!(/["\\\x0-\x1f]/n) do |byte|
    MAP[byte] || byte
  end
  escaped.gsub!(ESCAPE_REGEX) do |sequence|
    escape_char(sequence)
  end

  escaped.force_encoding(::Encoding::UTF_8)
end
unescape(text)
click to toggle source
Decodes all Unicode characters from escape sequences. @param text [String] @return [String] The decoded text
# File lib/translatomatic/escaped_unicode.rb, line 11
# Replaces each run of consecutive \uXXXX / \UXXXX escapes (four hex digits
# each) in +text+ with whatever unescape_char returns for that run.
#
# Consecutive escapes are passed on as one unit rather than one at a time.
# The argument is not modified; the result is tagged as UTF-8.
#
# @param text [String] text that may contain unicode escape sequences
# @return [String] a new string with the escape runs replaced
def unescape(text)
  decoded = text.gsub(/(?:\\[uU](?:[A-Fa-f\d]{4}))+/) { |run| unescape_char(run) }
  decoded.force_encoding(::Encoding::UTF_8)
end
Private Class Methods
escape_char(c)
click to toggle source
# File lib/translatomatic/escaped_unicode.rb, line 46
# Turns a UTF-8 byte sequence into \uXXXX escapes, one per UTF-16 code unit.
#
# The bytes of +c+ are read as UTF-8 and transcoded to UTF-16BE; the result
# is dumped as hex and every group of four hex digits gets a "\u" prefix.
#
# @param c [String] the sequence to escape; a sequence of size 1 is rejected
# @return [String] the escape sequences, tagged as UTF-8
# @raise [RuntimeError] when +c+ has size 1
def escape_char(c)
  raise(t('unicode.invalid_byte', byte: c)) if c.size == 1

  hex = c.encode('utf-16be', 'utf-8').unpack('H*').first
  hex.force_encoding(::Encoding::ASCII_8BIT)
  # The replacement text is: backslash, "u", then the four matched digits.
  hex.gsub!(/.{4}/n, '\\\\u\&')
  hex.force_encoding(::Encoding::UTF_8)
end
unescape_char(c)
click to toggle source
# File lib/translatomatic/escaped_unicode.rb, line 33
# Decodes a run of \uXXXX escapes into the text it represents.
#
# Starting at the beginning of +c+, each escape contributes two bytes (its
# first and last pair of hex digits) to a byte buffer; reading stops at the
# first position that does not start with "\u". The buffer is then
# transcoded from UTF-16BE to UTF-8, so a surrogate pair written as two
# consecutive escapes becomes a single character.
#
# The argument is never modified, so frozen strings are accepted.
#
# @param c [String] a run of escapes; the marker may be "\u" or "\U"
# @return [String] the decoded text, encoded as UTF-8
def unescape_char(c)
  # Lower-case a copy: downcase! would alter the caller's string and raise
  # FrozenError on a frozen one.
  escapes = c.downcase
  bytes = EMPTY_8BIT_STRING.dup
  i = 0
  while escapes[i] == '\\' && escapes[i + 1] == 'u'
    # High byte, then low byte, of one UTF-16 code unit.
    bytes << escapes[i + 2, 2].to_i(16) << escapes[i + 4, 2].to_i(16)
    i += 6 # length of one "\uXXXX" escape
  end
  bytes.encode('utf-8', 'utf-16be')
end