module MDUrl::Decode
Constants
- COMPONENT_CHARS
- DEFTAULT_CHARS
Public Class Methods
decode(string, exclude = nil)
click to toggle source
Decode a percent-encoded string.
# File lib/mdurl-rb/decode.rb, line 33
#
# Decode a percent-encoded string.
#
# Every run of consecutive `%XX` escapes is interpreted as UTF-8 bytes:
#
# * Bytes below 0x80 are looked up in the table returned by
#   getDecodeCache(exclude); that table yields either the literal character
#   or, for characters listed in +exclude+, an upper-case `%XX` escape.
# * Well-formed 2-, 3- and 4-byte sequences become the character they encode.
# * Overlong encodings, encoded surrogates (U+D800..U+DFFF) and values above
#   U+10FFFF become one U+FFFD per byte of the rejected sequence.
# * Any other byte (stray continuation byte, truncated or malformed lead)
#   becomes a single U+FFFD and scanning resumes at the next byte.
#
# Text outside `%XX` runs is returned unchanged.
#
# string  - String to decode.
# exclude - String of characters that must stay percent-encoded. Any
#           non-String value (including the default nil) selects
#           DEFTAULT_CHARS.
#
# Returns a new String; +string+ itself is not modified.
def self.decode(string, exclude = nil)
  exclude = DEFTAULT_CHARS unless exclude.is_a?(String)
  cache = getDecodeCache(exclude)
  replacement = "\ufffd"

  string.gsub(/(%[a-f0-9]{2})+/i) do |seq|
    # seq is guaranteed to look like "%XX%XX..."; parse the hex pairs once.
    bytes = seq.scan(/\h{2}/).map { |hex| hex.to_i(16) }
    count = bytes.length
    result = +''
    i = 0

    # Manual index: each iteration consumes 1 to 4 bytes depending on the lead.
    while i < count
      b1 = bytes[i]

      if b1 < 0x80
        result << cache[b1]
        i += 1
        next
      end

      if (b1 & 0xE0) == 0xC0 && i + 1 < count
        # 110xxxxx 10xxxxxx
        b2 = bytes[i + 1]

        if (b2 & 0xC0) == 0x80
          char = ((b1 << 6) & 0x7C0) | (b2 & 0x3F)

          # Values below 0x80 here are overlong encodings of ASCII.
          result << (char < 0x80 ? replacement * 2 : char.chr(Encoding::UTF_8))
          i += 2
          next
        end
      end

      if (b1 & 0xF0) == 0xE0 && i + 2 < count
        # 1110xxxx 10xxxxxx 10xxxxxx
        b2 = bytes[i + 1]
        b3 = bytes[i + 2]

        if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80
          char = ((b1 << 12) & 0xF000) | ((b2 << 6) & 0xFC0) | (b3 & 0x3F)

          # Reject overlong forms and UTF-16 surrogates, which are not
          # encodable in UTF-8.
          if char < 0x800 || char.between?(0xD800, 0xDFFF)
            result << (replacement * 3)
          else
            result << char.chr(Encoding::UTF_8)
          end
          i += 3
          next
        end
      end

      if (b1 & 0xF8) == 0xF0 && i + 3 < count
        # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        b2 = bytes[i + 1]
        b3 = bytes[i + 2]
        b4 = bytes[i + 3]

        if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80 && (b4 & 0xC0) == 0x80
          char = ((b1 << 18) & 0x1C0000) | ((b2 << 12) & 0x3F000) | ((b3 << 6) & 0xFC0) | (b4 & 0x3F)

          if char < 0x10000 || char > 0x10FFFF
            result << (replacement * 4)
          else
            # Ruby strings hold supplementary-plane characters directly, so
            # the code point is encoded as-is. Splitting it into a surrogate
            # pair would raise RangeError, since surrogates are not valid
            # UTF-8 code points.
            result << char.chr(Encoding::UTF_8)
          end
          i += 4
          next
        end
      end

      # Not a valid lead byte, or the sequence was truncated/malformed.
      result << replacement
      i += 1
    end

    result
  end
end
getDecodeCache(exclude)
click to toggle source
# File lib/mdurl-rb/decode.rb, line 11
#
# Return the lookup table used to decode single-byte (ASCII) escapes for a
# given +exclude+ string, building and memoizing it in @@decodeCache on the
# first request.
#
# The table starts as 128 one-character strings (index N holds N.chr). For
# every character in +exclude+, the slot at that character's code point is
# overwritten with "%" followed by the last two digits of its zero-padded,
# upper-case hexadecimal code.
#
# exclude - String whose characters are replaced by escapes in the table; it
#           is also the memoization key.
#
# Returns the Array stored in @@decodeCache for +exclude+.
def self.getDecodeCache(exclude)
  memoized = @@decodeCache[exclude]
  return memoized if memoized

  table = (0...128).map { |code| code.chr }
  # Register the table before processing +exclude+, as the original did.
  @@decodeCache[exclude] = table

  exclude.length.times do |index|
    code = exclude[index].ord
    hex = code.to_s(16).upcase.rjust(2, '0')[-2, 2]
    table[code] = "%#{hex}"
  end

  table
end