module MDUrl::Decode

Constants

COMPONENT_CHARS
DEFTAULT_CHARS

Public Class Methods

decode(string, exclude = nil) click to toggle source

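Decodes a percent-encoded string. Characters listed in exclude are left percent-encoded; when exclude is not a String, DEFTAULT_CHARS is used.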

# File lib/mdurl-rb/decode.rb, line 33
# Decodes the percent-encoded sequences in +string+.
#
# Every run of consecutive +%XX+ escapes is interpreted as UTF-8 bytes:
# * ASCII bytes (< 0x80) are replaced through the per-+exclude+ lookup table,
#   so characters listed in +exclude+ stay percent-encoded;
# * well-formed 2-, 3- and 4-byte sequences become the encoded character;
# * overlong encodings, surrogate code points (U+D800..U+DFFF) and values
#   above U+10FFFF yield one U+FFFD per consumed byte;
# * any other byte (stray continuation byte, truncated sequence, invalid lead
#   byte) yields a single U+FFFD.
#
# @param string [String] text that may contain percent-encoded sequences
# @param exclude [String, nil] characters that must remain percent-encoded;
#   any non-String value selects DEFTAULT_CHARS
# @return [String] a new string with the escapes decoded
def self.decode(string, exclude = nil)
  exclude = DEFTAULT_CHARS unless exclude.is_a?(String)
  cache = getDecodeCache(exclude)

  string.gsub(/(%[a-f0-9]{2})+/i) do |seq|
    # Parse the whole run once: "%E2%82%AC" => [0xE2, 0x82, 0xAC].
    bytes = seq.scan(/[0-9a-f]{2}/i).map { |hex| hex.to_i(16) }
    count = bytes.length
    # ''.dup keeps the buffer mutable even under frozen_string_literal.
    result = ''.dup
    i = 0

    while i < count
      b1 = bytes[i]

      if b1 < 0x80
        # ASCII: the table holds either the character or its kept escape.
        result << cache[b1]
        i += 1
        next
      end

      if (b1 & 0xE0) == 0xC0 && i + 1 < count
        # 110xxxxx 10xxxxxx
        b2 = bytes[i + 1]

        if (b2 & 0xC0) == 0x80
          char = ((b1 << 6) & 0x7C0) | (b2 & 0x3F)

          # Values below 0x80 are overlong encodings.
          result << (char < 0x80 ? "\ufffd\ufffd" : char.chr(Encoding::UTF_8))
          i += 2
          next
        end
      end

      if (b1 & 0xF0) == 0xE0 && i + 2 < count
        # 1110xxxx 10xxxxxx 10xxxxxx
        b2 = bytes[i + 1]
        b3 = bytes[i + 2]

        if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80
          char = ((b1 << 12) & 0xF000) | ((b2 << 6) & 0xFC0) | (b3 & 0x3F)

          # Reject overlong encodings and UTF-16 surrogate code points.
          if char < 0x800 || (char >= 0xD800 && char <= 0xDFFF)
            result << "\ufffd\ufffd\ufffd"
          else
            result << char.chr(Encoding::UTF_8)
          end

          i += 3
          next
        end
      end

      if (b1 & 0xF8) == 0xF0 && i + 3 < count
        # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        b2 = bytes[i + 1]
        b3 = bytes[i + 2]
        b4 = bytes[i + 3]

        if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80 && (b4 & 0xC0) == 0x80
          char = ((b1 << 18) & 0x1C0000) | ((b2 << 12) & 0x3F000) | ((b3 << 6) & 0xFC0) | (b4 & 0x3F)

          if char < 0x10000 || char > 0x10FFFF
            result << "\ufffd\ufffd\ufffd\ufffd"
          else
            # Ruby strings are UTF-8 here, so the supplementary-plane code
            # point is emitted directly. Splitting it into a surrogate pair
            # would raise RangeError, because surrogates are not valid
            # UTF-8 code points.
            result << char.chr(Encoding::UTF_8)
          end

          i += 4
          next
        end
      end

      # Invalid lead byte, stray continuation byte or truncated sequence.
      result << "\ufffd"
      i += 1
    end

    result
  end
end
getDecodeCache(exclude) click to toggle source
# File lib/mdurl-rb/decode.rb, line 11
# Returns the 128-entry ASCII lookup table for the given +exclude+ set,
# building and memoizing it in @@decodeCache on first use.
#
# Entry +n+ is the single character with code +n+, unless that character
# occurs in +exclude+, in which case the entry is its upper-case
# percent-encoded form (for example "%2F" for "/").
#
# NOTE(review): @@decodeCache is initialised outside this method; it is
# presumably a Hash keyed by the exclude string - confirm at its definition.
#
# @param exclude [String] characters that must stay percent-encoded
# @return [Array<String>] lookup table indexed by byte value
def self.getDecodeCache(exclude)
  memoized = @@decodeCache[exclude]
  return memoized if memoized

  table = Array.new(128) { |code| code.chr }
  @@decodeCache[exclude] = table

  exclude.length.times do |index|
    code = exclude[index].ord
    # Keep only the last two hex digits, zero-padded on the left.
    table[code] = '%' + code.to_s(16).upcase.rjust(2, '0')[-2, 2]
  end

  table
end