class UrlNormalizer

Constants

VERSION

Public Class Methods

new(uri) click to toggle source
# File lib/url_normalizer.rb, line 20
# Builds a normalizer around an already-parsed URI object.
#
# uri - the URI object this instance will operate on; it is stored as-is
#       in @uri and later read by #normalize.
def initialize(uri)
  @uri = uri
end
normalize(url) click to toggle source
# File lib/url_normalizer.rb, line 12
# Normalizes +url+ with the normalizer class registered for its host.
#
# A trailing "#..." fragment is stripped first, unless the "#" is
# immediately followed by "!" (the negative lookahead in the pattern leaves
# such fragments in place). The remaining string is parsed with
# Addressable::URI, the class stored in @@normalizer_for under the parsed
# host is instantiated with that URI, and its #normalize result is returned.
#
# url - the URL String to normalize. It is left unmodified.
#
# Returns whatever the selected normalizer's #normalize returns.
def self.normalize(url)
  # Non-destructive sub on purpose: sub! would rewrite the caller's string
  # in place and raise when handed a frozen string.
  without_fragment = url.sub(/#(?!\!)[^#]*$/, '')

  uri = Addressable::URI.parse(without_fragment)

  # NOTE(review): @@normalizer_for is defined outside this view; presumably
  # it falls back to a default class for hosts that were never registered.
  # Confirm, otherwise this raises NoMethodError on nil for unknown hosts.
  @@normalizer_for[uri.host].new(uri).normalize
end
normalize_for(domain) click to toggle source
# File lib/url_normalizer.rb, line 8
# Registers the receiving class as the normalizer for +domain+.
#
# domain - the key under which the receiver is stored in @@normalizer_for;
#          .normalize looks entries up by the parsed URI's host.
#
# Returns the receiver (the value of the assignment).
def self.normalize_for(domain)
  @@normalizer_for[domain] = self
end

Public Instance Methods

build_query(params) click to toggle source
# File lib/url_normalizer.rb, line 64
# Serializes a parameter collection back into a query string.
#
# params - a Hash-like collection yielding (name, values) pairs, where
#          values is an array of that parameter's values (the shape
#          clean_query hands over after CGI.parse).
#
# Every name and value goes through encode_component. A name with no values
# is emitted bare, without "="; otherwise one "name=value" pair is emitted
# per value. All pieces are joined with "&".
#
# Returns the query String (empty when params is empty).
def build_query(params)
  pieces = params.flat_map do |name, values|
    key = encode_component(name)

    if values.empty?
      # Valueless parameter: keep just the name.
      [key]
    else
      values.map { |value| "#{key}=#{encode_component(value)}" }
    end
  end

  pieces.join("&")
end
clean_query(query) click to toggle source
# File lib/url_normalizer.rb, line 42
def clean_query query
  return unless query

  uri_params = CGI.parse(query)

  if forbidden_uri_params
    forbidden_params = forbidden_uri_params.map(&:to_s)
    uri_params.reject! {|k,v| forbidden_params.include? k}
  end

  if whitelisted_uri_params
    allowed_params = whitelisted_uri_params.map(&:to_s)
    uri_params.select! {|k,v| allowed_params.include? k}
  end

  build_query(uri_params)
end
encode_component(component) click to toggle source
# File lib/url_normalizer.rb, line 60
# Percent-encodes a single query name or value.
#
# component - the name or value to encode.
#
# Delegates to Addressable::URI.encode_component with its default settings
# and returns that call's result.
#
# NOTE(review): only the component is passed, so Addressable's default
# character class applies. Confirm that it escapes "&" and "=" inside
# values as intended for query-string use.
def encode_component(component)
  Addressable::URI.encode_component(component)
end
forbidden_uri_params() click to toggle source
# File lib/url_normalizer.rb, line 34
# Names of query parameters that clean_query removes.
#
# Returns a new Array of Symbols on every call.
def forbidden_uri_params
  %i[utm_source utm_content utm_medium utm_campaign]
end
normalize() click to toggle source
# File lib/url_normalizer.rb, line 24
# Produces the normalized URL string for the wrapped URI.
#
# The URI held in @uri is modified in place: its query is replaced with the
# result of clean_query, then normalize! is called on it.
#
# Returns the URI rendered as a String, with a trailing "?" removed (left
# behind when the cleaned query is empty).
def normalize
  target = @uri

  target.query = clean_query(target.query)
  target.normalize!

  target.to_s.sub(/\?$/, '')
end
whitelisted_uri_params() click to toggle source
# File lib/url_normalizer.rb, line 38
# Names of the only query parameters clean_query should keep.
#
# Returns nil here, which makes clean_query skip whitelist filtering.
def whitelisted_uri_params
  nil
end