class UrlNormalizer
Constants
- VERSION
Public Class Methods
new(uri)
click to toggle source
# File lib/url_normalizer.rb, line 20
# Wraps the URI that #normalize will later operate on.
#
# uri:: the object stored in @uri. #normalize calls +query+, +query=+,
#       +normalize!+ and +to_s+ on it; UrlNormalizer.normalize passes the
#       result of Addressable::URI.parse here.
def initialize(uri)
  @uri = uri
end
normalize(url)
click to toggle source
# File lib/url_normalizer.rb, line 12
# Normalizes +url+ with the normalizer class registered for its host.
#
# A trailing fragment ("#..." up to the end of the line) is removed before
# parsing, unless the "#" is immediately followed by "!" (hashbang URLs are
# kept as they are). The remaining string is parsed with
# Addressable::URI.parse, the class stored in @@normalizer_for under the
# URI's host is instantiated with the parsed URI, and the result of its
# #normalize is returned.
#
# url:: the URL as a String. It is not modified.
#
# NOTE(review): what happens for a host with no registered normalizer
# depends on how @@normalizer_for is set up elsewhere in the file (for
# example a Hash default) -- confirm against its definition.
def self.normalize(url)
  # Use the non-destructive sub: sub! rewrote the caller's string in place
  # and raised FrozenError when handed a frozen string.
  without_fragment = url.sub(/#(?!\!)[^#]*$/, '')
  uri = Addressable::URI.parse(without_fragment)
  @@normalizer_for[uri.host].new(uri).normalize
end
normalize_for(domain)
click to toggle source
# File lib/url_normalizer.rb, line 8
# Registers the receiver (the class this method is called on) as the
# normalizer for +domain+ by storing it in @@normalizer_for under that key.
# UrlNormalizer.normalize looks the class up again by the parsed URI's host.
#
# domain:: the key to register under; presumably a host name String, since
#          the lookup uses +uri.host+ -- confirm against callers.
def self.normalize_for(domain)
  @@normalizer_for[domain] = self
end
Public Instance Methods
build_query(params)
click to toggle source
# File lib/url_normalizer.rb, line 64
# Serializes +params+ into a query string.
#
# params:: an enumerable of name => values pairs, where values is a list
#          (the shape #clean_query passes in from CGI.parse).
#
# Each value yields one "name=value" pair; a name whose value list is empty
# yields the bare name. Names and values go through #encode_component, and
# all pieces are joined with "&".
def build_query(params)
  pieces = params.flat_map do |name, values|
    key = encode_component(name)
    # A parameter without any value is emitted as just its name.
    next [key.to_s] if values.empty?

    values.map { |value| "#{key}=#{encode_component(value)}" }
  end

  pieces.join("&")
end
clean_query(query)
click to toggle source
# File lib/url_normalizer.rb, line 42
# Filters the parameters of a raw query string and rebuilds it.
#
# query:: the query string, or nil/false when there is none.
#
# Returns nil when +query+ is nil or false. Otherwise the query is parsed
# with CGI.parse, parameters named by #forbidden_uri_params are dropped
# (when that method returns a list), only parameters named by
# #whitelisted_uri_params are kept (when that method returns a list), and
# the survivors are re-serialized through #build_query.
def clean_query(query)
  return nil unless query

  parsed = CGI.parse(query)

  if forbidden_uri_params
    # The lists hold symbols while CGI.parse produces String keys.
    blocked = forbidden_uri_params.map(&:to_s)
    parsed.delete_if { |name, _values| blocked.include?(name) }
  end

  if whitelisted_uri_params
    permitted = whitelisted_uri_params.map(&:to_s)
    parsed.keep_if { |name, _values| permitted.include?(name) }
  end

  build_query(parsed)
end
encode_component(component)
click to toggle source
# File lib/url_normalizer.rb, line 60
# Percent-encodes a single query name or value.
#
# component:: the decoded name or value to encode.
#
# Only RFC 3986 unreserved characters are left as they are. The default
# character class of Addressable::URI.encode_component also leaves reserved
# characters such as "&", "=", "#" and "+" untouched; because #clean_query
# hands over values already decoded by CGI.parse, a value like "a&b" (from
# "a%26b") would otherwise be written back as a raw "&" and split into two
# parameters.
def encode_component(component)
  Addressable::URI.encode_component(
    component,
    Addressable::URI::CharacterClasses::UNRESERVED
  )
end
forbidden_uri_params()
click to toggle source
# File lib/url_normalizer.rb, line 34
# Names of the query parameters that #clean_query removes, as symbols.
# A new array is built on every call. #clean_query skips the removal step
# when this method returns nil or false.
def forbidden_uri_params
  %i[utm_source utm_content utm_medium utm_campaign]
end
normalize()
click to toggle source
# File lib/url_normalizer.rb, line 24
# Produces the normalized URL string for the wrapped URI.
#
# The URI held in @uri is changed in place: its query is replaced by the
# result of #clean_query and +normalize!+ is then called on it. The return
# value is its string form with a trailing "?" (left over from an emptied
# query) removed.
def normalize
  @uri.query = clean_query(@uri.query)
  @uri.normalize!
  @uri.to_s.sub(/\?$/, '')
end
whitelisted_uri_params()
click to toggle source
# File lib/url_normalizer.rb, line 38
# Names of the only query parameters #clean_query should keep.
# Returns nil here, which makes #clean_query skip the whitelist step so
# every parameter that is not forbidden is retained.
def whitelisted_uri_params
  nil
end