class Twingly::URL
Constants
- ACCEPTED_SCHEMES
- CARRIAGE_RETURN
- CUSTOM_PSL
- ENDS_WITH_SLASH
- ERRORS_TO_EXTEND
- LEADING_AND_TRAILING_WHITESPACE
- LINE_FEED
- NBSP
- SPACE
- STARTS_WITH_WWW
- VERSION
- WHITESPACE_CHARS
Attributes
addressable_uri[R]
public_suffix_domain[R]
Public Class Methods
new(addressable_uri, public_suffix_domain)
click to toggle source
# File lib/twingly/url.rb, line 113
# Stores the two collaborators this URL is built from.
#
# addressable_uri      - object kept in @addressable_uri (exposed via the
#                        addressable_uri reader).
# public_suffix_domain - object kept in @public_suffix_domain (exposed via
#                        the public_suffix_domain reader).
def initialize(addressable_uri, public_suffix_domain)
  @addressable_uri, @public_suffix_domain = addressable_uri, public_suffix_domain
end
parse(potential_url)
click to toggle source
# File lib/twingly/url.rb, line 49
# Parses +potential_url+ by delegating to internal_parse.
#
# Returns whatever internal_parse returns. When internal_parse raises
# Twingly::URL::Error or Twingly::URL::Error::ParseError, a fresh NullURL is
# returned instead of raising.
#
# Any other exception is extended with Twingly::URL::Error and re-raised,
# so callers can rescue that module for everything coming out of here.
def parse(potential_url)
  internal_parse(potential_url)
rescue Twingly::URL::Error, Twingly::URL::Error::ParseError
  NullURL.new
rescue Exception => unexpected
  # Tag the error, then let the same exception object propagate.
  unexpected.extend(Twingly::URL::Error)
  raise
end
Private Class Methods
clean_input(input)
click to toggle source
# File lib/twingly/url.rb, line 82
# Prepares raw input for parsing: coerces it with Kernel#String, replaces
# invalid byte sequences via String#scrub, then hands the result to
# strip_whitespace.
#
# Returns the value produced by strip_whitespace.
def clean_input(input)
  strip_whitespace(String(input).scrub)
end
internal_parse(input)
click to toggle source
# File lib/twingly/url.rb, line 58
# Does the actual parsing work behind parse.
#
# Steps, in order:
#   1. clean the input with clean_input,
#   2. parse it with Addressable::URI.heuristic_parse,
#   3. require a scheme matching ACCEPTED_SCHEMES,
#   4. require that the URI can be normalized,
#   5. look the host up with PublicSuffix.parse against CUSTOM_PSL,
#   6. require a public-suffix result that has a second-level domain.
#
# Returns a new instance built from the Addressable URI and the public
# suffix domain.
# Raises Twingly::URL::Error::ParseError when any of the checks above fail.
# Errors listed in ERRORS_TO_EXTEND are extended with Twingly::URL::Error
# and re-raised.
def internal_parse(input)
  cleaned = clean_input(input)
  uri = Addressable::URI.heuristic_parse(cleaned)
  raise Twingly::URL::Error::ParseError if uri.nil?
  raise Twingly::URL::Error::ParseError unless uri.scheme =~ ACCEPTED_SCHEMES

  # URLs that can't be normalized should not be valid
  try_addressable_normalize(uri)

  suffix_domain = PublicSuffix.parse(uri.host, list: CUSTOM_PSL, default_rule: nil)
  raise Twingly::URL::Error::ParseError if suffix_domain.nil? || suffix_domain.sld.nil?

  new(uri, suffix_domain)
rescue *ERRORS_TO_EXTEND => parse_failure
  parse_failure.extend(Twingly::URL::Error)
  raise
end
strip_whitespace(input)
click to toggle source
# File lib/twingly/url.rb, line 88
# Removes whatever LEADING_AND_TRAILING_WHITESPACE matches from +input+.
#
# Only UTF-8 encoded strings are touched; a string in any other encoding is
# returned as-is (the very same object).
def strip_whitespace(input)
  if input.encoding == Encoding::UTF_8
    input.gsub(LEADING_AND_TRAILING_WHITESPACE, "")
  else
    input
  end
end
try_addressable_normalize(addressable_uri)
click to toggle source
Workaround for the following bug in addressable: github.com/sporkmonger/addressable/issues/224
# File lib/twingly/url.rb, line 96
# Calls +normalize+ on the given URI and returns its result.
#
# An ArgumentError whose message contains "invalid byte sequence in UTF-8"
# is translated into Twingly::URL::Error::ParseError; every other
# ArgumentError is re-raised unchanged.
def try_addressable_normalize(addressable_uri)
  addressable_uri.normalize
rescue ArgumentError => normalize_error
  # Anything other than the known UTF-8 failure is passed straight through.
  raise unless normalize_error.message.include?("invalid byte sequence in UTF-8")

  raise Twingly::URL::Error::ParseError
end
Public Instance Methods
<=>(other)
click to toggle source
# File lib/twingly/url.rb, line 211
# Orders this URL relative to +other+ by comparing the two objects' string
# forms (to_s) with String#<=>.
def <=>(other)
  own_string = to_s
  other_string = other.to_s
  own_string <=> other_string
end
domain()
click to toggle source
# File lib/twingly/url.rb, line 142
# Returns the +domain+ reported by the public suffix domain object.
def domain
  suffix_domain = public_suffix_domain
  suffix_domain.domain
end
eql?(other)
click to toggle source
# File lib/twingly/url.rb, line 215
# Strict equality used for Hash keys and similar lookups.
#
# Two URLs are eql? when +other+ is an instance of this object's class (or
# a subclass) and both have the same string form.
#
# The string forms are compared directly instead of the hash values: equal
# hashes do not imply equal URLs, and Hash relies on eql? to tell colliding
# keys apart.
#
# Returns true or false.
def eql?(other)
  return false unless other.is_a?(self.class)

  to_s == other.to_s
end
hash()
click to toggle source
# File lib/twingly/url.rb, line 221
# Hash code for this URL, taken from the hash of its string form so that
# URLs with the same to_s produce the same value.
def hash
  string_form = to_s
  string_form.hash
end
host()
click to toggle source
# File lib/twingly/url.rb, line 146
# Returns the +host+ reported by the underlying Addressable URI.
def host
  uri = addressable_uri
  uri.host
end
inspect()
click to toggle source
# File lib/twingly/url.rb, line 229
# Developer-facing representation: class name, object id in hexadecimal,
# and the URL's string form, e.g. "#<Twingly::URL:0x3c http://example.com/>".
def inspect
  format("#<%s:0x%x %s>", self.class.name, __id__, to_s)
end
normalized()
click to toggle source
# File lib/twingly/url.rb, line 162
# Builds a normalized version of this URL.
#
# Works on a copy (dup) of the underlying URI, replacing its scheme, host
# and path with normalized_scheme, normalized_host and normalized_path, and
# feeds that copy back through the class-level parse.
#
# Returns whatever self.class.parse returns for the modified copy.
def normalized
  copy = addressable_uri.dup.tap do |uri|
    uri.scheme = normalized_scheme
    uri.host = normalized_host
    uri.path = normalized_path
  end

  self.class.parse(copy)
end
normalized_host()
click to toggle source
# File lib/twingly/url.rb, line 176
# Host used by the normalized URL.
#
# Starts from the URI's own normalized host, prefixes "www." when the public
# suffix domain reports no subdomain, and finally runs the result through
# normalize_blogspot.
def normalized_host
  base_host = addressable_uri.normalized_host
  suffix_domain = public_suffix_domain

  with_subdomain = suffix_domain.subdomain? ? base_host : "www.#{base_host}"
  normalize_blogspot(with_subdomain, suffix_domain)
end
normalized_path()
click to toggle source
# File lib/twingly/url.rb, line 189
# Path used by the normalized URL: the URI's path passed through
# strip_trailing_slashes, falling back to "/" when nothing is left.
def normalized_path
  stripped = strip_trailing_slashes(addressable_uri.path)

  if stripped.empty?
    "/"
  else
    stripped
  end
end
normalized_scheme()
click to toggle source
# File lib/twingly/url.rb, line 172
# Scheme used by the normalized URL: this URL's scheme, lower-cased.
def normalized_scheme
  current_scheme = scheme
  current_scheme.downcase
end
origin()
click to toggle source
# File lib/twingly/url.rb, line 150
# Returns the +origin+ reported by the underlying Addressable URI.
def origin
  uri = addressable_uri
  uri.origin
end
password()
click to toggle source
# File lib/twingly/url.rb, line 203
# Returns the URI's password converted with to_s, so a missing (nil)
# password comes back as an empty string.
def password
  raw_password = addressable_uri.password
  raw_password.to_s
end
path()
click to toggle source
# File lib/twingly/url.rb, line 154
# Returns the +path+ reported by the underlying Addressable URI.
def path
  uri = addressable_uri
  uri.path
end
scheme()
click to toggle source
# File lib/twingly/url.rb, line 118
# Returns the +scheme+ reported by the underlying Addressable URI.
def scheme
  uri = addressable_uri
  uri.scheme
end
sld()
click to toggle source
# File lib/twingly/url.rb, line 126
# Returns the +sld+ (second-level domain) reported by the public suffix
# domain object.
def sld
  suffix_domain = public_suffix_domain
  suffix_domain.sld
end
tld()
click to toggle source
# File lib/twingly/url.rb, line 130
# Returns the +tld+ reported by the public suffix domain object.
def tld
  suffix_domain = public_suffix_domain
  suffix_domain.tld
end
to_s()
click to toggle source
# File lib/twingly/url.rb, line 225
# String form of this URL, as produced by the underlying Addressable URI.
def to_s
  uri = addressable_uri
  uri.to_s
end
trd()
click to toggle source
# File lib/twingly/url.rb, line 122
# Returns the public suffix domain's +trd+ converted with to_s, so a
# missing (nil) value comes back as an empty string.
def trd
  raw_trd = public_suffix_domain.trd
  raw_trd.to_s
end
ttld()
click to toggle source
Many ccTLDs have a second level underneath their ccTLD [1]; use this when you don't care about the second level.
[1]: en.wikipedia.org/wiki/Second-level_domain
# File lib/twingly/url.rb, line 138
# Returns the last dot-separated label of tld, e.g. "uk" for a tld of
# "co.uk". A tld without dots is returned unchanged.
def ttld
  labels = tld.split(".")
  labels.last
end
user()
click to toggle source
# File lib/twingly/url.rb, line 199
# Returns the URI's user converted with to_s, so a missing (nil) user
# comes back as an empty string.
def user
  raw_user = addressable_uri.user
  raw_user.to_s
end
userinfo()
click to toggle source
# File lib/twingly/url.rb, line 195
# Returns the URI's userinfo converted with to_s, so a missing (nil)
# userinfo comes back as an empty string.
def userinfo
  raw_userinfo = addressable_uri.userinfo
  raw_userinfo.to_s
end
valid?()
click to toggle source
# File lib/twingly/url.rb, line 207
# Always true for this class.
# NOTE(review): presumably NullURL answers false to the same question —
# confirm against its definition.
def valid?
  true
end
without_scheme()
click to toggle source
# File lib/twingly/url.rb, line 158
# Returns the string form of this URL with the leading "<scheme>:" removed,
# e.g. "//example.com/" for "http://example.com/".
#
# The scheme is escaped before being placed in the pattern so characters
# that are legal in a scheme but special in a Regexp ("+", ".") are matched
# literally. to_s on the scheme keeps a nil scheme behaving as an empty
# prefix, as plain interpolation did.
def without_scheme
  scheme_prefix = /\A#{Regexp.escape(scheme.to_s)}:/
  to_s.sub(scheme_prefix, "")
end
Private Instance Methods
normalize_blogspot(host, domain)
click to toggle source
# File lib/twingly/url.rb, line 237
# Rewrites blogspot hosts to a single form.
#
# host   - host string to rewrite.
# domain - object responding to sld and tld.
#
# When the domain's sld is "blogspot" (case-insensitive), whatever
# STARTS_WITH_WWW matches is removed and the trailing tld is replaced with
# "com". Any other host is returned unchanged.
def normalize_blogspot(host, domain)
  return host unless domain.sld.downcase == "blogspot"

  without_www = host.sub(STARTS_WITH_WWW, "")
  without_www.sub(/#{domain.tld}\z/i, "com")
end
strip_trailing_slashes(path)
click to toggle source
# File lib/twingly/url.rb, line 245
# Returns +path+ with the first match of ENDS_WITH_SLASH replaced by an
# empty string; the argument itself is not modified.
def strip_trailing_slashes(path)
  trimmed = path.sub(ENDS_WITH_SLASH, "")
  trimmed
end