class TheFox::Sengi::Uri

Constants

URI_CLASSES

Public Class Methods

new(url) click to toggle source
# File lib/sengi/uri.rb, line 12
# Wraps +url+, normalizes it and derives its hash and lookup key.
#
# The value is handed to Kernel#URI. If parsing fails, or #validate rejects
# the result, the object stays invalid (#is_valid? returns false) and no
# normalization or hashing is performed.
#
# @param url the URL to wrap; passed unchanged to Kernel#URI
def initialize(url)
        @uri = nil
        @uri_class = nil
        @hash = nil
        @is_blacklisted = nil
        @is_ignored = nil
        # NOTE(review): this is the string 'nil', not nil, unlike every other
        # unset field here -- confirm that callers depend on it before changing.
        @is_ignored_reason = 'nil'
        @hash_id_key_name = nil
        @id = nil
        @key_name = nil
        @domain_nowww = nil
        @domain_nowww_hash = nil
        @domain_original_hash = nil
        @domain_hash_id_key_name = nil
        @domain_id = nil
        @domain_key_name = nil
        @request_id = nil
        @request_key_name = nil
        @response_id = nil
        @response_key_name = nil
        @response_size = 0
        @response_content_type = ''

        begin
                @uri = URI(url)
        rescue StandardError
                # A URL that cannot be parsed simply marks the object invalid.
                # Only StandardError is rescued so that interrupts, exits and
                # other non-standard exceptions still propagate.
                @uri = nil
        end

        validate
        return unless is_valid?

        append_slash
        host_downcase
        remove_fragment
        domain_setup

        # The hash is taken from the normalized string form, after the steps above.
        @uri_class = @uri.class
        @hash = Digest::SHA256.hexdigest(to_s)
        @hash_id_key_name = "urls:id:#{@hash}"
end

Public Instance Methods

domain_hash_id_key_name() click to toggle source
# File lib/sengi/uri.rb, line 122
# Returns the "domains:id:<hash>" key built by #domain_setup from the
# SHA-256 of the www-stripped host, or nil when no host was available.
def domain_hash_id_key_name
        @domain_hash_id_key_name
end
domain_id() click to toggle source
# File lib/sengi/uri.rb, line 131
# Returns the domain id last assigned through #domain_id= (nil until then).
def domain_id
        @domain_id
end
domain_id=(domain_id) click to toggle source
# File lib/sengi/uri.rb, line 126
# Assigns the domain id and refreshes the derived "domains:<id>" key name.
#
# @param domain_id the identifier to store; interpolated into the key name
def domain_id=(domain_id)
        @domain_id = domain_id
        @domain_key_name = "domains:#{domain_id}"
end
domain_key_name() click to toggle source
# File lib/sengi/uri.rb, line 135
# Returns the "domains:<id>" key derived by #domain_id= (nil until an id is assigned).
def domain_key_name
        @domain_key_name
end
domain_nowww() click to toggle source
# File lib/sengi/uri.rb, line 110
# Returns the host with a leading "www." removed, as computed by
# #domain_setup; nil when the URI has no host.
def domain_nowww
        @domain_nowww
end
domain_nowww_hash() click to toggle source
# File lib/sengi/uri.rb, line 114
# Returns the SHA-256 hex digest of #domain_nowww, or nil when no host was available.
def domain_nowww_hash
        @domain_nowww_hash
end
domain_original_hash() click to toggle source
# File lib/sengi/uri.rb, line 118
# Returns the SHA-256 hex digest of the URI host as stored (with any "www."
# prefix kept), or nil when no host was available.
def domain_original_hash
        @domain_original_hash
end
hash_id_key_name() click to toggle source

# Overrides the "urls:id:<hash>" lookup key that the constructor derives.
# The value is stored as given; the hash itself is not touched.
#
# @param hash_id_key_name the key name to store
def hash_id_key_name=(hash_id_key_name)
        @hash_id_key_name = hash_id_key_name
end

# File lib/sengi/uri.rb, line 89
# Returns the "urls:id:<hash>" lookup key. The constructor sets it for a
# valid URL; it stays nil otherwise unless assigned through the writer.
def hash_id_key_name
        @hash_id_key_name
end
id() click to toggle source
# File lib/sengi/uri.rb, line 98
# Returns the URL id last assigned through #id= (nil until then).
def id
        @id
end
id=(id) click to toggle source
# File lib/sengi/uri.rb, line 93
# Assigns the URL id and refreshes the derived "urls:<id>" key name.
#
# @param id the identifier to store; interpolated into the key name
def id=(id)
        @id = id
        @key_name = "urls:#{id}"
end
is_blacklisted() click to toggle source
# File lib/sengi/uri.rb, line 65
# Returns the blacklist flag last assigned through #is_blacklisted=
# (nil until a value has been assigned).
def is_blacklisted
        @is_blacklisted
end
is_blacklisted=(is_blacklisted) click to toggle source
# File lib/sengi/uri.rb, line 61
# Stores the blacklist flag exactly as given; no coercion is applied.
def is_blacklisted=(is_blacklisted)
        @is_blacklisted = is_blacklisted
end
is_ignored() click to toggle source
# File lib/sengi/uri.rb, line 73
# Returns the ignore flag last assigned through #is_ignored=
# (nil until a value has been assigned).
def is_ignored
        @is_ignored
end
is_ignored=(is_ignored) click to toggle source
# File lib/sengi/uri.rb, line 69
# Stores the ignore flag exactly as given; no coercion is applied.
def is_ignored=(is_ignored)
        @is_ignored = is_ignored
end
is_ignored_reason() click to toggle source
# File lib/sengi/uri.rb, line 81
# Returns the stored ignore reason. The constructor initializes it to the
# string 'nil' (not the nil object), so it is never nil unless a caller
# assigns nil through #is_ignored_reason=.
def is_ignored_reason
        @is_ignored_reason
end
is_ignored_reason=(is_ignored_reason) click to toggle source
# File lib/sengi/uri.rb, line 77
# Stores the ignore reason exactly as given; no coercion is applied.
def is_ignored_reason=(is_ignored_reason)
        @is_ignored_reason = is_ignored_reason
end
is_relative?(uri = nil) click to toggle source
# File lib/sengi/uri.rb, line 233
# Tells whether this URL is relative, optionally with respect to +uri+.
#
# A URL whose parsed class is exactly URI::Generic always counts as relative.
# Otherwise it counts as relative only when +uri+ is given and both wrapped
# URIs report the same host.
#
# An invalid wrapper on either side (its #ruri is nil) yields false instead
# of raising NoMethodError.
#
# @param uri an object responding to #ruri, or nil
# @return [Boolean]
def is_relative?(uri = nil)
        return true if @uri_class == URI::Generic
        return false if uri.nil? || @uri.nil?

        other = uri.ruri
        return false if other.nil?

        other.host == @uri.host
end
is_valid?() click to toggle source
# File lib/sengi/uri.rb, line 53
# Returns true while a parsed URI is held. The URI is nil when parsing
# failed or #validate rejected it.
def is_valid?
        !@uri.nil?
end
join(suburi) click to toggle source
# File lib/sengi/uri.rb, line 229
# Resolves +suburi+ against this URL and wraps the result in a new instance
# of the same class.
#
# @param suburi an object responding to #ruri
# @return a new wrapper built from the joined URL string
def join(suburi)
        combined = URI.join(@uri, suburi.ruri)
        self.class.new(combined.to_s)
end
key_name() click to toggle source

# Overrides the "urls:<id>" key name that #id= derives.
# The value is stored as given; the id itself is not touched.
#
# @param key_name the key name to store
def key_name=(key_name)
        @key_name = key_name
end

# File lib/sengi/uri.rb, line 106
# Returns the "urls:<id>" key derived by #id=, or whatever was assigned
# through the writer; nil until either happens.
def key_name
        @key_name
end
request_id() click to toggle source
# File lib/sengi/uri.rb, line 144
# Returns the request id last assigned through #request_id= (nil until then).
def request_id
        @request_id
end
request_id=(request_id) click to toggle source
# File lib/sengi/uri.rb, line 139
# Assigns the request id and refreshes the derived "requests:<id>" key name.
#
# @param request_id the identifier to store; interpolated into the key name
def request_id=(request_id)
        @request_id = request_id
        @request_key_name = "requests:#{request_id}"
end
request_key_name() click to toggle source
# File lib/sengi/uri.rb, line 148
# Returns the "requests:<id>" key derived by #request_id= (nil until an id is assigned).
def request_key_name
        @request_key_name
end
response_content_type() click to toggle source
# File lib/sengi/uri.rb, line 177
# Returns the recorded response content type; an empty string until one is assigned.
def response_content_type
        @response_content_type
end
response_content_type=(response_content_type) click to toggle source
# File lib/sengi/uri.rb, line 173
# Stores the response content type, converted with #to_s so that the stored
# value is always a String (nil becomes an empty string).
def response_content_type=(response_content_type)
        @response_content_type = response_content_type.to_s
end
response_id() click to toggle source
# File lib/sengi/uri.rb, line 157
# Returns the response id last assigned through #response_id= (nil until then).
def response_id
        @response_id
end
response_id=(response_id) click to toggle source
# File lib/sengi/uri.rb, line 152
# Assigns the response id and refreshes the derived "responses:<id>" key name.
#
# @param response_id the identifier to store; interpolated into the key name
def response_id=(response_id)
        @response_id = response_id
        @response_key_name = "responses:#{response_id}"
end
response_key_name() click to toggle source
# File lib/sengi/uri.rb, line 161
# Returns the "responses:<id>" key derived by #response_id= (nil until an id is assigned).
def response_key_name
        @response_key_name
end
response_size() click to toggle source
# File lib/sengi/uri.rb, line 169
# Returns the recorded response size; 0 until one is assigned.
def response_size
        @response_size
end
response_size=(response_size) click to toggle source
# File lib/sengi/uri.rb, line 165
# Stores the response size as an Integer.
#
# The constructor initializes the size to the Integer 0, so the value is
# coerced with #to_i (not #to_s) to keep the attribute numeric whether the
# caller passes an Integer, a numeric String such as a header value, or nil
# (which becomes 0).
#
# @param response_size [#to_i] the size to record
def response_size=(response_size)
        @response_size = response_size.to_i
end
ruri() click to toggle source
# File lib/sengi/uri.rb, line 57
# Returns the underlying parsed URI object, or nil when this URL is invalid.
def ruri
        @uri
end
to_hash() click to toggle source
# File lib/sengi/uri.rb, line 185
# Returns the SHA-256 hex digest that the constructor computes from the
# normalized URL string; nil for an invalid URL.
#
# NOTE(review): Ruby also uses #to_hash for implicit Hash conversion, while
# this method returns a String -- confirm nothing relies on that protocol.
def to_hash
        @hash
end
to_http() click to toggle source
# File lib/sengi/uri.rb, line 189
# Returns a copy of the wrapped URI with its scheme forced to "http".
# The wrapped URI itself is left unchanged.
def to_http
        @uri.clone.tap { |copy| copy.scheme = 'http' }
end
to_s() click to toggle source
# File lib/sengi/uri.rb, line 181
# Returns the string form of the wrapped URI. Interpolation is used, so an
# invalid URL (nil URI) yields an empty string and not an error.
def to_s
        "#{@uri}"
end
weight(ref_uri = nil) click to toggle source
# File lib/sengi/uri.rb, line 195
# Returns a numeric weight for this URL based on the class of its parsed URI.
#
#   100  the URI class is exactly URI::Generic
#   200  URI::HTTP whose host equals, is a subdomain of, or is a parent
#        domain of the host of +ref_uri+
#   250  any other URI::HTTP
#   290  URI::HTTPS
#   999  anything else, including an invalid URL
#
# Hosts are compared on label boundaries: "www.example.com" relates to
# "example.com", while "notexample.com" does not. A plain substring match
# would treat the latter as related.
#
# @param ref_uri an object responding to #ruri, or nil
# @return [Integer]
def weight(ref_uri = nil)
        return 100 if @uri_class == URI::Generic
        return 290 if @uri_class == URI::HTTPS
        return 999 unless @uri_class == URI::HTTP

        # Hosts are only read on the HTTP path, so an invalid URI or reference
        # on either side cannot raise here.
        own_host = @uri.nil? ? nil : @uri.host
        reference = ref_uri.nil? ? nil : ref_uri.ruri
        ref_host = reference.nil? ? nil : reference.host

        related = false
        if own_host && ref_host
                related = own_host == ref_host ||
                        own_host.end_with?(".#{ref_host}") ||
                        ref_host.end_with?(".#{own_host}")
        end

        related ? 200 : 250
end

Private Instance Methods

append_slash() click to toggle source
# File lib/sengi/uri.rb, line 255
# Gives a bare-host HTTP URL the root path "/".
#
# Applies only when the URI is exactly a URI::HTTP (URI::HTTPS is left
# untouched) and its request_uri is "/", i.e. there is no query and the path
# is empty or already "/".
#
# The path is set on a copy of the URI. Appending "/" to the string form is
# wrong when a fragment is present: "http://host#top" would become
# "http://host#top/", the slash would land in the fragment, and the path
# would stay empty.
def append_slash
        return unless @uri.instance_of?(URI::HTTP) && @uri.request_uri == '/'
        return unless @uri.path.empty?

        normalized = @uri.dup
        normalized.path = '/'
        @uri = normalized
end
domain_setup() click to toggle source
# File lib/sengi/uri.rb, line 277
# Derives the domain attributes from the URI host: the host without a
# leading "www.", SHA-256 hex digests of both the stripped and the original
# host, and the "domains:id:<hash>" lookup key. Does nothing when there is
# no URI or the URI has no host.
def domain_setup
        return if @uri.nil? || @uri.host.nil?

        original_host = @uri.host
        @domain_nowww = original_host.sub(/^www\./, '')
        @domain_nowww_hash = Digest::SHA256.hexdigest(@domain_nowww)
        @domain_original_hash = Digest::SHA256.hexdigest(original_host)
        @domain_hash_id_key_name = "domains:id:#{@domain_nowww_hash}"
end
host_downcase() click to toggle source
# File lib/sengi/uri.rb, line 267
# Lower-cases the host of the wrapped URI in place.
#
# URIs whose class is exactly URI::Generic are skipped. URIs without a host
# (e.g. "mailto:" addresses) are skipped as well, because calling #downcase
# on a nil host would raise NoMethodError.
def host_downcase
        return if @uri.nil? || @uri.class == URI::Generic

        host = @uri.host
        @uri.host = host.downcase unless host.nil?
end
remove_fragment() click to toggle source
# File lib/sengi/uri.rb, line 273
# Drops the "#fragment" part of the wrapped URI in place.
# The constructor only calls this for a valid URL; a nil URI would raise here.
def remove_fragment
        @uri.fragment = nil
end
validate() click to toggle source
# File lib/sengi/uri.rb, line 240
# Invalidates the URL (sets the URI to nil) when it must not be processed:
# its lower-cased string form starts with "javascript:" or "tel:", or the
# class of the parsed URI is not listed in URI_CLASSES.
def validate
        return unless is_valid?

        lowered = to_s.downcase
        @uri = nil if lowered.start_with?('javascript:', 'tel:')

        # The prefix check may already have cleared the URI, so validity is
        # tested again before the class is inspected.
        @uri = nil if is_valid? && !URI_CLASSES.include?(@uri.class)
end