class Gman::Importer
Constants
- BLACKLIST
Known false positives from vendored lists
- REGEX_CHECKS
Attributes
domain_list[RW]
Public Class Methods
new(domains)
click to toggle source
# File lib/gman/importer.rb, line 64
# Builds an importer for the given domains by wrapping them in a
# DomainList (passed as its +data:+) and storing it in @domain_list.
def initialize(domains)
  wrapped = DomainList.new(data: domains)
  @domain_list = wrapped
end
Public Instance Methods
current()
click to toggle source
# File lib/gman/importer.rb, line 94
# Returns DomainList.current, fetched on first use and cached in @current.
def current
  return @current if @current

  @current = DomainList.current
end
domain_resolves?(domain)
click to toggle source
Verifies that the given domain resolves: it is considered valid if it resolves to an address or has an NS or MX record
# File lib/gman/importer.rb, line 114
# Checks whether the domain resolves. The host is first normalized through
# Addressable::URI; the domain passes if ip? is truthy for it, otherwise
# if an NS record or, failing that, an MX record is returned.
def domain_resolves?(domain)
  host = Addressable::URI.new(host: domain).normalize.host

  if ip?(host)
    true
  else
    returns_record?(host, 'NS') || returns_record?(host, 'MX')
  end
end
import(options = {})
click to toggle source
# File lib/gman/importer.rb, line 98
# Runs the import: logs the current and incoming domain counts, normalizes
# the incoming domains, filters them with ensure_validity!, merges them
# via add_to_current, then logs the resulting count.
#
# options is passed unchanged to ensure_validity!.
def import(options = {})
  # Read the starting count from the memoized `current` list (the same
  # object the final count is read from) instead of fetching
  # Gman::DomainList.current a second time.
  logger.info "Current: #{current.count} domains"
  logger.info "Adding: #{domain_list.count} domains"

  normalize_domains!
  ensure_validity!(options)
  add_to_current

  logger.info "New: #{current.count} domains"
end
logger()
click to toggle source
# File lib/gman/importer.rb, line 68
# Returns the importer's Logger, which writes to $stdout. It is created on
# first use and cached in @logger.
def logger
  return @logger if @logger

  @logger = Logger.new($stdout)
end
normalize_domain(domain)
click to toggle source
# File lib/gman/importer.rb, line 72
# Normalizes a domain string: passes it through Gman.new(...).to_s, then
# downcases it, strips surrounding whitespace, removes a leading "www."
# and removes a single trailing slash.
#
# Returns the normalized String.
def normalize_domain(domain)
  domain = Gman.new(domain).to_s
  domain.to_s.downcase.strip
        # The dot is escaped and the match anchored to the start of the
        # string, so only a literal "www." prefix is removed. An unescaped
        # dot would also eat the first letter of hosts such as "wwwfoo.gov".
        .sub(/\Awww\./, '')
        .sub(%r{/\z}, '')
end
reject(domain, reason)
click to toggle source
If the RECONCILING environment variable is set, returns the reason rather than a boolean and silences log output
# File lib/gman/importer.rb, line 87
# Records that a domain was rejected. When ENV['RECONCILING'] is set the
# reason is returned and nothing is logged; otherwise the rejection is
# logged at info level and false is returned.
def reject(domain, reason)
  if ENV['RECONCILING']
    reason
  else
    logger.info "👎 `#{domain}`: #{reason}"
    false
  end
end
resolver()
click to toggle source
# File lib/gman/importer.rb, line 109
# Returns the Resolv::DNS client used for lookups, configured with the
# nameservers 1.1.1.1 and 8.8.8.8. It is created on first use and cached
# in @resolver.
def resolver
  return @resolver if @resolver

  nameservers = ['1.1.1.1', '8.8.8.8']
  @resolver = Resolv::DNS.new(nameserver: nameservers)
end
valid_domain?(domain, options = {})
click to toggle source
# File lib/gman/importer.rb, line 77
# Runs the checks for a single domain, in order: the duplicate check
# (skipped when options[:skip_dupe] is truthy), ensure_valid, and the
# resolution check (skipped when options[:skip_resolve] is truthy).
#
# Returns true when every check that ran returned a truthy value, false
# otherwise. Later checks are not run once one has failed.
def valid_domain?(domain, options = {})
  not_dupe = options[:skip_dupe] || ensure_not_dupe(domain)
  return false unless not_dupe && ensure_valid(domain)

  resolves = options[:skip_resolve] || ensure_resolves(domain)
  resolves ? true : false
end
Private Instance Methods
add_to_current()
click to toggle source
# File lib/gman/importer.rb, line 178
# Appends every group of incoming domains to the group of the same name in
# the current list, creating the group when it is missing, and then calls
# current.write.
def add_to_current
  domain_list.data.each_pair do |group, additions|
    (current.data[group] ||= []).concat(additions)
  end

  current.write
end
dupe?(domain)
click to toggle source
# File lib/gman/importer.rb, line 161
# Truthy when the domain is already in the current list, or when
# current.parent_domain returns a truthy value for it. In the second case
# that value itself is returned.
def dupe?(domain)
  return true if current.domains.include?(domain)

  current.parent_domain(domain)
end
ensure_not_dupe(domain)
click to toggle source
# File lib/gman/importer.rb, line 150
# Returns true when the domain is not a dupe. Otherwise rejects it, with
# the reason 'duplicate' when it is already listed or "subdomain of ..."
# naming the value current.parent_domain returned.
def ensure_not_dupe(domain)
  return true unless dupe?(domain)

  reason =
    if current.domains.include?(domain)
      'duplicate'
    else
      "subdomain of #{current.parent_domain(domain)}"
    end

  reject(domain, reason)
end
ensure_regex(domain)
click to toggle source
# File lib/gman/importer.rb, line 123
# Tests the domain against each REGEX_CHECKS entry in order. The first
# matching pattern rejects the domain with that entry's message; true is
# returned when nothing matches. A nil domain matches nothing.
def ensure_regex(domain)
  failed = REGEX_CHECKS.find { |_msg, pattern| domain&.match?(pattern) }
  return true unless failed

  reject(domain, failed.first)
end
ensure_resolves(domain)
click to toggle source
# File lib/gman/importer.rb, line 144
# Returns true when domain_resolves? is truthy for the domain; otherwise
# rejects it with the reason 'unresolvable'.
def ensure_resolves(domain)
  if domain_resolves?(domain)
    true
  else
    reject(domain, 'unresolvable')
  end
end
ensure_valid(domain)
click to toggle source
# File lib/gman/importer.rb, line 130
# Validates a single domain. Returns false without logging for a nil or
# empty domain. Otherwise rejects it with 'blacklist' when it is in
# BLACKLIST, 'invalid' when PublicSuffix does not accept "foo.<domain>",
# or 'academic' when Swot reports it as academic; if none of those apply
# the result of ensure_regex is returned.
def ensure_valid(domain)
  # Guard nil as well as empty so this agrees with the nil-safe
  # ensure_regex, rather than raising NoMethodError on nil.
  return false if domain.nil? || domain.empty?

  return reject(domain, 'blacklist') if BLACKLIST.include?(domain)
  return reject(domain, 'invalid') unless PublicSuffix.valid?("foo.#{domain}")
  return reject(domain, 'academic') if Swot.is_academic?(domain)

  ensure_regex(domain)
end
ensure_validity!(options = {})
click to toggle source
# File lib/gman/importer.rb, line 172
# Filters every group's domain array in place, keeping only the domains
# for which valid_domain?(domain, options) is truthy.
def ensure_validity!(options = {})
  domain_list.data.each_value do |group_domains|
    group_domains.keep_if { |candidate| valid_domain?(candidate, options) }
  end
end
ip?(domain)
click to toggle source
# File lib/gman/importer.rb, line 186
# Asks the resolver for an address for the domain. Returns whatever
# resolver.getaddress returns, or false when it raises
# Resolv::ResolvError.
def ip?(domain)
  begin
    resolver.getaddress(domain)
  rescue Resolv::ResolvError
    false
  end
end
normalize_domains!()
click to toggle source
# File lib/gman/importer.rb, line 165
# Normalizes every domain of every group in place with normalize_domain,
# then removes duplicates within each group.
def normalize_domains!
  domain_list.to_h.each_value do |group_domains|
    group_domains.map! { |raw| normalize_domain(raw) }.uniq!
  end
end
returns_record?(domain, type)
click to toggle source
# File lib/gman/importer.rb, line 192
# Looks up a DNS record of the given type (e.g. 'NS' or 'MX') for the
# domain. The type name is resolved to the matching class under
# Resolv::DNS::Resource::IN. Returns whatever resolver.getresource
# returns, or false when Resolv::ResolvError is raised.
def returns_record?(domain, type)
  begin
    record_class = Object.const_get "Resolv::DNS::Resource::IN::#{type}"
    resolver.getresource(domain, record_class)
  rescue Resolv::ResolvError
    false
  end
end