class Gman::Importer

Constants

BLACKLIST

Known false positives from vendored lists

REGEX_CHECKS

Attributes

domain_list[RW]

Public Class Methods

new(domains) click to toggle source
# File lib/gman/importer.rb, line 64
# Builds the importer around a DomainList constructed from the given data.
#
# domains - passed unchanged as the `data:` argument of DomainList.new.
def initialize(domains)
  incoming = DomainList.new(data: domains)
  @domain_list = incoming
end

Public Instance Methods

current() click to toggle source
# File lib/gman/importer.rb, line 94
# Returns the result of DomainList.current, fetched on first use and
# cached in @current for every later call.
def current
  return @current if @current

  @current = DomainList.current
end
domain_resolves?(domain) click to toggle source

Verifies that the given domain resolves (it has an address record, an NS record, or an MX record), and thus is treated as valid

# File lib/gman/importer.rb, line 114
# Checks whether the domain can be resolved.
#
# The host is first normalized through Addressable::URI. Returns true when
# ip? finds an address for it; otherwise returns the NS lookup result if it
# is truthy, falling back to the MX lookup result (a record or false).
def domain_resolves?(domain)
  host = Addressable::URI.new(host: domain).normalize.host
  return true if ip?(host)

  ns_record = returns_record?(host, 'NS')
  ns_record || returns_record?(host, 'MX')
end
import(options = {}) click to toggle source
# File lib/gman/importer.rb, line 98
# Normalizes and validates the importer's domain list, merges it into the
# current list, and logs the domain counts before and after.
#
# options - Hash forwarded unchanged to ensure_validity!.
#
# Returns whatever the final logger.info call returns.
def import(options = {})
  # Use the memoized `current` helper for the "before" count, matching the
  # "after" count below, rather than loading the current list a second time.
  logger.info "Current: #{current.count} domains"
  logger.info "Adding: #{domain_list.count} domains"

  normalize_domains!
  ensure_validity!(options)

  add_to_current
  logger.info "New: #{current.count} domains"
end
logger() click to toggle source
# File lib/gman/importer.rb, line 68
# Returns the Logger writing to $stdout, created on first use and cached
# in @logger.
def logger
  return @logger if @logger

  @logger = Logger.new($stdout)
end
normalize_domain(domain) click to toggle source
# File lib/gman/importer.rb, line 72
# Normalizes a single domain string.
#
# The input is passed through Gman.new(...).to_s, then lowercased, stripped
# of surrounding whitespace, and has a leading "www." and a trailing "/"
# removed.
#
# domain - the raw domain value to normalize.
#
# Returns the normalized String.
def normalize_domain(domain)
  domain = Gman.new(domain).to_s
  # The dot is escaped so only a literal "www." prefix is removed (an
  # unescaped dot would also eat e.g. "wwwe" from "wwwexample.gov"), and
  # \A / \z anchor to the whole string rather than to individual lines.
  domain.to_s.downcase.strip.sub(/\Awww\./, '').sub(%r{/\z}, '')
end
reject(domain, reason) click to toggle source

If the RECONCILING environment variable is set, returns the reason rather than a boolean and silences log output.

# File lib/gman/importer.rb, line 87
# Records that a domain was rejected.
#
# When the RECONCILING environment variable is set, nothing is logged and
# the reason itself is returned. Otherwise the rejection is logged at info
# level and false is returned.
def reject(domain, reason)
  if ENV['RECONCILING']
    reason
  else
    logger.info "👎 `#{domain}`: #{reason}"
    false
  end
end
resolver() click to toggle source
# File lib/gman/importer.rb, line 109
# Returns the Resolv::DNS instance configured with the 1.1.1.1 and 8.8.8.8
# nameservers, created on first use and cached in @resolver.
def resolver
  return @resolver if @resolver

  nameservers = ['1.1.1.1', '8.8.8.8']
  @resolver = Resolv::DNS.new(nameserver: nameservers)
end
valid_domain?(domain, options = {}) click to toggle source
# File lib/gman/importer.rb, line 77
# Runs the duplicate, validity and resolution checks, in that order,
# stopping at the first one that fails.
#
# options - Hash; a truthy :skip_dupe skips the duplicate check and a truthy
#           :skip_resolve skips the resolution check.
#
# Returns true when every check that ran returned a truthy value, false
# otherwise.
def valid_domain?(domain, options = {})
  passed = (options[:skip_dupe] || ensure_not_dupe(domain)) &&
           ensure_valid(domain) &&
           (options[:skip_resolve] || ensure_resolves(domain))

  passed ? true : false
end

Private Instance Methods

add_to_current() click to toggle source
# File lib/gman/importer.rb, line 178
# Appends every group of the importer's domain list onto the matching group
# of the current list (creating the group when missing), then writes the
# current list.
#
# Returns the result of current.write.
def add_to_current
  merged = current.data
  domain_list.data.each_pair do |group, additions|
    bucket = (merged[group] ||= [])
    bucket.concat(additions)
  end
  current.write
end
dupe?(domain) click to toggle source
# File lib/gman/importer.rb, line 161
# Tells whether the domain is already covered by the current list.
#
# Returns true when the domain itself is listed; otherwise returns whatever
# current.parent_domain gives for it.
def dupe?(domain)
  return true if current.domains.include?(domain)

  current.parent_domain(domain)
end
ensure_not_dupe(domain) click to toggle source
# File lib/gman/importer.rb, line 150
# Rejects the domain when the current list already contains it or contains
# one of its parent domains.
#
# Each lookup against the current list is performed at most once per call.
#
# Returns true when the domain is new; otherwise the result of reject, with
# reason 'duplicate' or "subdomain of <parent>".
def ensure_not_dupe(domain)
  return reject(domain, 'duplicate') if current.domains.include?(domain)

  parent = current.parent_domain(domain)
  return reject(domain, "subdomain of #{parent}") if parent

  true
end
ensure_regex(domain) click to toggle source
# File lib/gman/importer.rb, line 123
# Rejects the domain with the message of the first REGEX_CHECKS entry whose
# pattern matches it.
#
# Returns true when no pattern matches (a nil domain never matches);
# otherwise the result of reject.
def ensure_regex(domain)
  hit = REGEX_CHECKS.find { |_msg, pattern| domain&.match?(pattern) }
  return true unless hit

  reject(domain, hit.first)
end
ensure_resolves(domain) click to toggle source
# File lib/gman/importer.rb, line 144
# Rejects the domain as 'unresolvable' when domain_resolves? is falsy.
#
# Returns true when the domain resolves; otherwise the result of reject.
def ensure_resolves(domain)
  if domain_resolves?(domain)
    true
  else
    reject(domain, 'unresolvable')
  end
end
ensure_valid(domain) click to toggle source
# File lib/gman/importer.rb, line 130
# Runs the static validity checks on a domain, in order: empty string,
# BLACKLIST membership, PublicSuffix validity of "foo.<domain>", Swot's
# academic check, and finally the regex checks.
#
# Returns false for an empty domain (without calling reject), the result of
# reject for the first failing check, or the result of ensure_regex.
def ensure_valid(domain)
  return false if domain.empty?
  return reject(domain, 'blacklist') if BLACKLIST.include?(domain)
  return reject(domain, 'invalid') unless PublicSuffix.valid?("foo.#{domain}")
  return reject(domain, 'academic') if Swot.is_academic?(domain)

  ensure_regex(domain)
end
ensure_validity!(options = {}) click to toggle source
# File lib/gman/importer.rb, line 172
# Filters every group of the importer's domain list in place, keeping only
# the domains for which valid_domain? is truthy.
#
# options - Hash forwarded unchanged to valid_domain?.
def ensure_validity!(options = {})
  domain_list.data.each_value do |group_domains|
    group_domains.keep_if { |candidate| valid_domain?(candidate, options) }
  end
end
ip?(domain) click to toggle source
# File lib/gman/importer.rb, line 186
# Asks the resolver for an address for the given name.
#
# Returns whatever resolver.getaddress returns, or false when it raises
# Resolv::ResolvError.
def ip?(domain)
  address = resolver.getaddress(domain)
  address
rescue Resolv::ResolvError
  false
end
normalize_domains!() click to toggle source
# File lib/gman/importer.rb, line 165
# Normalizes every domain in each group of the importer's domain list in
# place (via normalize_domain) and drops duplicates within each group.
def normalize_domains!
  normalizer = method(:normalize_domain)
  domain_list.to_h.each_value do |group_domains|
    group_domains.map!(&normalizer).uniq!
  end
end
returns_record?(domain, type) click to toggle source
# File lib/gman/importer.rb, line 192
# Looks up a DNS resource record of the given type for the domain.
#
# type - name of a constant under Resolv::DNS::Resource::IN (callers in this
#        class pass 'NS' and 'MX').
#
# Returns whatever resolver.getresource returns, or false when it raises
# Resolv::ResolvError.
def returns_record?(domain, type)
  record_class = Resolv::DNS::Resource::IN.const_get(type)
  resolver.getresource(domain, record_class)
rescue Resolv::ResolvError
  false
end