class Gman::DomainList
Constants
- COMMENT_REGEX
Attributes
Public Class Methods
The current, government domain list
# File lib/gman/domain_list.rb, line 11
# Builds a list backed by the file located at Gman.list_path.
#
# Returns a DomainList.
def current
  DomainList.new(path: Gman.list_path)
end
# File lib/gman/domain_list.rb, line 15
# Builds a list backed by the file at the given location.
#
# path - the value handed to the constructor as `path:`
#
# Returns a DomainList.
def from_file(path)
  DomainList.new(path: path)
end
# File lib/gman/domain_list.rb, line 19
# Builds a list from already-parsed data.
#
# hash - the value handed to the constructor as `data:`
#        (a Hash of group => domains)
#
# Returns a DomainList.
def from_hash(hash)
  DomainList.new(data: hash)
end
# File lib/gman/domain_list.rb, line 23
# Builds a list from raw, unparsed list text.
#
# string - the value handed to the constructor as `contents:`
#
# Returns a DomainList.
def from_public_suffix(string)
  DomainList.new(contents: string)
end
# File lib/gman/domain_list.rb, line 29
# Stores whichever source the list was built from.
#
# path     - optional location of the list on disk
# contents - optional raw list text
# data     - optional Hash of group => domains; groups whose domain array
#            holds nothing but nils (or nothing at all) are dropped
def initialize(path: nil, contents: nil, data: nil)
  @path = path
  @contents = contents
  return unless data

  @data = data.reject { |_group, domains| domains.compact.empty? }
end
Public Instance Methods
Alphabetize groups and the domains within each group. We need to ensure exceptions appear after their corresponding rules.
# File lib/gman/domain_list.rb, line 86
# Reorders the list in place: groups are sorted case-insensitively by name,
# and within every group the domains are sorted with sort_with_exceptions
# and de-duplicated.
#
# Returns an Array holding the result of `uniq!` for each group (nil for a
# group that had no duplicates).
def alphabetize
  @data = data.sort_by { |group, _domains| group.downcase }.to_h
  @data.each_value.map do |domains|
    domains.sort! { |left, right| sort_with_exceptions(left, right) }
    domains.uniq!
  end
end
Returns the raw content of the domain list as a string
# File lib/gman/domain_list.rb, line 36
# Returns the raw text of the domain list as a String, memoized in
# @contents. When `path` is set the file is read as UTF-8; otherwise the
# text is generated with `to_s`.
def contents
  # File.read opens, reads and closes the file in one call, so no file
  # handle is left open after the read.
  @contents ||= if path
                  File.read(path, encoding: 'utf-8')
                else
                  to_s
                end
end
Return the total number of domains in the list
# File lib/gman/domain_list.rb, line 80
# Returns the number of entries returned by `domains`.
def count
  domains.size
end
Returns the parsed contents of the domain list as a hash in the form of group => domains
# File lib/gman/domain_list.rb, line 46
# Returns the list as a Hash of group => domains. Unless it was already
# set, it is parsed from `contents` on first use and memoized in @data.
def data
  @data ||= begin
    raw = contents
    string_to_hash(raw)
  end
end
Return an array of strings representing all domains on the list
# File lib/gman/domain_list.rb, line 75
# Returns a sorted Array of every distinct domain across all groups,
# with nil entries removed.
def domains
  every_entry = data.values.flatten
  present = every_entry.compact
  present.sort.uniq
end
Returns an array of strings representing the list groups
# File lib/gman/domain_list.rb, line 70
# Returns an Array of the group names, i.e. the keys of `data`.
def groups
  data.keys
end
Given a domain, find a domain on the list that is a parent of it, i.e. the given domain ends with a dot followed by the listed domain. E.g., `fcc.gov` would be the parent of `data.fcc.gov`.
# File lib/gman/domain_list.rb, line 117
# Finds the first listed domain that is a parent of the given domain,
# e.g. `fcc.gov` for `data.fcc.gov`.
#
# domain - the domain to look up; it is converted with `to_s`, so nil
#          simply yields no match
#
# Returns the matching entry from `domains`, or nil when there is none.
def parent_domain(domain)
  name = domain.to_s
  # A plain suffix test checks the true end of the string. A `$`-anchored
  # regex would also match before a newline and accept input such as
  # "data.fcc.gov\nanything".
  domains.find { |candidate| name.end_with?(".#{candidate}") }
end
Returns the path to the domain list on disk
# File lib/gman/domain_list.rb, line 52
# Returns the location of the list on disk, falling back to (and
# remembering) Gman.list_path when none was set.
def path
  return @path if @path

  @path = Gman.list_path
end
Returns an instance of our custom public suffix list. The list behaves like PublicSuffix::List but is limited to our whitelisted domains.
# File lib/gman/domain_list.rb, line 59
# Returns the result of PublicSuffix::List.parse applied to `contents`,
# built on first use and memoized in @public_suffix_list.
def public_suffix_list
  @public_suffix_list ||= begin
    raw = contents
    PublicSuffix::List.parse(raw)
  end
end
The string representation of the domain list, in public suffix format
# File lib/gman/domain_list.rb, line 101
# Serializes the list. Groups are written in case-insensitive alphabetical
# order; each named group starts with a "// <group>" line followed by its
# domains, one per line, and is separated from preceding output by a blank
# line. A group with an empty name gets no header line.
#
# Returns a new String.
def to_s
  output = +''
  data.sort_by { |group, _| group.downcase }.each do |group, domains|
    unless group.empty?
      # Key the separator off what has been written so far. Nothing
      # precedes the very first entry, so it gets no leading blank line.
      output << "\n\n" unless output.empty?
      output << "// #{group}\n"
    end
    output << domains.join("\n")
  end
  output
end
Returns whether the given domain is on the domain list (a domain matched only by an exception rule is not considered valid)
# File lib/gman/domain_list.rb, line 64
# Looks the domain up in `public_suffix_list`.
#
# Returns true when a rule is found and that rule is not a
# PublicSuffix::Rule::Exception; false otherwise.
def valid?(domain)
  rule = public_suffix_list.find(domain, default: nil)
  return false if rule.nil?

  !rule.is_a?(PublicSuffix::Rule::Exception)
end
Write the domain list to disk
# File lib/gman/domain_list.rb, line 95
# Alphabetizes the list, then writes its public suffix representation to
# the file at `path`.
def write
  alphabetize
  destination = path
  File.write(destination, to_public_suffix)
end
Private Instance Methods
# File lib/gman/domain_list.rb, line 135
# Groups a sequence of list lines under the comment line that precedes them.
#
# lines - an Array of Strings. A line matching COMMENT_REGEX starts a new
#         group named by the regex's first capture; every other line is
#         downcased and added to the current group via safe_push. Lines
#         seen before any comment land in the group named ''.
#
# Returns a Hash of group => Array of domains.
def array_to_hash(lines)
  group = ''
  lines.each_with_object({}) do |line, domain_hash|
    # Match once and reuse the MatchData rather than testing and then
    # matching the same line a second time.
    match = COMMENT_REGEX.match(line)
    if match
      group = match[1]
    else
      safe_push(domain_hash, group, line.downcase)
    end
  end
end
Add a value to an array in a hash, creating the array if necessary. hash - the hash; key - the key within that hash to add the value to; value - the single value to push into the array at hash[key]
# File lib/gman/domain_list.rb, line 152
# Appends a value to the array stored under a key, creating that array
# first when the key has none. Empty values are ignored.
#
# hash  - the hash to modify
# key   - the key whose array receives the value
# value - the value to append
#
# Returns the array stored at hash[key], or nil when the value was empty.
def safe_push(hash, key, value)
  (hash[key] ||= []).push(value) unless value.empty?
end
# File lib/gman/domain_list.rb, line 159
# Comparator that orders entries starting with '!' after entries that do
# not; two entries of the same kind are compared with `<=>`.
#
# Returns -1, 0 or 1.
def sort_with_exceptions(left, right)
  left_is_exception = left.start_with?('!')
  right_is_exception = right.start_with?('!')
  return left <=> right if left_is_exception == right_is_exception

  left_is_exception ? 1 : -1
end
# File lib/gman/domain_list.rb, line 131
# Splits text into lines, treating "\r\n", a lone "\r" and "\n" all as
# line breaks. Trailing empty lines are dropped.
#
# Returns an Array of Strings.
def string_to_array(string)
  string.split(/\r\n?|\n/)
end
Parse a public-suffix formatted string into a hash of groups => [domains]
# File lib/gman/domain_list.rb, line 124
# Parses list text by splitting it into lines (string_to_array) and
# grouping those lines (array_to_hash).
#
# Returns a Hash of group => domains, or nil when no string is given.
def string_to_hash(string)
  array_to_hash(string_to_array(string)) if string
end