class SiteDiff::Config

SiteDiff Configuration.

Constants

ALLOWED_CONFIG_KEYS

Keys allowed in config files. TODO: Deprecate repeated params before_url and after_url. TODO: Create a method self.supports TODO: Deprecate in favor of self.supports key, subkey, subkey…

ALLOWED_SETTINGS_KEYS

Keys allowed in the “settings” key. TODO: Create a method self.supports TODO: Deprecate in favor of self.supports key, subkey, subkey…

DEFAULT_CONFIG

Default SiteDiff config.

DEFAULT_FILENAME

Default config file.

DEFAULT_PATHS_FILENAME

Default paths file.

Attributes

directory[R]

Public Class Methods

create_regexp(string_param) click to toggle source

Creates a RegExp from a string.

# File lib/sitediff/config.rb, line 457
# Creates a Regexp from a string.
#
# @param string_param [String] Regular expression source.
# @return [Regexp, nil] The compiled expression, or nil when the string is
#   empty or is not a valid regular expression (the error is logged).
def self.create_regexp(string_param)
  return nil if string_param == ''

  Regexp.new(string_param)
rescue RegexpError => e
  # Regexp.new signals a malformed pattern with RegexpError, so that is the
  # error that has to be rescued for the failure to be logged.
  SiteDiff.log "Invalid RegExp: #{string_param}", :error
  SiteDiff.log e.message, :error
  # TODO: Use SiteDiff.log type :debug
  # SiteDiff.log e.backtrace, :error if options[:verbose]
  nil
end
merge(first, second) click to toggle source

Merges two normalized Hashes according to the following rules: (1) paths are merged as arrays; (2) for before and after, each subhash H (e.g. ['before']) is merged as follows:

a)  if first[H] and second[H] are expected to be arrays, their values
    are merged as such,
b)  if first[H] and second[H] are expected to be scalars, the value for
    second[H] is kept if and only if first[H] is nil.

For example, merge(h1, h2) results in h3:

(h1) before: {selector: foo, sanitization: [pattern: foo]}
(h2) before: {selector: bar, sanitization: [pattern: bar]}
(h3) before: {selector: foo, sanitization: [pattern: foo, pattern: bar]}

# File lib/sitediff/config.rb, line 133
# Merges two normalized config Hashes into a new Hash.
#
# Neither argument is modified, so constants and previously loaded
# configurations can be passed in safely.
#
# Rules applied:
# - Top-level sanitizer keys (every key listed in Sanitizer::TOOLS): the
#   value from +second+ wins, otherwise the one from +first+; keys without
#   a value are left out.
# - 'before' / 'after': keys present in only one side are kept. For keys
#   present in both, array tools are concatenated (first, then second),
#   'settings' takes the value from +second+, and anything else keeps the
#   value from +first+ unless it is nil/false.
# - 'output': concatenation of both arrays.
# - 'before_url_report' / 'after_url_report': +first+ wins over +second+.
# - 'settings' and 'report': merged with merge_deep.
#
# NOTE(review): 'paths' is not carried over by this method -- confirm that
# callers populate paths separately.
#
# @param first [Hash] Normalized config.
# @param second [Hash] Normalized config.
# @return [Hash] The merged config.
def self.merge(first, second)
  result = {
    'before' => {},
    'after' => {},
    'output' => [],
    'settings' => {}
  }

  # Merge sanitization rules.
  Sanitizer::TOOLS.values.flatten(1).each do |key|
    result[key] = second[key] || first[key]
    result.delete(key) unless result[key]
  end

  %w[before after].each do |pos|
    earlier = first[pos] || {}
    later = second[pos] || {}

    # Hash#merge (non-destructive) keeps the inputs untouched; the block is
    # only invoked for keys that exist on both sides.
    result[pos] = earlier.merge(later) do |key, a, b|
      if Sanitizer::TOOLS[:array].include? key
        (a || []) + (b || []) # Rule 2a.
      elsif key == 'settings'
        b
      else
        a || b # Rule 2b.
      end
    end
  end

  # Merge output array.
  result['output'] += (first['output'] || []) + (second['output'] || [])

  # Merge url_report keys.
  %w[before_url_report after_url_report].each do |pos|
    result[pos] = first[pos] || second[pos]
  end

  # Merge settings.
  result['settings'] = merge_deep(
    first['settings'] || {},
    second['settings'] || {}
  )

  # Merge report labels.
  result['report'] = merge_deep(
    first['report'] || {},
    second['report'] || {}
  )

  result
end
merge_deep(first, second) click to toggle source

Merges 2 iterable objects deeply.

# File lib/sitediff/config.rb, line 195
# Merges two Hashes deeply and returns a new Hash.
#
# For keys present in both Hashes:
# - Hash values are merged recursively,
# - Array values are concatenated (first, then second),
# - any other value is replaced by the value from +second+.
# A nil on the +second+ side leaves a Hash/Array from +first+ unchanged;
# a value of a different type on the +second+ side replaces it.
#
# @param first [Hash]
# @param second [Hash]
# @return [Hash] The merged Hash.
def self.merge_deep(first, second)
  first.merge(second) do |_key, val1, val2|
    # Match on the value itself: `case val1.class` compares Class objects
    # with Hash/Array via ===, which never matches.
    case val1
    when Hash
      val2.nil? || val2.is_a?(Hash) ? merge_deep(val1, val2 || {}) : val2
    when Array
      val2.nil? || val2.is_a?(Array) ? val1 + (val2 || []) : val2
    else
      val2
    end
  end
end
new(file, directory) click to toggle source

Creates a SiteDiff Config object.

# File lib/sitediff/config.rb, line 247
# Builds a Config from a config file, layered on top of DEFAULT_CONFIG.
#
# @param file [String, nil] Path to the config file; when nil,
#   DEFAULT_FILENAME inside +directory+ is used.
# @param directory [String] SiteDiff working directory.
# @raise [InvalidConfig] When the config file does not exist (validation
#   of the loaded data is delegated to #validate).
def initialize(file, directory)
  # No explicit file: look for the default config file in the directory.
  config_path = file.nil? ? File.join(directory, DEFAULT_FILENAME) : file

  unless File.exist?(config_path)
    raise InvalidConfig, "Missing config file #{File.expand_path(config_path)}."
  end

  loaded = Config.load_conf(config_path)
  @config = Config.merge(DEFAULT_CONFIG, loaded)
  @file = config_path
  @directory = directory

  # Validate configurations.
  validate
end
normalize(conf) click to toggle source

Takes a Hash and normalizes it to the following form by merging globals into before and after. A normalized config Hash looks like this:

paths:
- /about

before:
  url: http://before
  selector: body
  ## Note: use either `selector` or `regions`, but not both
  regions:
    - name: title
      selector: .field-name-title h2
    - name: body
      selector: .field-name-field-news-description .field-item
  dom_transform:
  - type: remove
    selector: script

after:
  url: http://after
  selector: body

## Note: use `output` only with `regions`
output:
  - title
  - author
  - source
  - body
# File lib/sitediff/config.rb, line 99
# Normalizes a raw config Hash by folding global rules into the 'before'
# and 'after' sections.
#
# The given Hash is modified in place; the return value is a new Hash
# restricted to the keys listed in ALLOWED_CONFIG_KEYS.
#
# @param conf [Hash] Raw config.
# @return [Hash] Normalized config.
def self.normalize(conf)
  array_keys = Sanitizer::TOOLS[:array]
  scalar_keys = Sanitizer::TOOLS[:scalar]

  %w[before after].each do |pos|
    site = (conf[pos] ||= {})

    # Array rules: section rules first, global rules appended.
    array_keys.each do |key|
      site[key] ||= []
      site[key] += conf[key] if conf[key]
    end

    # Scalar rules: the global value is only a fallback.
    scalar_keys.each do |key|
      site[key] ||= conf[key]
    end

    site['url'] ||= conf["#{pos}_url"]
    site['curl_opts'] = conf['curl_opts']
  end

  # Normalize paths.
  conf['paths'] = Config.normalize_paths(conf['paths'])

  conf.select { |key, _value| ALLOWED_CONFIG_KEYS.include?(key) }
end
remove_defaults(data) click to toggle source

Removes default parameters from a config hash.

I know this is weird, but it'll be fixed. The config management needs to be streamlined further.

# File lib/sitediff/config.rb, line 223
# Returns a copy of a config Hash with default parameters removed.
#
# The argument is not modified: the work is done on a deep copy.
#
# Removed from 'settings': entries whose value equals the corresponding
# DEFAULT_CONFIG setting, entries with a nil/false value, curl options
# equal to UriWrapper::DEFAULT_CURL_OPTS, and the 'curl_opts' key itself
# when nothing is left in it.
#
# @param data [Hash] Config data (must be serializable with Marshal).
# @return [Hash] The filtered copy.
def self.remove_defaults(data)
  # Create a deep copy of the config data.
  result = Marshal.load(Marshal.dump(data))

  settings = result['settings']
  # Nothing to filter when there is no settings section.
  return result unless settings

  # Exclude default settings.
  settings.delete_if do |key, value|
    value == DEFAULT_CONFIG['settings'][key] || !value
  end

  # Exclude default curl opts.
  curl_opts = (settings['curl_opts'] ||= {})
  curl_opts.delete_if do |key, value|
    value == UriWrapper::DEFAULT_CURL_OPTS[key.to_sym]
  end

  # Delete curl opts if empty.
  settings.delete('curl_opts') unless curl_opts.length.positive?

  result
end
stringify_keys(object) click to toggle source

Returns object clone with stringified keys. TODO: Make this method available globally, if required.

# File lib/sitediff/config.rb, line 440
# Returns a copy of a Hash-like object in which Symbol keys are replaced
# by Strings, applied recursively to the values.
#
# Objects that do not respond to each_key (including Arrays) are returned
# unchanged.
#
# @param object [Object]
# @return [Object] A new Hash for Hash-like input, otherwise +object+.
def self.stringify_keys(object)
  return object unless object.respond_to?('each_key')

  object.each_key.each_with_object({}) do |key, converted|
    string_key = key.is_a?(Symbol) ? key.to_s : key
    converted[string_key] = stringify_keys(object[key])
  end
end

Private Class Methods

load_conf(file, visited = []) click to toggle source

Loads a single YAML configuration file, merges all its 'included' files and returns a normalized Hash.

# File lib/sitediff/config.rb, line 551
# Loads one YAML config file, merges every file listed under its
# 'includes' key and returns the normalized result.
#
# NOTE(review): +visited+ is shared by all recursive calls, so a file
# reached through two different include chains is also reported as a
# circular dependency -- confirm this is intended.
#
# @param file [String] Path to the config file.
# @param visited [Array<String>] Real paths already loaded; appended to.
# @return [Hash] Normalized, merged config.
# @raise [InvalidConfig] When +file+ is already in +visited+.
def self.load_conf(file, visited = [])
  # Resolve a/../a/ and symlinks so the same file always compares equal.
  real_path = File.realpath(file)
  if visited.include?(real_path)
    raise InvalidConfig, "Circular dependency: #{real_path}"
  end

  raw = load_raw_yaml(real_path) # not normalized yet
  visited << real_path

  # Read the include list before normalizing.
  included = raw['includes'] || []
  # Include paths are relative to the including file.
  base_dir = File.dirname(real_path)

  included.reduce(Config.normalize(raw)) do |merged, dep|
    Config.merge(merged, load_conf(File.join(base_dir, dep), visited))
  end
end
load_raw_yaml(file) click to toggle source

Reads a YAML file and raises an InvalidConfig if the file is not valid.

# File lib/sitediff/config.rb, line 531
# Reads a YAML config file and checks its top-level structure.
#
# @param file [String] Path to the YAML file.
# @return [Hash] Parsed content ({} when the file yields nil/false).
# @raise [InvalidConfig] When the content is not a Hash or contains a key
#   that is not in ALLOWED_CONFIG_KEYS.
def self.load_raw_yaml(file)
  # TODO: Only show this in verbose mode.
  SiteDiff.log "Reading config file: #{Pathname.new(file).expand_path}"
  conf = YAML.load_file(file) || {}

  raise InvalidConfig, "Invalid configuration file: '#{file}'" unless conf.is_a?(Hash)

  # Report the first key that is not allowed, if any.
  unknown_keys = conf.keys.reject { |key| ALLOWED_CONFIG_KEYS.include?(key) }
  unless unknown_keys.empty?
    raise InvalidConfig,
          "Unknown configuration key (#{file}): '#{unknown_keys.first}'"
  end

  conf
end
normalize_paths(paths) click to toggle source
# File lib/sitediff/config.rb, line 525
# Normalizes a list of paths: each entry gets a leading '/' if it lacks
# one and has a trailing line break removed.
#
# @param paths [Array<String>, nil] Paths; nil is treated as an empty list.
# @return [Array<String>] Normalized paths.
def self.normalize_paths(paths)
  (paths || []).map do |path|
    rooted = path[0] == '/' ? path : "/#{path}"
    rooted.chomp
  end
end

Public Instance Methods

after(apply_preset = false) click to toggle source

Get “after” site configuration.

# File lib/sitediff/config.rb, line 274
# Returns the "after" site configuration.
#
# @param apply_preset [Boolean] Passed through to #section as the
#   preset-merging flag.
# @return [Hash, nil] Whatever #section returns for :after.
def after(apply_preset = false)
  section(:after, apply_preset)
end
after_time=(time) click to toggle source

Set crawl time for 'after'

# File lib/sitediff/config.rb, line 339
# Stores the crawl time of the 'after' site in the report settings.
#
# @param time [Object] Value stored under report['after_time'].
def after_time=(time)
  report_data = @config['report']
  report_data['after_time'] = time
end
after_url() click to toggle source

Get “after” site URL.

# File lib/sitediff/config.rb, line 279
# Returns the base URL of the "after" site.
#
# @return [Object, nil] The section's 'url' value, or nil when #after
#   returns nothing.
def after_url
  site = after
  return nil unless site

  site['url']
end
all() click to toggle source

Gets all loaded configuration except defaults.

@return [Hash]

Config data.
# File lib/sitediff/config.rb, line 213
# Returns all loaded configuration except defaults.
#
# Works on a Marshal round-trip copy of the loaded config, which is then
# handed to the class-level remove_defaults.
#
# @return [Hash] Config data.
def all
  snapshot = Marshal.dump(@config)
  self.class.remove_defaults(Marshal.load(snapshot))
end
before(apply_preset = false) click to toggle source

Get “before” site configuration.

# File lib/sitediff/config.rb, line 263
# Returns the "before" site configuration.
#
# @param apply_preset [Boolean] Passed through to #section as the
#   preset-merging flag.
# @return [Hash, nil] Whatever #section returns for :before.
def before(apply_preset = false)
  section(:before, apply_preset)
end
before_time=(time) click to toggle source

Set crawl time for 'before'

# File lib/sitediff/config.rb, line 334
# Stores the crawl time of the 'before' site in the report settings.
#
# @param time [Object] Value stored under report['before_time'].
def before_time=(time)
  report_data = @config['report']
  report_data['before_time'] = time
end
before_url() click to toggle source

Get “before” site URL.

# File lib/sitediff/config.rb, line 268
# Returns the base URL of the "before" site.
#
# @return [Object, nil] The section's 'url' value, or nil when #before
#   returns nothing.
def before_url
  site = before
  return nil unless site

  site['url']
end
curl_opts() click to toggle source

Return merged CURL options.

# File lib/sitediff/config.rb, line 472
# Returns the CURL options: UriWrapper::DEFAULT_CURL_OPTS overlaid with
# the 'curl_opts' setting, with the strings 'true' / 'false' converted to
# real booleans.
#
# @return [Hash] Merged CURL options.
def curl_opts
  # We do want string keys here
  booleans = { 'true' => true, 'false' => false }
  overrides = settings['curl_opts'] || {}
  merged = UriWrapper::DEFAULT_CURL_OPTS.clone.merge(overrides)

  # Only existing keys are reassigned, so the hash can be updated while
  # its keys are being walked.
  merged.each_key do |name|
    value = merged[name]
    merged[name] = booleans.fetch(value, value)
  end
  merged
end
export() click to toggle source

Get export option

# File lib/sitediff/config.rb, line 307
# Returns the 'export' option of the loaded config.
#
# @return [Object, nil] Value stored under the 'export' key.
def export
  @config['export']
end
export=(export) click to toggle source

Set export option

# File lib/sitediff/config.rb, line 312
# Sets the 'export' option of the loaded config.
#
# @param export [Object] Value stored under the 'export' key.
def export=(export)
  @config['export'] = export
end
ignore_whitespace() click to toggle source

Get ignore_whitespace option

# File lib/sitediff/config.rb, line 297
# Returns the 'ignore_whitespace' option of the loaded config.
#
# @return [Object, nil] Value stored under the 'ignore_whitespace' key.
def ignore_whitespace
  @config['ignore_whitespace']
end
ignore_whitespace=(ignore_whitespace) click to toggle source

Set ignore_whitespace option

# File lib/sitediff/config.rb, line 302
# Sets the 'ignore_whitespace' option of the loaded config.
#
# @param ignore_whitespace [Object] Value stored under the
#   'ignore_whitespace' key.
def ignore_whitespace=(ignore_whitespace)
  @config['ignore_whitespace'] = ignore_whitespace
end
output() click to toggle source

Get output option

# File lib/sitediff/config.rb, line 317
# Returns the 'output' option of the loaded config.
#
# @return [Object, nil] Value stored under the 'output' key.
def output
  @config['output']
end
output=(output) click to toggle source

Set output option

# File lib/sitediff/config.rb, line 322
# Sets the 'output' option of the loaded config.
#
# @param output [Array] New output list.
# @raise [RuntimeError] When +output+ is not an Array.
def output=(output)
  unless output.is_a?(Array)
    raise 'Output must be an Array'
  end

  @config['output'] = output
end
paths() click to toggle source

Get paths.

# File lib/sitediff/config.rb, line 285
# Returns the paths of the loaded config.
#
# @return [Object, nil] Value stored under the 'paths' key.
def paths
  @config['paths']
end
paths=(paths) click to toggle source

Set paths.

# File lib/sitediff/config.rb, line 290
# Sets the paths of the loaded config, after passing them through
# Config.normalize_paths.
#
# @param paths [Array] New paths.
# @raise [RuntimeError] When +paths+ is not an Array.
def paths=(paths)
  unless paths.is_a?(Array)
    raise 'Paths must be an Array'
  end

  normalized = Config.normalize_paths(paths)
  @config['paths'] = normalized
end
paths_file_read(file = nil) click to toggle source

Reads a collection of paths from a file.

@param [String] file

A file containing one path per line.

@return [Integer]

Number of paths read.
# File lib/sitediff/config.rb, line 367
# Reads a collection of paths from a file and stores them through
# #paths=.
#
# @param file [String, nil] A file containing one path per line; when
#   nil, DEFAULT_PATHS_FILENAME inside the working directory is used.
# @return [Integer] Number of paths read.
# @raise [Config::InvalidConfig] When the file does not exist.
def paths_file_read(file = nil)
  source = file || File.join(@directory, DEFAULT_PATHS_FILENAME)

  raise Config::InvalidConfig, "File not found: #{source}" unless File.exist?(source)

  # Every line of the file becomes one path.
  self.paths = File.readlines(source)

  # Return the number of paths.
  paths.length
end
paths_file_write(paths, file = nil) click to toggle source

Writes an array of paths to a file.

@param [Array] paths

An array of paths.

@param [String] file

Optional path to a file.
# File lib/sitediff/config.rb, line 350
# Writes an array of paths to a file, one path per line.
#
# @param paths [Array] A non-empty array of paths.
# @param file [String, nil] Optional path to a file; when nil,
#   DEFAULT_PATHS_FILENAME inside the working directory is used.
# @raise [SiteDiffException] When +paths+ is not a non-empty Array.
def paths_file_write(paths, file = nil)
  writable = paths.is_a?(Array) && paths.length.positive?
  raise SiteDiffException, 'Write failed. Invalid paths.' unless writable

  target = file || File.join(@directory, DEFAULT_PATHS_FILENAME)
  File.open(target, 'w+') do |handle|
    handle.puts(paths)
  end
end
report() click to toggle source

Return report display settings.

# File lib/sitediff/config.rb, line 329
# Returns the report display settings of the loaded config.
#
# @return [Object, nil] Value stored under the 'report' key.
def report
  @config['report']
end
roots() click to toggle source

Get roots.

Example: If the config has a “before” and “after” sections, then roots will be [“before”, “after”].

# File lib/sitediff/config.rb, line 385
# Returns the root URLs keyed by section name.
#
# 'after' is always present; 'before' is added only when a "before"
# section exists. The result is also kept in @roots.
#
# @return [Hash] Section name => base URL.
def roots
  urls = { 'after' => after_url }
  urls['before'] = before_url if before
  @roots = urls
end
setting(key) click to toggle source

Gets a setting.

@param [String] key

A key.

@return [*]

A value, if exists.
# File lib/sitediff/config.rb, line 399
# Gets a single setting.
#
# @param key [String, Symbol] Setting name; Symbols are converted to
#   Strings before the lookup.
# @return [Object, nil] The stored value, or nil when the key is absent.
def setting(key)
  name = key.is_a?(Symbol) ? key.to_s : key
  stored = @config['settings']
  stored.key?(name) ? stored[name] : nil
end
settings() click to toggle source

Gets all settings.

TODO: Make sure the settings are not writable.

@return [Hash]

All settings.
# File lib/sitediff/config.rb, line 411
# Returns all settings of the loaded config.
#
# The stored Hash itself is returned, not a copy.
#
# @return [Object, nil] Value stored under the 'settings' key.
def settings
  @config['settings']
end
validate(opts = {}) click to toggle source

Checks if the configuration is usable for diff-ing. TODO: Do we actually need the opts argument?

# File lib/sitediff/config.rb, line 417
# Checks if the configuration is usable for diff-ing.
# TODO: Do we actually need the opts argument?
#
# @param opts [Hash] Options; :need_before (default true) controls
#   whether a 'before' URL is required.
# @raise [InvalidConfig] When a required base URL is missing, or when an
#   interval is set while concurrency is not 1.
def validate(opts = {})
  options = { need_before: true }.merge(opts)

  before_required = options[:need_before]
  if before_required && !before['url']
    raise InvalidConfig, "Undefined 'before' base URL."
  end

  raise InvalidConfig, "Undefined 'after' base URL." unless after['url']

  # An interval only makes sense with a single worker.
  throttled = setting(:interval).to_i != 0
  if throttled && setting(:concurrency) != 1
    raise InvalidConfig, 'Concurrency must be 1 when an interval is set.'
  end

  # Validate preset.
  preset = setting(:preset)
  Preset.exist?(preset, true) if preset
end

Private Instance Methods

section(name, with_preset = false) click to toggle source

Returns one of the “before” or “after” sections.

@param [String|Symbol]

Section name. Example: before, after.

@param [Boolean] with_preset

Whether to merge with preset config (if any).

@return [Hash|Nil]

Section data or Nil.
# File lib/sitediff/config.rb, line 494
# Returns one of the "before" or "after" sections.
#
# Without preset merging the stored section Hash itself is returned.
# With preset merging a copy is returned, so the preset rules are never
# written back into the loaded config and repeated calls give the same
# result.
#
# @param name [String, Symbol] Section name: before or after.
# @param with_preset [Boolean] Whether to merge with preset config (if any).
# @return [Hash, nil] Section data, or nil when the section is not defined.
# @raise [SiteDiffException] When +name+ is neither "before" nor "after".
def section(name, with_preset = false)
  name = name.to_s if name.is_a? Symbol

  # Validate section.
  unless %w[before after].include? name
    raise SiteDiffException, '"name" must be one of "before" or "after".'
  end

  # Return nil if section is not defined.
  result = @config[name]
  return nil unless result

  # Nothing to merge: hand back the section as stored.
  preset = setting(:preset)
  return result unless with_preset && !preset.nil?

  preset_config = Preset.read preset

  # Shallow copy: only top-level keys are reassigned below, and `+` builds
  # new arrays, so the stored section stays untouched.
  merged = result.dup

  # Merge plugins with array values.
  # TODO: This won't be required after plugin declarations are improved.
  # See https://rm.ewdev.ca/issues/18301
  Sanitizer::TOOLS[:array].each do |key|
    next unless preset_config[key]

    merged[key] = (merged[key] || []) + preset_config[key]
  end

  merged
end