class SiteDiff::Api

SiteDiff API interface.

Public Class Methods

init(options) click to toggle source

Initialize a SiteDiff project.

Calling:

SiteDiff::Api.init(
  depth: 3,
  directory: 'sitediff',
  concurrency: 3,
  interval: 0,
  include: nil,
  exclude: '*.pdf',
  preset: 'drupal',
  curl_opts: {timeout: 60},
  crawl: false
)
# File lib/sitediff/api.rb, line 37
# Creates a SiteDiff configuration through SiteDiff::Config::Creator and logs
# the location of the resulting config file.
#
# Keys read from +options+: :debug, :before_url, :after_url, :depth,
# :directory, :concurrency, :interval, :include, :exclude, :preset and
# :curl_opts. The :crawl key is not acted upon yet (see the TODO below).
def self.init(options)
  creator = SiteDiff::Config::Creator.new(
    options[:debug],
    options[:before_url],
    options[:after_url]
  )

  # :include and :exclude are passed through Config.create_regexp before
  # being handed to the creator; every other setting is forwarded untouched.
  settings = {
    depth: options[:depth],
    directory: options[:directory],
    concurrency: options[:concurrency],
    interval: options[:interval],
    include: Config.create_regexp(options[:include]),
    exclude: Config.create_regexp(options[:exclude]),
    preset: options[:preset],
    curl_opts: options[:curl_opts]
  }
  creator.create(**settings)

  SiteDiff.log("Created #{creator.config_file.expand_path}", :success)

  # TODO: implement crawl ^^^
  # Discover paths, if enabled.
  # if options[:crawl]
  #   crawl(creator.config_file)
  #   SiteDiff.log 'You can now run "sitediff diff".', :success
  # else
  #   SiteDiff.log 'Run "sitediff crawl" to discover paths. You should then be able to run "sitediff diff".', :info
  # end
end
new(directory, config_file = nil) click to toggle source

Initializes a new Api object.

# File lib/sitediff/api.rb, line 17
# Prepares the working directory and loads the configuration.
#
# directory   - handed to get_dir, which creates it when missing and falls
#               back to '.' when nil.
# config_file - optional; passed to SiteDiff::Config.new along with the
#               resolved directory.
def initialize(directory, config_file = nil)
  working_dir = get_dir(directory)
  @dir = working_dir
  @config = SiteDiff::Config.new(config_file, working_dir)
end

Public Instance Methods

crawl() click to toggle source

Crawl the `before` site to determine `paths`.

# File lib/sitediff/api.rb, line 149
# Crawls every root listed in @config.roots, caching each visited page and
# recording its path through after_crawl, then writes the merged, sorted path
# list with @config.paths_file_write and logs a summary.
def crawl
  # The cache is created up front and accepts writes for both tags.
  @cache = SiteDiff::Cache.new(create: true, directory: @dir)
  @cache.write_tags << :before << :after

  # One shared Hydra drives the crawlers for all roots.
  hydra = Typhoeus::Hydra.new(max_concurrency: @config.setting(:concurrency))

  @paths = {}
  @config.roots.each do |tag, url|
    # NOTE(review): @debug is not assigned by any method shown here, so it is
    # presumably nil at this point; confirm where it is meant to be set.
    crawler_args = [
      hydra,
      url,
      @config.setting(:interval),
      @config.setting(:include),
      @config.setting(:exclude),
      @config.setting(:depth),
      @config.curl_opts,
      @debug
    ]
    Crawler.new(*crawler_args) do |info|
      SiteDiff.log("Visited #{info.uri}, cached.")
      after_crawl(tag, info)
    end
  end
  hydra.run

  # Union the per-tag path lists; nil.to_a covers the case where nothing
  # was collected at all.
  merged = @paths.values.reduce(&:|)
  @paths = merged.to_a.sort
  @config.paths_file_write(@paths)

  # Report what was found and where the paths file lives.
  paths_file = Pathname.new(@dir) + Config::DEFAULT_PATHS_FILENAME
  SiteDiff.log('')
  SiteDiff.log("#{@paths.length} page(s) found.")
  SiteDiff.log("Created #{paths_file.expand_path}.", :success, 'done')
end
diff(options) click to toggle source

Diff the `before` and `after`.

Calling:

api.diff(
  paths: options['paths'],
  paths_file: options['paths-file'],
  ignore_whitespace: options['ignore-whitespace'],
  export: options['export'],
  before: options['before'],
  after: options['after'],
  cached: options['cached'],
  verbose: options['verbose'],
  report_format: options['report-format'],
  before_report: options['before-report'],
  after_report: options['after-report'],
  cli_mode: false
)
# File lib/sitediff/api.rb, line 82
# Diffs the `before` and `after` sites and generates the requested report.
#
# Keys read from +options+: :ignore_whitespace, :export, :paths, :paths_file,
# :before, :after, :cached, :verbose, :debug, :report_format, :before_report,
# :after_report and :cli_mode.
#
# Config::InvalidConfig and Config::ConfigNotFound are logged instead of being
# raised. When :cli_mode is set and no exception occurred, the process exits
# with status 2 if any path failed and 0 otherwise.
def diff(options)
  @config.ignore_whitespace = options[:ignore_whitespace]
  @config.export = options[:export]

  # Apply "paths" override, if any; otherwise read paths from a file,
  # defaulting to the paths file inside the working directory.
  if options[:paths]
    @config.paths = options[:paths]
  else
    paths_file = options[:paths_file]
    paths_file ||= File.join(@dir, Config::DEFAULT_PATHS_FILENAME)
    paths_file = File.expand_path(paths_file)

    paths_count = @config.paths_file_read(paths_file)
    SiteDiff.log "Read #{paths_count} paths from: #{paths_file}"
  end

  # TODO: Why do we allow before and after override during diff?
  @config.before['url'] = options[:before] if options[:before]
  @config.after['url'] = options[:after] if options[:after]

  # Prepare cache. Reads are enabled per tag according to :cached
  # ('before', 'after' or 'all'); writes are always enabled for both tags.
  cache = SiteDiff::Cache.new(
    create: options[:cached] != 'none',
    directory: @dir
  )
  cache.read_tags << :before if %w[before all].include?(options[:cached])
  cache.read_tags << :after if %w[after all].include?(options[:cached])
  cache.write_tags << :before << :after

  # Run sitediff.
  sitediff = SiteDiff.new(
    @config,
    cache,
    options[:verbose],
    options[:debug]
  )
  num_failing = sitediff.run
  exit_code = num_failing.positive? ? 2 : 0

  # Generate HTML report (always produced when exporting).
  if options[:report_format] == 'html' || @config.export
    sitediff.report.generate_html(
      @dir,
      options[:before_report],
      options[:after_report]
    )
  end

  # Generate JSON report, only when not exporting. The export flag is tested
  # for falsiness so that an unset (nil) flag behaves like false; comparing
  # with `== false` silently skipped the JSON report when :export was omitted.
  if options[:report_format] == 'json' && !@config.export
    sitediff.report.generate_json @dir
  end

  SiteDiff.log 'Run "sitediff serve" to see a report.' unless options[:export]
rescue Config::InvalidConfig, Config::ConfigNotFound => e
  # Both configuration errors are reported the same way.
  SiteDiff.log "Invalid configuration: #{e.message}", :error
  SiteDiff.log e.backtrace, :error if options[:verbose]
else # no exception was raised
  # Thor::Error  --> exit(1), guaranteed by exit_on_failure?
  # Failing diff --> exit(2), populated above
  exit(exit_code) if options[:cli_mode]
end
serve(options) click to toggle source

Serves SiteDiff report for accessing in the browser.

Calling:

api.serve(browse: true, port: 13080)
# File lib/sitediff/api.rb, line 195
# Starts a SiteDiff::Webserver::ResultServer for the working directory and
# calls #wait on it.
#
# Keys read from +options+: :port, :browse and :verbose. A SiteDiffException
# is logged as an error (with its backtrace when :verbose is set) rather than
# propagated.
def serve(options)
  # The server is given a cache that can read both tags.
  @cache = Cache.new(directory: @dir)
  @cache.read_tags << :before << :after

  server = SiteDiff::Webserver::ResultServer.new(
    options[:port],
    @dir,
    browse: options[:browse],
    cache: @cache,
    config: @config
  )
  server.wait
rescue SiteDiffException => e
  SiteDiff.log(e.message, :error)
  SiteDiff.log(e.backtrace, :error) if options[:verbose]
end
store(options) click to toggle source
# File lib/sitediff/api.rb, line 213
# Fetches every configured path from a single base URL and writes the
# responses to the cache under the :before tag.
#
# Keys read from +options+: :url (falls back to the configured `after` URL)
# and :debug.
def store(options)
  # TODO: Figure out how to remove this config.validate call.
  @config.validate(need_before: false)
  @config.paths_file_read

  @cache = SiteDiff::Cache.new(directory: @dir, create: true)
  @cache.write_tags << :before

  source_url = options[:url] || @config.after['url']

  # NOTE(review): get_curl_opts is not among the methods shown for this
  # class (crawl reads @config.curl_opts instead); confirm it is in scope.
  fetch_args = [
    @cache,
    @config.paths,
    @config.setting(:interval),
    @config.setting(:concurrency),
    get_curl_opts(@config.settings),
    options[:debug]
  ]
  fetcher = SiteDiff::Fetch.new(*fetch_args, before: source_url)

  fetcher.run { |path, _res| SiteDiff.log("Visited #{path}, cached") }
end

Private Instance Methods

after_crawl(tag, info) click to toggle source

Processes a crawled path.

# File lib/sitediff/api.rb, line 247
# Records one crawled page: appends its canonical path to @paths[tag] and
# stores the fetched result in @cache.
#
# tag  - key under which the path is registered and the result is cached.
# info - crawl result; must respond to #relative and #read_result.
def after_crawl(tag, info)
  canonical = UriWrapper.canonicalize(info.relative)

  # Register the path, creating the per-tag list on first use.
  (@paths[tag] ||= []) << canonical

  fetched = info.read_result

  # Write result to applicable cache.
  @cache.set(tag, canonical, fetched)
  # With no :before root configured (single-site), the same result is also
  # cached under :before.
  @cache.set(:before, canonical, fetched) unless @config.roots[:before]

  # TODO: Restore application of rules.
  # @rules.handle_page(tag, res.content, info.document) if @rules && !res.error
end
get_dir(directory) click to toggle source

Ensures that the given directory exists.

# File lib/sitediff/api.rb, line 238
# Ensures that the given directory exists and returns its path as a String.
#
# directory - path to use; nil falls back to the current directory ('.').
#
# Side effect: @dir is left holding the Pathname for the directory.
def get_dir(directory)
  # Create the dir. Must go before cache initialization!
  location = Pathname.new(directory || '.')
  @dir = location
  location.mkpath unless location.directory?
  location.to_s
end