class SiteDiff::Api
Sitediff API interface.
Public Class Methods
init(options)
click to toggle source
Initialize a SiteDiff
project.
Calling:
SiteDiff::Api.init( depth: 3, directory: 'sitediff', concurrency: 3, interval: 0, include: nil, exclude: '*.pdf', preset: 'drupal', curl_opts: {timeout: 60}, crawl: false )
# File lib/sitediff/api.rb, line 37
#
# Creates the configuration for a new SiteDiff project and logs where the
# resulting config file was written.
#
# Keys read from +options+: :debug, :before_url, :after_url, :include,
# :exclude, :depth, :directory, :concurrency, :interval, :preset and
# :curl_opts. A :crawl key is currently ignored (see the TODO below).
def self.init(options)
  # Prepare a config object and write it to the file system.
  creator = SiteDiff::Config::Creator.new(
    options[:debug],
    options[:before_url],
    options[:after_url]
  )

  # The include/exclude options are converted through Config.create_regexp
  # before being handed to the creator.
  settings = {
    include: Config.create_regexp(options[:include]),
    exclude: Config.create_regexp(options[:exclude])
  }

  # Every other setting is forwarded exactly as it was received.
  %i[depth directory concurrency interval preset curl_opts].each do |key|
    settings[key] = options[key]
  end

  creator.create(**settings)

  # TODO: implement crawl ^^^
  # Discover paths, if enabled.
  # if options[:crawl]
  #   crawl(creator.config_file)
  #   SiteDiff.log 'You can now run "sitediff diff".', :success
  # else
  #   SiteDiff.log 'Run "sitediff crawl" to discover paths. You should then be able to run "sitediff diff".', :info
  # end

  SiteDiff.log "Created #{creator.config_file.expand_path}", :success
end
new(directory, config_file = nil)
click to toggle source
Initializes a new Api
object.
# File lib/sitediff/api.rb, line 17
#
# Builds an Api instance bound to a working directory.
#
# +directory+ is passed through get_dir and the result is stored in @dir.
# +config_file+ (optional) is handed to SiteDiff::Config together with that
# directory; the resulting object is stored in @config.
def initialize(directory, config_file = nil)
  working_dir = get_dir(directory)
  @dir = working_dir
  @config = SiteDiff::Config.new(config_file, working_dir)
end
Public Instance Methods
crawl()
click to toggle source
Crawl the `before` site to determine `paths`.
# File lib/sitediff/api.rb, line 149
#
# Crawls every configured root, caches each visited page, and writes the
# sorted list of discovered paths through the config's paths file writer.
#
# Side effects: replaces @cache and @paths, and logs progress and a summary.
def crawl
  # Prepare cache.
  @cache = SiteDiff::Cache.new(create: true, directory: @dir)
  @cache.write_tags << :before << :after

  # Crawl with Hydra to discover paths.
  request_queue = Typhoeus::Hydra.new(
    max_concurrency: @config.setting(:concurrency)
  )
  @paths = {}
  @config.roots.each do |tag, root_url|
    # NOTE(review): @debug is not assigned anywhere in the code visible here,
    # so this is presumably nil - confirm.
    Crawler.new(
      request_queue,
      root_url,
      @config.setting(:interval),
      @config.setting(:include),
      @config.setting(:exclude),
      @config.setting(:depth),
      @config.curl_opts,
      @debug
    ) do |page|
      SiteDiff.log "Visited #{page.uri}, cached."
      after_crawl(tag, page)
    end
  end
  request_queue.run

  # Write paths to a file. The per-tag lists are unioned; when nothing was
  # collected the reduction yields nil, which to_a turns into [].
  merged = @paths.values.reduce { |seen, batch| seen | batch }
  @paths = merged.to_a.sort
  @config.paths_file_write(@paths)

  # Log output.
  paths_file = Pathname.new(@dir).join(Config::DEFAULT_PATHS_FILENAME)
  SiteDiff.log ''
  SiteDiff.log "#{@paths.length} page(s) found."
  SiteDiff.log "Created #{paths_file.expand_path}.", :success, 'done'
end
diff(options)
click to toggle source
Diff
the `before` and `after`.
Calling:
api.diff( paths: options['paths'], paths_file: options['paths-file'], ignore_whitespace: options['ignore-whitespace'], export: options['export'], before: options['before'], after: options['after'], cached: options['cached'], verbose: options['verbose'], report_format: options['report-format'], before_report: options['before-report'], after_report: options['after-report'], cli_mode: false )
# File lib/sitediff/api.rb, line 82
#
# Diffs the +before+ and +after+ sites and generates the requested report.
#
# Keys read from +options+:
# :ignore_whitespace, :export:: copied onto the config.
# :paths:: explicit list of paths; when absent, paths are read from
#          :paths_file (default: the default paths file inside @dir).
# :before, :after:: optional URL overrides.
# :cached:: 'none', 'before', 'after' or 'all'; selects which cache tags are
#           read. Any value other than 'none' lets the cache be created.
# :verbose, :debug:: forwarded to SiteDiff.new; :verbose also enables
#                    backtrace logging on configuration errors.
# :report_format:: 'html' or 'json'.
# :before_report, :after_report:: forwarded to the HTML report generator.
# :cli_mode:: when truthy and no configuration error was raised, the process
#             exits with 2 if any path failed and 0 otherwise.
#
# Configuration errors (Config::InvalidConfig, Config::ConfigNotFound) are
# logged rather than propagated.
def diff(options)
  @config.ignore_whitespace = options[:ignore_whitespace]
  @config.export = options[:export]

  # Apply "paths" override, if any.
  if options[:paths]
    @config.paths = options[:paths]
  else
    paths_file = options[:paths_file]
    paths_file ||= File.join(@dir, Config::DEFAULT_PATHS_FILENAME)
    paths_file = File.expand_path(paths_file)
    paths_count = @config.paths_file_read(paths_file)
    SiteDiff.log "Read #{paths_count} paths from: #{paths_file}"
  end

  # TODO: Why do we allow before and after override during diff?
  @config.before['url'] = options[:before] if options[:before]
  @config.after['url'] = options[:after] if options[:after]

  # Prepare cache.
  cache = SiteDiff::Cache.new(
    create: options[:cached] != 'none',
    directory: @dir
  )
  cache.read_tags << :before if %w[before all].include?(options[:cached])
  cache.read_tags << :after if %w[after all].include?(options[:cached])
  cache.write_tags << :before << :after

  # Run sitediff.
  sitediff = SiteDiff.new(
    @config,
    cache,
    options[:verbose],
    options[:debug]
  )
  num_failing = sitediff.run
  exit_code = num_failing.positive? ? 2 : 0

  # Generate HTML report.
  if options[:report_format] == 'html' || @config.export
    sitediff.report.generate_html(
      @dir,
      options[:before_report],
      options[:after_report]
    )
  end

  # Generate JSON report. Tested with plain truthiness instead of
  # `== false`, so an export setting that was never supplied (nil) no longer
  # suppresses the report.
  if options[:report_format] == 'json' && !@config.export
    sitediff.report.generate_json @dir
  end

  SiteDiff.log 'Run "sitediff serve" to see a report.' unless options[:export]
rescue Config::InvalidConfig, Config::ConfigNotFound => e
  # Both configuration failures are reported the same way.
  SiteDiff.log "Invalid configuration: #{e.message}", :error
  SiteDiff.log e.backtrace, :error if options[:verbose]
else # no exception was raised
  # Thor::Error  --> exit(1), guaranteed by exit_on_failure?
  # Failing diff --> exit(2), populated above
  exit(exit_code) if options[:cli_mode]
end
serve(options)
click to toggle source
Serves SiteDiff
report for accessing in the browser.
Calling:
api.serve(browse: true, port: 13080)
# File lib/sitediff/api.rb, line 195
#
# Starts the result web server for the report in @dir and waits on it.
#
# Keys read from +options+: :port, :browse and :verbose (the latter only
# controls whether a backtrace is logged on failure).
#
# A SiteDiffException is logged instead of being propagated.
def serve(options)
  @cache = Cache.new(directory: @dir)
  @cache.read_tags << :before << :after

  server = SiteDiff::Webserver::ResultServer.new(
    options[:port],
    @dir,
    browse: options[:browse],
    cache: @cache,
    config: @config
  )
  server.wait
rescue SiteDiffException => e
  SiteDiff.log e.message, :error
  SiteDiff.log e.backtrace, :error if options[:verbose]
end
store(options)
click to toggle source
# File lib/sitediff/api.rb, line 213
#
# Fetches every configured path from a single base URL and stores the
# results in the cache under the :before tag.
#
# Keys read from +options+: :url (base URL; falls back to the config's
# "after" URL) and :debug.
def store(options)
  # TODO: Figure out how to remove this config.validate call.
  @config.validate(need_before: false)
  @config.paths_file_read

  @cache = SiteDiff::Cache.new(directory: @dir, create: true)
  @cache.write_tags << :before

  base_url = options[:url] || @config.after['url']

  # NOTE(review): get_curl_opts is not among the methods documented for this
  # class, whereas crawl reads @config.curl_opts - confirm this helper is
  # actually reachable from here.
  fetcher = SiteDiff::Fetch.new(
    @cache,
    @config.paths,
    @config.setting(:interval),
    @config.setting(:concurrency),
    get_curl_opts(@config.settings),
    options[:debug],
    before: base_url
  )
  fetcher.run { |path, _res| SiteDiff.log "Visited #{path}, cached" }
end
Private Instance Methods
after_crawl(tag, info)
click to toggle source
Processes a crawled path.
# File lib/sitediff/api.rb, line 247
#
# Records one crawled page: registers its canonical path under +tag+ in
# @paths and stores the fetched result in the cache.
#
# +tag+ is the root tag the page was crawled for; +info+ is the object
# yielded by the crawler (its +relative+ and +read_result+ are used).
def after_crawl(tag, info)
  canonical = UriWrapper.canonicalize(info.relative)

  # Register the path.
  (@paths[tag] ||= []) << canonical

  fetched = info.read_result

  # Write result to applicable cache.
  @cache.set(tag, canonical, fetched)

  # TODO: Restore application of rules.
  # @rules.handle_page(tag, res.content, info.document) if @rules && !res.error

  # If single-site, cache "after" as "before".
  return if @config.roots[:before]

  @cache.set(:before, canonical, fetched)
end
get_dir(directory)
click to toggle source
Ensures that the given directory exists.
# File lib/sitediff/api.rb, line 238
#
# Ensures that the given directory exists and returns its path as a String.
#
# +directory+ may be nil, in which case the current directory ('.') is used.
# Missing parent directories are created as well.
#
# The helper works on a local variable only: it does not touch @dir, so the
# caller alone decides what is stored there (always the returned String).
def get_dir(directory)
  # Create the dir. Must go before cache initialization!
  dir = Pathname.new(directory || '.')
  # mkpath does nothing when the directory is already present.
  dir.mkpath
  dir.to_s
end