module Arachni::Framework::Parts::Audit
Provides {Page} audit functionality and everything related to it, like handling the {Session} and {Trainer}.
@author Tasos “Zapotek” Laskos <tasos.laskos@arachni-scanner.com>
Attributes
failures[R]
@return [Array<String>]
Page URLs which elicited no response from the server and so were not audited. Not determined by HTTP status codes; we're talking network failures here. (See the sketch after this attribute list.)
http[R]
@return [Arachni::HTTP::Client]
session[R]
@return [Session]
Web application session manager.
trainer[R]
@return [Trainer]
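A hedged sketch of inspecting these readers after a scan; it assumes a configured Arachni::Framework instance named `framework` (these attr_readers are mixed into the Framework via this part):

    framework.run

    # Network-level failures: no response at all, not HTTP error codes.
    framework.failures.each { |url| puts "No response from: #{url}" }

    # The session manager and trainer are also exposed for finer control.
    framework.session # => #<Arachni::Session ...>
    framework.trainer # => #<Arachni::Trainer ...>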
Public Class Methods
new()
Calls superclass method Arachni::Support::Mixins::Observable::new
    # File lib/arachni/framework/parts/audit.rb, line 44
    def initialize
        super

        @http = HTTP::Client.instance

        # Holds page URLs which returned no response.
        @failures = []
        @retries  = {}

        @current_url = ''

        reset_session
        reset_trainer
    end
Public Instance Methods
audit_page( page )
@note Will update the {HTTP::Client#cookie_jar} with {Page#cookie_jar}.
@note It will audit just the given `page` and not any subsequent pages discovered by the {Trainer} -- i.e. ignore any new elements that might appear as a result.
@note It will pass the `page` to the {BrowserCluster} for analysis if the {Page::Scope#dom_depth_limit_reached? DOM depth limit} has not been reached and push resulting pages to {#push_to_page_queue}, but will not audit those pages either.
Runs loaded checks against `page`.
@param [Page] page
    # File lib/arachni/framework/parts/audit.rb, line 70
    def audit_page( page )
        return if !page

        if page.scope.out?
            print_info "Ignoring page due to exclusion criteria: #{page.dom.url}"
            return false
        end

        # Initialize the BrowserCluster.
        browser_cluster

        state.audited_page_count += 1
        add_to_sitemap( page )

        print_line
        if page.response.ok?
            print_status "[HTTP: #{page.code}] #{page.dom.url}"
        else
            print_error "[HTTP: #{page.code}] #{page.dom.url}"
            print_error "[#{page.response.return_code}] #{page.response.return_message}"
        end

        if page.platforms.any?
            print_info "Identified as: #{page.platforms.to_a.join( ', ' )}"
        end

        if crawl?
            pushed = push_paths_from_page( page )
            print_info "Analysis resulted in #{pushed.size} usable paths."
        end

        if host_has_browser?
            print_info "DOM depth: #{page.dom.depth} (Limit: " <<
                           "#{options.scope.dom_depth_limit})"

            if page.dom.transitions.any?
                print_info '  Transitions:'
                page.dom.print_transitions( method(:print_info), '    ' )
            end
        end

        # Aside from plugins and whatnot, the Trainer hooks here to update the
        # ElementFilter so that it'll know if new elements appear during the
        # audit, so it's a big deal.
        notify_on_page_audit( page )

        @current_url = page.dom.url.to_s

        http.update_cookies( page.cookie_jar )

        # Pass the page to the BrowserCluster to explore its DOM and feed
        # resulting pages back to the framework.
        perform_browser_analysis( page )

        run_http = false

        if checks.any?
            # Remove elements which have already passed through here.
            pre_audit_element_filter( page )

            notify_on_effective_page_audit( page )

            # Run checks which **don't** benefit from fingerprinting first, so
            # that we can use the responses of their HTTP requests to fingerprint
            # the webapp platforms, so that the checks which **do** benefit from
            # knowing the remote platforms can run more efficiently.
            run_http = run_checks( @checks.without_platforms, page )
            run_http = true if run_checks( @checks.with_platforms, page )
        end

        notify_after_page_audit( page )

        # Makes it easier on the GC but it is important that it be called
        # **after** all the callbacks have been executed because they may need
        # access to the cached data and there's no sense in re-parsing.
        page.clear_cache

        if Arachni::Check::Auditor.has_timeout_candidates?
            print_line
            print_status "Processing timeout-analysis candidates for: #{page.dom.url}"
            print_info   '-------------------------------------------'

            Arachni::Check::Auditor.timeout_audit_run
            run_http = true
        end

        run_http
    end
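A minimal usage sketch of driving audit_page by hand; it assumes a reachable target, that `Page.from_url` and the block form of `Framework.new` behave as in recent Arachni versions, and the URL is a placeholder:

    Arachni::Framework.new do |framework|
        framework.options.url = 'http://testsite.example/'
        framework.checks.load :xss

        # audit_page mostly queues HTTP requests; run the client to
        # actually perform and harvest them.
        page = Arachni::Page.from_url( framework.options.url )
        framework.audit_page( page )
        framework.http.run
    end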
Private Instance Methods
audit()
Performs the audit.
    # File lib/arachni/framework/parts/audit.rb, line 162
    def audit
        handle_signals
        return if aborted?

        state.status = :scanning if !pausing?

        push_to_url_queue( options.url )
        options.scope.extend_paths.each { |url| push_to_url_queue( url ) }
        options.scope.restrict_paths.each { |url| push_to_url_queue( url, true ) }

        # Initialize the BrowserCluster.
        browser_cluster

        # Keep auditing until there are no more resources in the queues and the
        # browsers have stopped spinning.
        loop do
            show_workload_msg = true
            while !has_audit_workload? && wait_for_browser_cluster?
                if show_workload_msg
                    print_line
                    print_status 'Workload exhausted, waiting for new pages' <<
                                     ' from the browser-cluster...'
                end
                show_workload_msg = false

                last_pending_jobs ||= 0
                pending_jobs = browser_cluster.pending_job_counter
                if pending_jobs != last_pending_jobs
                    browser_cluster.print_info "Pending jobs: #{pending_jobs}"

                    browser_cluster.print_debug 'Current jobs:'
                    browser_cluster.workers.each do |worker|
                        browser_cluster.print_debug worker.job.to_s
                    end
                end
                last_pending_jobs = pending_jobs

                sleep 0.1
            end

            audit_queues

            next sleep( 0.1 ) if wait_for_browser_cluster?
            break if page_limit_reached?
            break if !has_audit_workload?
        end
    end
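Because the loop calls handle_signals and checks the pause/abort predicates on every pass, the state-control API stays responsive mid-scan. A hedged sketch of exercising that from another thread; it assumes `pause` and `resume` are exposed on the framework as in the state-handling part:

    scan = Thread.new { framework.run }

    sleep 10
    framework.pause     # the #audit loop yields at its next signal check
    sleep 5
    framework.resume

    scan.join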
audit_queues()
Audits the {Data::Framework.url_queue URL} and {Data::Framework.page_queue Page} queues while maintaining a valid session with the webapp if we’ve got login capabilities.
    # File lib/arachni/framework/parts/audit.rb, line 213
    def audit_queues
        return if @audit_queues_done == false || !has_audit_workload? ||
            page_limit_reached?

        @audit_queues_done = false

        while !suspended? && !page_limit_reached? && (page = pop_page)
            session.ensure_logged_in
            replenish_page_queue_from_url_queue

            # We're counting on piggybacking the page queue replenishing on the
            # page audit, however if there wasn't an audit we need to force an
            # HTTP run.
            audit_page( page ) or http.run

            handle_signals
        end

        @audit_queues_done = true
    end
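For ensure_logged_in to actually maintain anything, the session needs login capabilities. A hedged configuration sketch; check_url and check_pattern live in the session option group, while Session#configure is assumed to accept a login URL and form inputs as the autologin plugin does, and every URL and credential below is a placeholder:

    # How the scanner can tell whether we're still logged in.
    framework.options.session.check_url     = 'http://testsite.example/account'
    framework.options.session.check_pattern = /logged in as/i

    # How to log back in once the session dies.
    framework.session.configure(
        url:    'http://testsite.example/login',
        inputs: { username: 'user', password: 's3cr3t' }
    )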
harvest_http_responses()
    # File lib/arachni/framework/parts/audit.rb, line 236
    def harvest_http_responses
        print_status 'Harvesting HTTP responses...'
        print_info 'Depending on server responsiveness and network' <<
                       ' conditions this may take a while.'

        http.run
    end
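Queue-then-harvest is how the whole framework talks to the HTTP client: requests pile up asynchronously and nothing hits the wire until run. A hedged sketch against the client's public API; the URL is a placeholder:

    http = Arachni::HTTP::Client.instance

    # Queue a request; the callback fires during the harvest.
    http.get( 'http://testsite.example/' ) do |response|
        puts "[#{response.code}] #{response.url}"
    end

    # Blocks until every queued request has been performed and handled.
    http.run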