class Arachni::Browser
@note Depends on PhantomJS 2.1.1.
Real browser driver providing DOM/JS/AJAX support.
@author Tasos “Zapotek” Laskos <tasos.laskos@arachni-scanner.com>
Constants
- ASSET_EXTENSIONS
- ASSET_EXTRACTORS
- BROWSER_SPAWN_TIMEOUT
How much time to wait for the PhantomJS process to spawn before respawning.
- ELEMENT_APPEARANCE_TIMEOUT
How much time to wait for a targeted HTML element to appear on the page after the page is loaded.
- INPUT_EVENTS
- INPUT_EVENTS_TO_FORCE
Attributes
@return [Javascript]
@return [Array<Page>]
Same as {#page_snapshots} but it doesn't deduplicate and only contains pages with sink ({Page::DOM#data_flow_sinks} or {Page::DOM#execution_flow_sinks}) data as populated by {Javascript#data_flow_sinks} and {Javascript#execution_flow_sinks}.
@see Javascript#data_flow_sinks
@see Javascript#execution_flow_sinks
@see Page::DOM#data_flow_sinks
@see Page::DOM#execution_flow_sinks
@return [Hash]
Preloaded resources, by URL.
@return [Selenium::WebDriver]
Selenium driver interface.
@return [Support::LookUp::HashSet]
States that have been visited and should be skipped.
@see skip_state
@see skip_state?
@return [Array<Page::DOM::Transition>]
@return [Watir::Browser]
Watir driver interface.
Public Class Methods
# File lib/arachni/browser.rb, line 160
# Registers the domain of `url` in {asset_domains}.
#
# @param [#to_s] url
#   URL whose domain should be added.
#
# @return [String, nil]
#   The added domain, or `nil` if `url` is empty, could not be parsed or
#   has no domain.
def add_asset_domain( url )
    return if url.to_s.empty?

    parsed = Arachni::URI( url )
    return unless parsed

    host = parsed.domain
    return unless host

    asset_domains << host
    host
end
# File lib/arachni/browser.rb, line 156
# @return [Set]
#   Lazily created, memoized set of asset domains.
def asset_domains
    return @asset_domains if @asset_domains

    @asset_domains = Set.new
end
@return [String]
Path to the browser driver (`chromedriver`) executable.
# File lib/arachni/browser.rb, line 142
# Locates the `chromedriver` binary and memoizes its path.
#
# @return [String]
#   Path to the `chromedriver` executable.
#
# @raise [Error::MissingExecutable]
#   If `chromedriver` could not be found.
def executable
    return @path if @path

    binary = Selenium::WebDriver::Platform.find_binary( 'chromedriver' )
    unless binary
        raise Error::MissingExecutable, 'chromedriver could not be found in PATH.'
    end

    # Only memoize once the binary has been verified.
    Selenium::WebDriver::Platform.assert_executable binary
    @path = binary
end
@return [Bool]
`true` if a supported browser is in the OS PATH, `false` otherwise.
# File lib/arachni/browser.rb, line 133
# @return [Bool]
#   `true` if {executable} resolves without raising
#   `Error::MissingExecutable`, `false` otherwise.
def has_executable?
    begin
        executable
    rescue Error::MissingExecutable
        return false
    end

    true
end
@param [Hash] options @option options [Integer] :concurrency
Maximum number of concurrent connections.
@option options [Bool] :store_pages (true)
Whether to store pages in addition to just passing them to {#on_new_page}.
@option options [Integer] :width (1600)
Window width.
@option options [Integer] :height (1200)
Window height.
Arachni::Support::Mixins::Observable::new
# File lib/arachni/browser.rb, line 181 def initialize( options = {} ) super() @options = options.dup @ignore_scope = options[:ignore_scope] @width = options[:width] || 1600 @height = options[:height] || 1200 @options[:store_pages] = true if !@options.include?( :store_pages ) start_webdriver # User-controlled preloaded responses, by URL. @preloads = {} # Captured pages -- populated by #capture. @captured_pages = [] # Snapshots of the working page resulting from firing of events and # clicking of JS links. @page_snapshots = {} # Same as @page_snapshots but it doesn't deduplicate and only contains # pages with sink (Page::DOM#sink) data as populated by Javascript#flush_sink. @page_snapshots_with_sinks = [] # Captures HTTP::Response objects per URL for open windows. @window_responses = {} # Keeps track of resources which should be skipped -- like already fired # events and clicked links etc. @skip_states = Support::LookUp::HashSet.new( hasher: :persistent_hash ) @transitions = [] @request_transitions = [] @add_request_transitions = true # Last loaded URL. @last_url = nil @javascript = Javascript.new( self ) end
@private
# File lib/arachni/browser.rb, line 152 def reset @path = nil end
Public Instance Methods
@return [Bool]
`true` if request capturing is enabled, `false` otherwise.
@see start_capture
@see stop_capture
# File lib/arachni/browser.rb, line 763
# @return [Bool]
#   `true` if the capture flag is set, `false` otherwise.
def capture?
    @capture ? true : false
end
# File lib/arachni/browser.rb, line 888 def capture_snapshot( transition = nil ) pages = [] request_transitions = flush_request_transitions transitions = ([transition] + request_transitions).flatten.compact window_handles = @selenium.window_handles begin window_handles.each do |handle| if window_handles.size > 1 @selenium.switch_to.window( handle ) end # We don't even have an HTTP response for the page, don't # bother trying anything else. next if !response unique_id = javascript.dom_event_digest already_seen = skip_state?( unique_id ) skip_state unique_id with_sinks = javascript.has_sinks? # Avoid a #to_page call if at all possible because it'll generate # loads of data. next if (already_seen && !with_sinks) || (page = to_page).code == 0 if pages.empty? transitions.each do |t| @transitions << t page.dom.push_transition t end end capture_snapshot_with_sink( page ) next if already_seen # Safegued against pages which generate an inf number of DOM # states regardless of UI interactions. transition_id ="#{page.dom.url}:#{page.dom.playable_transitions.map(&:hash)}" transition_id_seen = skip_state?( transition_id ) skip_state transition_id next if transition_id_seen notify_on_new_page( page ) if store_pages? @page_snapshots[unique_id] = page pages << page end end rescue => e print_debug "Could not capture snapshot for: #{@last_url}" if transition print_debug "-- #{transition}" end print_debug print_debug_exception e ensure @selenium.switch_to.default_content end pages end
@return [Array<Page>]
Captured HTTP requests performed by the web page (AJAX etc.) converted into forms of pages to assist with analysis and audit.
# File lib/arachni/browser.rb, line 777 def captured_pages @captured_pages end
# File lib/arachni/browser.rb, line 226
# Empties the preload, captured page, snapshot and window response buffers
# inside a `synchronize` block.
def clear_buffers
    synchronize do
        [
            @preloads,
            @captured_pages,
            @page_snapshots,
            @page_snapshots_with_sinks
        ].each( &:clear )

        @window_responses.clear
    end
end
@note Only used when running as part of {BrowserCluster} to distribute
page analysis across a pool of browsers.
Distributes the triggering of `event` on the element at `element_index` on `page`.
@param [String, Page
, Page::DOM
, HTTP::Response
] resource @param [ElementLocator] locator @param [Symbol] event
# File lib/arachni/browser.rb, line 517 def distribute_event( resource, locator, event ) trigger_event( resource, locator, event ) end
@return [String]
Current URL, as provided by the browser.
# File lib/arachni/browser.rb, line 394 def dom_url @selenium.current_url end
@note Will skip non-visible elements as they can’t be manipulated.
Iterates over all elements which have events and passes their info to the given block.
@yield [ElementLocator,Array<Symbol>]
Element locator along with the element's applicable events along with their handlers and attributes.
# File lib/arachni/browser.rb, line 430 def each_element_with_events( whitelist = []) current_url = self.url javascript.each_dom_element_with_events whitelist do |element| tag_name = element['tag_name'] attributes = element['attributes'] events = element['events'] case tag_name when 'a' href = attributes['href'].to_s if !href.empty? if href.downcase.start_with?( 'javascript:' ) (events[:click] ||= []) << href else next if skip_path?( to_absolute( href, current_url ) ) end end when 'input' if attributes['type'].to_s.downcase == 'image' (events[:click] ||= []) << 'image' end when 'form' action = attributes['action'].to_s if !action.empty? if action.downcase.start_with?( 'javascript:' ) (events[:submit] ||= []) << action else next if skip_path?( to_absolute( action, current_url ) ) end end end next if events.empty? yield ElementLocator.new( tag_name: tag_name, attributes: attributes ), events end self end
Explores the browser’s DOM tree and captures page snapshots for each state change until there are no more available.
@param [Integer] depth
How deep to go into the DOM tree.
@return [Array<Page>]
Page snapshots for each state.
# File lib/arachni/browser.rb, line 406
# Repeatedly loads every known page, triggers its events and collects the
# resulting pages, until no new pages appear or `depth` passes have been
# performed.
#
# @param [Integer, nil] depth
#   Maximum number of exploration passes, `nil` for no limit. At least one
#   pass is always performed.
#
# @return [Array<Page>]
#   All collected pages, without `nil` entries.
def explore_and_flush( depth = nil )
    pages         = [ to_page ]
    current_depth = 0

    loop do
        known_count = pages.size
        pages |= pages.map { |p| load( p ).trigger_events.flush_pages }.flatten

        # No new states were discovered, there's nothing left to explore.
        break if pages.size == known_count

        # Count the completed pass and stop once the limit has been reached.
        # (The comparison used to be inverted, which made any depth limit
        # stop the exploration after the very first pass.)
        current_depth += 1
        break if depth && current_depth >= depth
    end

    pages.compact
end
Triggers `event` on `element`.
@param [Selenium::WebDriver::Element, ElementLocator] element @param [Symbol] event @param [Hash] options @option options [Hash<Symbol,String=>String>] :inputs
Values to use to fill-in inputs. Keys should be input names or ids. Defaults to using {OptionGroups::Input} if not specified.
@return [Page::DOM::Transition, nil]
Transition if the operation was successful, `nil` otherwise.
# File lib/arachni/browser.rb, line 560
# Triggers `event` on `element`, waits for the resulting requests to finish
# and returns the transition describing the operation.
#
# @param [Selenium::WebDriver::Element, ElementLocator] element
# @param [Symbol, String] event
#   Event name; a leading `on` is stripped and the name is downcased.
# @param [Hash] options
# @option options [Hash] :inputs
#   Values used to fill in form inputs (stringified in place).
# @option options [Object] :value
#   Value to type for input events, instead of the one from {#value_for}.
#
# @return [Page::DOM::Transition, nil]
#   The transition, `nil` if the element could not be located or a
#   WebDriver error occurred.
def fire_event( element, event, options = {} )
    event   = event.to_s.downcase.sub( /^on/, '' ).to_sym
    locator = nil

    options[:inputs] = options[:inputs].my_stringify if options[:inputs]

    # Resolve locators to actual elements, waiting for them to appear.
    if element.is_a? ElementLocator
        locator = element

        begin
            Selenium::WebDriver::Wait.new( timeout: ELEMENT_APPEARANCE_TIMEOUT ).
                until { element = element.locate( self ) }
        rescue Selenium::WebDriver::Error::WebDriverError => e
            print_debug "Element '#{element.inspect}' could not be " <<
                "located for triggering '#{event}'."
            print_debug
            print_debug_exception e
            return
        end
    end

    if locator
        opening_tag = locator.to_s
        tag_name    = locator.tag_name
    else
        opening_tag = element.opening_tag
        tag_name    = element.tag_name
        locator     = ElementLocator.from_html( opening_tag )
    end

    print_debug_level_2 "[start]: #{event} (#{options}) #{locator}"

    tag_name = tag_name.to_sym

    notify_on_fire_event( element, event )

    # Remember the existing timers so that new ones can be detected later.
    if Options.browser_cluster.wait_for_timers?
        pre_timeouts = javascript.timeouts
    end

    begin
        transition = Page::DOM::Transition.new( locator, event, options ) do
            force = true

            # It's better to use the helpers whenever possible instead of
            # firing events manually.
            if tag_name == :form
                fill_in_form_inputs( element, options[:inputs] )

                if event == :fill
                    force = false
                end

                if event == :submit
                    force = false

                    begin
                        element.find_elements( :css, "input[type='submit'], button[type='submit']" ).first.click
                    rescue => e
                        print_debug "No submit button, will trigger 'submit' event."
                        print_debug_exception e
                        element.submit
                    end
                end
            elsif event == :click
                force = false
                element.click
            elsif INPUT_EVENTS.include? event
                force = INPUT_EVENTS_TO_FORCE.include?( event )

                # Send keys will append to the existing value, so we need to
                # clear it first. The receiving input may not support values
                # though, so watch out.
                element.clear if [:input, :textarea].include?( tag_name )

                # Simulates real text input and will trigger associated events.
                # Except for INPUT_EVENTS_TO_FORCE of course.
                element.send_keys( (options[:value] || value_for( element )).to_s )
            end

            if force
                print_debug_level_2 "[forcing event]: #{event} (#{options}) #{locator}"
                fire_event_js locator, event
            end

            print_debug_level_2 "[waiting for requests]: #{event} (#{options}) #{locator}"
            wait_for_pending_requests
            print_debug_level_2 "[done waiting for requests]: #{event} (#{options}) #{locator}"

            # Maybe we switched to a different page, wait until the custom
            # JS env has been put in place.
            javascript.wait_till_ready
            javascript.set_element_ids
            update_cookies
        end

        print_debug_level_2 "[done in #{transition.time}s]: #{event} (#{options}) #{locator}"

        if Options.browser_cluster.wait_for_timers?
            # Largest delay among the timers that appeared during the event.
            delay = (javascript.timeouts - pre_timeouts).compact.map { |t| t[1].to_i }.max
            if delay
                print_debug_level_2 "Found new timers with max #{delay}ms."
                delay = [Options.http.request_timeout, delay].min / 1000.0

                print_debug_level_2 "Will wait for #{delay}s."
                sleep delay
            end
        end

        transition
    rescue Selenium::WebDriver::Error::WebDriverError => e
        print_debug "Error when triggering event for: #{dom_url}"
        print_debug "-- '#{event}' on: #{opening_tag} -- #{locator.css}"
        print_debug
        print_debug_exception e
        nil
    end
end
This is essentially the same thing as Watir::Element#fire_event but 10 times faster.
Does not perform any sort of sanitization nor sanity checking, it will just try to trigger the event.
@param [Browser::ElementLocator] locator @param [Symbol,String] event @param [Numeric] wait
Amount of time to wait (in seconds) after triggering the event.
# File lib/arachni/browser.rb, line 698 def fire_event_js( locator, event, wait: 0.1 ) r = javascript.run <<-EOJS var element = document.querySelector( #{locator.css.inspect} ); // Could not be found. if( !element ) return false; // Invisible. if( element.offsetWidth <= 0 && element.offsetHeight <= 0 ) return false; var event = document.createEvent( "Events" ); event.initEvent( "#{event}", true, true ); event.view = window; event.altKey = false; event.ctrlKey = false; event.shiftKey = false; event.metaKey = false; event.keyCode = 0; event.charCode = 'a'; element.dispatchEvent( event ); return true; EOJS sleep( wait ) if r r end
@return [Array<Page>]
Returns {#page_snapshots_with_sinks} and flushes it.
# File lib/arachni/browser.rb, line 960
# @return [Array<Page>]
#   Copy of the buffered sink snapshots; the buffer itself is emptied.
def flush_page_snapshots_with_sinks
    flushed = @page_snapshots_with_sinks.dup
    @page_snapshots_with_sinks.clear
    flushed
end
@return [Array<Page>]
Flushes and returns the {#captured_pages captured} and {#page_snapshots snapshot} pages.
@see captured_pages
@see page_snapshots
@see start_capture
@see stop_capture
@see capture?
# File lib/arachni/browser.rb, line 975
# @return [Array<Page>]
#   Captured pages followed by the page snapshots; both buffers are
#   emptied afterwards, even on error.
def flush_pages
    begin
        flushed = captured_pages + page_snapshots
    ensure
        [@captured_pages, @page_snapshots].each( &:clear )
    end

    flushed
end
@param [String] url
Loads the given URL in the browser.
@param [Hash] options @option [Bool] :take_snapshot (true)
Take a snapshot right after loading the page.
@option [Array<Cookie>] :cookies ([])
Extra cookies to pass to the webapp.
@return [Page::DOM::Transition]
Transition used to replay the resource visit.
# File lib/arachni/browser.rb, line 315
# Loads `url` in the browser.
#
# @param [String] url
# @param [Hash] options
# @option options [Bool] :take_snapshot (true)
#   Take a snapshot right after loading the page.
# @option options [Hash] :cookies ({})
#   Extra cookies, passed to `load_cookies` and recorded in the transition.
# @option options [Bool] :update_transitions (true)
#   When `false`, request transitions are not tracked and the load
#   transition is not recorded while the page loads.
#
# @return [Page::DOM::Transition]
#   Transition used to replay the resource visit.
def goto( url, options = {} )
    take_snapshot      = options.fetch( :take_snapshot, true )
    extra_cookies      = options[:cookies] || {}
    update_transitions = options.fetch( :update_transitions, true )

    pre_add_request_transitions = @add_request_transitions
    @add_request_transitions = false if !update_transitions

    begin
        @last_url = Arachni::URI( url ).to_s
        self.class.add_asset_domain @last_url

        ensure_open_window

        load_cookies url, extra_cookies

        transition = Page::DOM::Transition.new( :page, :load,
            url:     url,
            cookies: extra_cookies
        ) do
            print_debug_level_2 "Loading #{url} ..."
            @selenium.navigate.to url
            print_debug_level_2 '...done.'

            wait_till_ready

            # Wait for any elements configured for this URL to show up.
            Options.browser_cluster.css_to_wait_for( url ).each do |css|
                print_info "Waiting for #{css.inspect} to appear for: #{url}"

                begin
                    Selenium::WebDriver::Wait.
                        new( timeout: Options.browser_cluster.job_timeout ).
                        until { @selenium.find_element( :css, css ) }

                    print_info "#{css.inspect} appeared for: #{url}"
                rescue Selenium::WebDriver::Error::TimeoutError
                    print_bad "#{css.inspect} did not appear for: #{url}"
                end
            end

            javascript.set_element_ids
        end

        @transitions << transition if @add_request_transitions
    ensure
        # Always restore the flag, otherwise a failed navigation with
        # `update_transitions: false` would leave request transition
        # tracking switched off.
        @add_request_transitions = pre_add_request_transitions
    end

    update_cookies

    # Capture the page at its initial state.
    capture_snapshot if take_snapshot

    transition
end
# File lib/arachni/browser.rb, line 1164 def inspect s = "#<#{self.class} " s << "last-url=#{@last_url.inspect} " s << "transitions=#{@transitions.size}" s << '>' end
@param [String, HTTP::Response
, Page
, Page::DOM] resource
Loads the given resource in the browser. If it is a string it will be treated like a URL.
@return [Browser]
`self`
# File lib/arachni/browser.rb, line 250
# Loads the given resource in the browser.
#
# @param [String, HTTP::Response, Page, Page::DOM] resource
#   * `String` -- treated as a URL and passed to {#goto}.
#   * `HTTP::Response` -- preloaded, then its URL is passed to {#goto}.
#   * `Page` -- its cookie jar is pushed to `HTTP::Client`, then its DOM is
#     loaded.
#   * `Page::DOM` -- its transitions and skip states are adopted and the
#     DOM restores itself onto this browser.
# @param [Hash] options
#   Passed through to {#goto} for `String` and `HTTP::Response` resources.
#
# @return [Browser]
#   `self`
#
# @raise [Error::Load]
#   If the resource type is not supported.
def load( resource, options = {} )
    case resource
        when String
            @transitions = []
            goto resource, options

        when HTTP::Response
            @transitions = []
            goto preload( resource ), options

        when Page
            HTTP::Client.update_cookies resource.cookie_jar

            load resource.dom

        when Page::DOM
            @transitions = resource.transitions.dup
            update_skip_states resource.skip_states

            # Don't record request transitions while replaying existing ones.
            @add_request_transitions = false if @transitions.any?
            begin
                resource.restore self
            ensure
                # Re-enable tracking even if the restore fails, otherwise
                # request transitions would stay switched off.
                @add_request_transitions = true
            end

        else
            fail Error::Load,
                 "Can't load resource of type #{resource.class}."
    end

    self
end
# File lib/arachni/browser.rb, line 1028
# @return [Integer, nil]
#   Largest delay (second entry of each timer, as an integer) among the
#   timers reported by `@javascript.timeouts`, `nil` if there are none.
def load_delay
    timers = @javascript.timeouts.compact
    delays = timers.map { |timer| timer[1].to_i }
    delays.max
end
@return [Array<Page>]
Page snapshots (stored after events have been fired and JS links clicked) with hashes as keys and pages as values.
# File lib/arachni/browser.rb, line 770 def page_snapshots @page_snapshots.values end
@note The preloaded resource will be removed once used.
@param [HTTP::Response, Page] resource
Preloads a resource to be instantly available by URL via {#load}.
# File lib/arachni/browser.rb, line 286
# Stores a response in the preload buffer, keyed by its URL.
#
# @param [HTTP::Response, Page] resource
#
# @return [String]
#   URL of the preloaded response.
#
# @raise [Error::Load]
#   If the resource type is not supported.
def preload( resource )
    if HTTP::Response === resource
        response = resource
    elsif Page === resource
        response = resource.response
    else
        fail Error::Load,
             "Can't preload resource of type #{resource.class}."
    end

    location = response.url

    # Responses whose URL carries the request token aren't saved.
    save_response( response ) unless location.include?( request_token )

    @preloads[response.url] = response
    response.url
end
# File lib/arachni/browser.rb, line 1048
# Looks up the stored response for the current DOM URL.
#
# @return [HTTP::Response, nil]
#   The stored response, or `nil` if the page is blank, out of scope, no
#   response is stored for it, or the stored response has a 504 code.
def response
    current = dom_url

    if current == 'about:blank'
        print_debug 'Blank page.'
        return
    end

    if skip_path?( current )
        print_debug "Response is out of scope: #{current}"
        return
    end

    found = get_response( current )

    if !found
        # Nothing stored -- log what was tried to help with debugging.
        print_debug "Response never arrived for: #{current}"

        print_debug 'Available responses are:'
        @window_responses.each do |stored_url, _|
            print_debug "-- #{stored_url}"
        end

        print_debug 'Tried:'
        print_debug "-- #{current}"
        print_debug "-- #{normalize_url( current )}"
        print_debug "-- #{normalize_watir_url( current )}"
    elsif found.code != 504
        return found
    else
        print_debug "Origin server timed-out when requesting: #{current}"
    end

    nil
end
# File lib/arachni/browser.rb, line 1128 def shutdown print_debug 'Shutting down...' if @selenium @selenium.close print_debug_level_2 'Quiting Selenium...' # So freaking hacky but @selenium.quit freezes if we don't detach first. @selenium.instance_eval do bridge.quit @service.instance_eval do Process.detach @process.pid @process.stop end end @selenium.quit rescue Errno::ECONNREFUSED # @selenium.quit rescue Selenium::WebDriver::Error::WebDriverError print_debug_level_2 '...done.' end if @proxy print_debug_level_2 'Shutting down proxy...' @proxy.shutdown rescue Reactor::Error::NotRunning print_debug_level_2 '...done.' end @proxy = nil @watir = nil @selenium = nil print_debug '...shutdown complete.' end
Arachni::Utilities#skip_path?
# File lib/arachni/browser.rb, line 1044 def skip_path?( path ) enforce_scope? && super( path ) end
@return [String]
HTML code of the evaluated (DOM/JS/AJAX) page.
# File lib/arachni/browser.rb, line 1024 def source @selenium.page_source end
@return [String]
Prefixes each source line with a number.
# File lib/arachni/browser.rb, line 238
# @return [String]
#   {#source} with every line prefixed by its 1-based number and ` - `.
def source_with_line_numbers
    numbered = source.lines.each_with_index.map do |text, index|
        "#{index + 1} - #{text}"
    end

    numbered.join
end
Starts capturing requests and parses them into elements of pages, accessible via {#captured_pages}.
@return [Browser]
`self`
@see stop_capture
@see capture?
@see captured_pages
@see flush_pages
# File lib/arachni/browser.rb, line 740 def start_capture @capture = true self end
@return [Page::DOM]
# File lib/arachni/browser.rb, line 782
# @return [Page::DOM, nil]
#   DOM state for the current URL, carrying copies of the transitions and
#   skip states; `nil` if there is no {#response} for the current page.
def state
    current_url = dom_url
    return unless response

    dom_attributes = {
        url:         current_url,
        transitions: @transitions.dup,
        digest:      @javascript.dom_digest,
        skip_states: skip_states.dup
    }

    Page::DOM.new( **dom_attributes )
end
Stops the {HTTP::Request} capture.
@return [Browser]
`self`
@see start_capture
@see capture?
@see flush_pages
# File lib/arachni/browser.rb, line 753 def stop_capture @capture = false self end
@return [Page]
Converts the current browser window to a {Page page}.
# File lib/arachni/browser.rb, line 797 def to_page d_url = dom_url if !(r = response) return Page.from_data( dom: { url: d_url }, response: { code: 0, url: url } ) end # We need sink data for both the current taint and to determine cookie # usage, so grab all of the data-flow sinks once. data_flow_sinks = {} if @javascript.supported? data_flow_sinks = @javascript.taint_tracer.data_flow_sinks end page = r.to_page page.body = source page.dom.url = d_url page.dom.cookies = self.cookies page.dom.digest = @javascript.dom_digest page.dom.execution_flow_sinks = @javascript.execution_flow_sinks page.dom.data_flow_sinks = data_flow_sinks[@javascript.taint] || [] page.dom.transitions = @transitions.dup page.dom.skip_states = skip_states.dup if Options.audit.ui_inputs? page.ui_inputs = Element::UIInput.from_browser( self, page ) end if Options.audit.ui_forms? page.ui_forms = Element::UIForm.from_browser( self, page ) end # Go through auditable DOM forms and cookies and remove the DOM from # them if no events are associated with it. # # This can save **A LOT** of time during the audit. if @javascript.supported? if Options.audit.form_doms? page.forms.each do |form| next if !form.node || !form.dom action = form.node['action'].to_s form.dom.browser = self next if action.downcase.start_with?( 'javascript:' ) || form.dom.locate.events.any? form.skip_dom = true end page.update_metadata page.clear_cache end if Options.audit.cookie_doms? page.cookies.each do |cookie| if (sinks = data_flow_sinks[cookie.name] || data_flow_sinks[cookie.value]) # Don't be satisfied with just a taint match, make sure # the full value is identical. # # For example, if a cookie has '1' as a name or value # that's too generic and can match irrelevant data. # # The current approach isn't perfect of course, but it's # the best we can do. next if sinks.find do |sink| sink.tainted_value == cookie.name || sink.tainted_value == cookie.value end end cookie.skip_dom = true end page.update_metadata end end page end
@note Captures page {#page_snapshots}.
Triggers `event` on the element described by `tag` on `page`.
@param [String, Page
, Page::DOM
, HTTP::Response
] resource
Page containing the element's `tag`.
@param [ElementLocator] element @param [Symbol] event
Event to trigger.
# File lib/arachni/browser.rb, line 530
# Fires `event` on `element`, captures a snapshot of the result and,
# optionally, restores `resource` afterwards.
#
# @param [Object] resource
#   Resource to restore once the event has been handled.
# @param [ElementLocator] element
# @param [Symbol] event
# @param [Bool] restore
#   Whether to restore `resource` after the event (or after a failure).
def trigger_event( resource, element, event, restore = true )
    transition = fire_event( element, event )

    if transition
        capture_snapshot( transition )

        return unless restore
        return restore( resource )
    end

    print_info "Could not trigger '#{event}' on: #{element}"
    return unless restore

    print_info 'Restoring page.'
    restore( resource )
    nil
end
Triggers all events on all elements (once) and captures {#page_snapshots page snapshots}.
@return [Browser]
`self`
# File lib/arachni/browser.rb, line 481
# Distributes every event of every element with events (once per element
# state), until the DOM event limit has been reached.
#
# @return [Browser]
#   `self`
def trigger_events
    dom = state
    return self unless dom

    page_url = normalize_url( dom.url )
    fired    = 1

    each_element_with_events do |locator, events|
        element_state =
            "#{page_url}:#{locator.tag_name}:#{locator.attributes}:#{events.keys.sort}"

        # Handle each element state only once.
        next if skip_state?( element_state )
        skip_state element_state

        events.each do |event_name, _handlers|
            if Options.scope.dom_event_limit_reached?( fired )
                print_debug "DOM event limit reached for: #{dom.url}"
            else
                distribute_event( dom, locator, event_name.to_sym )
                fired += 1
            end
        end
    end

    self
end
@return [String]
Current URL, normalized via `normalize_url`.
# File lib/arachni/browser.rb, line 388 def url normalize_url dom_url end
# File lib/arachni/browser.rb, line 1032
# Sleeps for the longest page timer ({#load_delay}, in milliseconds), capped
# at `Options.http.request_timeout`. Does nothing when there are no timers.
def wait_for_timers
    longest_ms = load_delay
    return unless longest_ms

    capped_ms       = [Options.http.request_timeout, longest_ms].min
    effective_delay = capped_ms / 1000.0

    print_debug_level_2 "Waiting for max timer #{effective_delay}s (original was #{longest_ms}ms)..."
    sleep effective_delay
    print_debug_level_2 '...done.'
end
# File lib/arachni/browser.rb, line 376
# Waits for the JS environment, then (if configured) for page timers and
# finally for pending requests.
def wait_till_ready
    @javascript.wait_till_ready

    wait_for_timers if Options.browser_cluster.wait_for_timers?

    wait_for_pending_requests
end
Private Instance Methods
# File lib/arachni/browser.rb, line 1438
# @return [String]
#   Memoized, stringified token from `generate_token`.
def auth_token
    return @auth_token if @auth_token

    @auth_token = generate_token.to_s
end
# File lib/arachni/browser.rb, line 1641
# Converts an HTTP request performed by the page into JSON, XML and/or form
# elements, wraps them in a {Page}, optionally stores it and notifies the
# new-page observers.
#
# Does nothing unless a URL has been loaded and capturing is enabled. Any
# error is logged and swallowed.
#
# @param [HTTP::Request] request
def capture( request )
    return if !@last_url || !capture?

    elements = {
        forms: [],
        jsons: [],
        xmls:  []
    }

    found_element = false

    if (json = JSON.from_request( @last_url, request ))
        print_debug_level_2 "Extracted JSON input:\n#{json.source}"

        elements[:jsons] << json
        found_element = true
    end

    if !found_element && (xml = XML.from_request( @last_url, request ))
        print_debug_level_2 "Extracted XML input:\n#{xml.source}"

        elements[:xmls] << xml
        found_element = true
    end

    case request.method
        when :get
            inputs = request.parsed_url.query_parameters
            # NOTE(review): this also bails out when a JSON/XML element was
            # extracted above -- confirm that this is intended.
            return if inputs.empty?

            elements[:forms] << Form.new(
                url:    @last_url,
                action: request.url,
                method: request.method,
                inputs: inputs
            )

        when :post
            # Query parameters of a POST request become a separate GET form.
            inputs = request.parsed_url.query_parameters
            if inputs.any?
                elements[:forms] << Form.new(
                    url:    @last_url,
                    action: request.url,
                    method: :get,
                    inputs: inputs
                )
            end

            # The body only becomes a form if it wasn't already JSON or XML.
            if !found_element && (inputs = request.body_parameters).any?
                elements[:forms] << Form.new(
                    url:    @last_url,
                    action: request.url,
                    method: request.method,
                    inputs: inputs
                )
            end

        else
            return
    end

    if (form = elements[:forms].last)
        print_debug_level_2 "Extracted form input:\n" <<
            "#{form.method.to_s.upcase} #{form.action} #{form.inputs}"
    end

    el = elements.values.flatten

    if el.empty?
        print_debug_level_2 'No elements found.'
        return
    end

    # Don't bother if the system in general has already seen the vectors.
    if el.empty? || !el.find { |e| !ElementFilter.include?( e ) }
        print_debug_level_2 'Ignoring, already seen.'
        return
    end

    begin
        # NOTE(review): the check uses the element while the line below
        # stores `e.id` -- verify that both hash to the same skip state.
        if !el.find { |e| !skip_state?( e ) }
            print_debug_level_2 'Ignoring, already seen.'
            return
        end

        el.each { |e| skip_state e.id }

    # This could be an orphaned HTTP request, without a job, if running in
    # BrowserCluster::Worker.
    rescue NoMethodError
    end

    page = Page.from_data( elements.merge( url: request.url ) )
    page.response.request = request
    page.dom.push_transition Page::DOM::Transition.new( request.url, :request )

    @captured_pages << page if store_pages?
    notify_on_new_page( page )
rescue => e
    print_debug "Could not capture: #{request.url}"
    print_debug request.body.to_s
    print_debug_exception e
end
# File lib/arachni/browser.rb, line 1290
# Notifies the sink observers about `page` and, if page storage is enabled,
# buffers it. Pages without any sink data are ignored.
#
# @param [Page] page
def capture_snapshot_with_sink( page )
    has_sinks = !page.dom.data_flow_sinks.empty? ||
        !page.dom.execution_flow_sinks.empty?
    return unless has_sinks

    notify_on_new_page_with_sink( page )

    @page_snapshots_with_sinks << page if store_pages?
end
# File lib/arachni/browser.rb, line 1756
# Copies the response attributes of `source` over to `destination` and then
# passes `destination` to `javascript.inject`.
#
# @param [HTTP::Response] source
# @param [HTTP::Response] destination
#
# @return [nil]
def copy_response_data( source, destination )
    copied_attributes = %i[
        code url body headers ip_address return_code return_message
        headers_string total_time time
    ]

    copied_attributes.each do |attribute|
        destination.send( "#{attribute}=", source.send( attribute ) )
    end

    javascript.inject destination
    nil
end
# File lib/arachni/browser.rb, line 1787 def enforce_scope? !@ignore_scope end
Makes sure exactly one window is open: opens a new one if none exist, otherwise closes all but the first and resets it to a blank page. The window is then resized to the configured dimensions.
# File lib/arachni/browser.rb, line 1405 def ensure_open_window window_handles = @selenium.window_handles if window_handles.size == 0 @javascript.run( 'window.open()' ) @selenium.switch_to.window( @selenium.window_handles.last ) else if window_handles.size > 1 # Keep the first window_handles[1..-1].each do |handle| @selenium.switch_to.window( handle ) @selenium.close end @selenium.switch_to.window( @selenium.window_handles.first ) end @selenium.navigate.to 'about:blank' end @selenium.manage.window.resize_to( @width, @height ) end
# File lib/arachni/browser.rb, line 1173
# Fills in the `input`, `textarea` and `select` elements of `form`.
#
# @param [Selenium::WebDriver::Element] form
# @param [Hash, nil] inputs
#   Values keyed by input name or id. When `nil`, values are obtained from
#   {#value_for_name}.
#
# @return [nil]
def fill_in_form_inputs( form, inputs = nil )
    form.find_elements( :css, 'input, textarea' ).each do |input|
        name_or_id = name_or_id_for( input )
        value      = inputs ? inputs[name_or_id] : value_for_name( name_or_id )

        begin
            input.clear
            input.send_keys( value.to_s.recode )
        # Disabled inputs and such...
        rescue Selenium::WebDriver::Error::WebDriverError => e
            print_debug_level_2 "Could not fill in form input '#{name_or_id}'" <<
                " because: #{e} [#{e.class}]"
        end
    end

    form.find_elements( :tag_name, 'select' ).each do |select|
        name_or_id = name_or_id_for( select )
        value      = inputs ? inputs[name_or_id] : value_for_name( name_or_id )

        select.find_elements( tag_name: 'option' ).each do |option|
            begin
                if option[:value] == value || option.text == value
                    option.click

                    # Only stop looking at this select's options. This used
                    # to `return`, which left every subsequent select of the
                    # form untouched.
                    break
                end
            # Disabled inputs and such...
            rescue Selenium::WebDriver::Error::WebDriverError => e
                print_debug_level_2 "Could not fill in form select '#{name_or_id}'" <<
                    " because: #{e} [#{e.class}]"
            end
        end
    end

    nil
end
# File lib/arachni/browser.rb, line 1428
# @return [Array<Page::DOM::Transition>]
#   Copy of the buffered request transitions; the buffer itself is emptied.
def flush_request_transitions
    flushed = @request_transitions.dup
    @request_transitions.clear
    flushed
end
# File lib/arachni/browser.rb, line 1744
# Serves `request` from the preload buffer, if there is an entry for its URL.
# The entry is removed from the buffer once used.
#
# @param [HTTP::Request] request
# @param [HTTP::Response] response
#   Response to populate with the preloaded data.
#
# @return [HTTP::Response, nil]
#   The preloaded response, `nil` if nothing was preloaded for the URL.
def from_preloads( request, response )
    synchronize do
        preloaded = preloads.delete( request.url )
        return unless preloaded

        copy_response_data( preloaded, response )
        response.request = request

        unless preloaded.url.include?( request_token )
            save_response( response )
        end

        preloaded
    end
end
# File lib/arachni/browser.rb, line 1775
# Looks up the stored window response for `url`, trying the raw URL first
# and then its normalized variants.
#
# @param [String] url
#
# @return [HTTP::Response, nil]
def get_response( url )
    synchronize do
        # Order is important, #normalize_url by can get confused and remove
        # everything after ';' by treating it as a path parameter.
        # Rightly so...but we need to bypass it when auditing LinkTemplate
        # elements.
        match   = @window_responses[url]
        match ||= @window_responses[normalize_watir_url( url )]
        match ||= @window_responses[normalize_url( url )]
        match
    end
end
# File lib/arachni/browser.rb, line 1570 def ignore_request?( request ) print_debug_level_2 "Checking: #{request.url}" if !enforce_scope? print_debug_level_2 'Allow: Scope enforcement disabled.' return end if request_for_asset?( request ) print_debug_level_2 'Allow: Asset detected.' return false end if !request.scope.follow_protocol? print_debug_level_2 'Disallow: Cannot follow protocol.' return true end if !request.scope.in_domain? if self.class.asset_domains.include?( request.parsed_url.domain ) print_debug_level_2 'Allow: Out of scope but in CDN list.' return false end print_debug_level_2 'Disallow: Domain out of scope and not in CDN list.' return true end if request.scope.too_deep? print_debug_level_2 'Disallow: Too deep.' return true end if !request.scope.include? print_debug_level_2 'Disallow: Does not match inclusion rules.' return true end if request.scope.exclude? print_debug_level_2 'Disallow: Matches exclusion rules.' return true end if request.scope.redundant? print_debug_level_2 'Disallow: Matches redundant rules.' return true end false end
# File lib/arachni/browser.rb, line 1221
# @param [#[]] element
#
# @return [String, nil]
#   The element's `name` attribute if it is not empty, otherwise its `id`;
#   `nil` if both are empty.
def name_or_id_for( element )
    [:name, :id].each do |attribute|
        candidate = element[attribute].to_s
        return candidate unless candidate.empty?
    end

    nil
end
# File lib/arachni/browser.rb, line 1791 def normalize_watir_url( url ) normalize_url( url.gsub( ';', '%3B' ) ).gsub( '%3B', '%253B' ) end
# File lib/arachni/browser.rb, line 1621 def request_for_asset?( request ) ASSET_EXTENSIONS.include?( request.parsed_url.resource_extension.to_s.downcase ) end
# File lib/arachni/browser.rb, line 1442 def request_handler( request, response ) request.performer = self # return if request.headers['X-Arachni-Browser-Auth'] != auth_token # request.headers.delete 'X-Arachni-Browser-Auth' print_debug_level_2 "Request: #{request.url}" # We can't have 304 page responses in the framework, we need full request # and response data, the browser cache doesn't help us here. # # Still, it's a nice feature to have when requesting assets or anything # else. if !@last_url || request.url == @last_url request.headers.delete 'If-None-Match' request.headers.delete 'If-Modified-Since' end if @javascript.serve( request, response ) print_debug_level_2 'Serving local JS.' return end if !request.url.include?( request_token ) if ignore_request?( request ) print_debug_level_2 'Out of scope, ignoring.' return end if @add_request_transitions synchronize do @request_transitions << Page::DOM::Transition.new( request.url, :request ) end end end # Signal the proxy to not actually perform the request if we have a # preloaded response for it. if from_preloads( request, response ) print_debug_level_2 'Resource has been preloaded.' # There may be taints or custom JS code that need to be updated. javascript.inject response return end print_debug_level_2 'Request can proceed to origin.' # Capture the request as elements of pages -- let's us grab AJAX and # other browser requests and convert them into elements we can analyze # and audit. if request.scope.in? capture( request ) end request.headers['user-agent'] = Options.http.user_agent # Signal the proxy to continue with its request to the origin server. true end
# File lib/arachni/browser.rb, line 1434
# @return [Object]
#   Memoized token from `generate_token`.
def request_token
    return @request_token if @request_token

    @request_token = generate_token
end
# File lib/arachni/browser.rb, line 1505 def response_handler( request, response ) return if request.url.include?( request_token ) # Prevent PhantomJS from caching the root page, we need to have an # associated response. # # Also don't cache when we don't have a @last_url because this could # be driven directly from Selenium/Watir via a plugin and caching it # can ruin the scan. if !@last_url || @last_url == response.url response.headers.delete 'Cache-control' response.headers.delete 'Etag' response.headers.delete 'Date' response.headers.delete 'Last-Modified' end # Allow our own scripts to run. response.headers.delete 'Content-Security-Policy' print_debug_level_2 "Got response: #{response.url}" @request_transitions.each do |transition| next if !transition.running? || transition.element != request.url transition.complete end # If we abort the request because it's out of scope we need to emulate # an OK response because we **do** want to be able to grab a page with # the out of scope URL, even if it's empty. # For example, unvalidated_redirect checks need this. if response.code == 0 if enforce_scope? && response.scope.out? response.code = 200 end else if javascript.inject( response ) print_debug_level_2 'Injected custom JS.' end end # Don't store assets, the browsers will cache them accordingly. if request_for_asset?( request ) || !response.text? print_debug_level_2 'Asset detected, will not store.' return end # No-matter the scope, don't store resources for external domains. if !response.scope.in_domain? print_debug_level_2 'Outside of domain scope, will not store.' return end if enforce_scope? && response.scope.out? print_debug_level_2 'Outside of general scope, will not store.' return end whitelist_asset_domains( response ) save_response response print_debug_level_2 'Stored.' nil end
Loads `page` without taking a snapshot, used for restoring the root page after manipulation.
# File lib/arachni/browser.rb, line 1286 def restore( page ) load page, take_snapshot: false end
# File lib/arachni/browser.rb, line 1766
# Notifies the response observers and stores textual responses by URL.
#
# @param [HTTP::Response] response
#
# @return [HTTP::Response]
def save_response( response )
    synchronize do
        notify_on_response response

        if response.text?
            @window_responses[response.url] = response
        else
            # Non-textual responses aren't stored.
            return response
        end
    end
end
# File lib/arachni/browser.rb, line 1213 def skip_state( state ) self.skip_states << state end
# File lib/arachni/browser.rb, line 1209 def skip_state?( state ) self.skip_states.include? state end
# File lib/arachni/browser.rb, line 1254 def start_proxy print_debug 'Booting up...' print_debug_level_2 'Starting proxy...' @proxy = HTTP::ProxyServer.new( concurrency: @options[:concurrency], address: '127.0.0.1', request_handler: proc do |request, response| exception_jail { request_handler( request, response ) } end, response_handler: proc do |request, response| exception_jail { response_handler( request, response ) } end ) @proxy.start_async print_debug_level_2 "... started proxy at: #{@proxy.url}" end
# File lib/arachni/browser.rb, line 1272 def start_webdriver print_debug_level_2 'Starting WebDriver...' @watir = ::Watir::Browser.new( selenium ) print_debug_level_2 "... started WebDriver." print_debug '...boot-up completed.' end
# File lib/arachni/browser.rb, line 1280
# @return [Bool]
#   Whether the `:store_pages` option is enabled.
def store_pages?
    @options[:store_pages] ? true : false
end
# File lib/arachni/browser.rb, line 1217 def update_skip_states( states ) self.skip_states.merge states end
@param [Watir::HTMLElement] element @return [String]
Value to use to fill-in the input.
@see OptionGroups::Input.value_for_name
# File lib/arachni/browser.rb, line 1246 def value_for( element ) Options.input.value_for_name( name_or_id_for( element ) ) end
# File lib/arachni/browser.rb, line 1250 def value_for_name( name ) Options.input.value_for_name( name ) end
# File lib/arachni/browser.rb, line 1300 def wait_for_pending_requests sleep 0.2 t = Time.now last_connections = [] while @proxy.has_pending_requests? connections = @proxy.active_connections if last_connections != connections print_debug_level_2 "Waiting for #{@proxy.pending_requests} requests to complete:" connections.each do |connection| if connection.request print_debug_level_2 " * #{connection.request.url}" else print_debug_level_2 ' * Still reading request data.' end end end last_connections = connections sleep 0.1 # If the browser sends incomplete data the connection will remain # open indefinitely. next if Time.now - t < Options.browser_cluster.job_timeout connections.each(&:close) break end end
# File lib/arachni/browser.rb, line 1625 def whitelist_asset_domains( response ) synchronize do @whitelist_asset_domains ||= Support::LookUp::HashSet.new return if @whitelist_asset_domains.include? response.body @whitelist_asset_domains << response.body ASSET_EXTRACTORS.each do |regexp| response.body.scan( regexp ).flatten.compact.each do |url| next if !(domain = self.class.add_asset_domain( url )) print_debug_level_2 "#{domain} from #{url} based on #{regexp.source}" end end end end
# File lib/arachni/browser.rb, line 1231
# Runs `block` under `Timeout.timeout`.
#
# @param [Numeric] timeout
#   Timeout in seconds.
# @param [Block] block
#
# @return [Object]
#   Return value of `block`.
def with_timeout( timeout, &block )
    Timeout.timeout( timeout ) { block.call }
end