class Arachni::Page
It holds page data like elements, cookies, headers, etc…
@author Tasos “Zapotek” Laskos <tasos.laskos@arachni-scanner.com>
Constants
- ELEMENTS
- METADATA
Attributes
@return [Hash]
@private
@return [DOM]
DOM snapshot.
@return [Set<Integer>]
Audit whitelist based on {Element::Capabilities::Auditable#coverage_hash}.
@see update_element_audit_whitelist
@see audit_element?
@see Check::Auditor#skip?
@return [Hash]
Holds page data that will need to persist between {#clear_cache} calls and other utility data.
@return [HTTP::Response]
HTTP response.
Public Class Methods
# File lib/arachni/page.rb, line 605
#
# Rebuilds a page from marshalled data by unmarshalling it and handing the
# result to `new`.
#
# NOTE(review): `Marshal.load` is unsafe on untrusted input -- confirm that
# `data` only ever originates from {#_dump}.
#
# @param    [String]    data
#   Marshalled initialization options.
def self._load( data )
    options = Marshal.load( data )
    new( options )
end
@option options [String] :url
URL of the page.
@option options [String] :body
Body of the page.
@option options [Array<Link>] :links
{Link} elements.
@option options [Array<Form>] :forms
{Form} elements.
@option options [Array<Cookie>] :cookies
{Cookie} elements.
@option options [Array<Header>] :headers
{Header} elements.
@option options [Array<Cookie>] :cookie_jar
{Cookie} elements with which to update the HTTP cookiejar before auditing.
@option options [Array<String>] :paths
Paths contained in the page.
@option options [Array<String>] :request
{Request#initialize} options.
# File lib/arachni/page.rb, line 82
#
# Builds a page out of plain data, filling in defaults for the HTTP response
# and request.
#
# The given Hash and its nested `:response` and `:request` hashes are copied
# before being updated, so the caller's structures are never modified.
#
# @param    [Hash]  data
#   Page data; `:url` and `:body` are moved under `:response` when the
#   response does not already carry them.
#
# @return   Whatever `new` returns for the normalized data.
def self.from_data( data )
    data = data.dup

    # Copy the nested hashes too, `data.dup` above is only a shallow copy.
    response = (data[:response] || {}).dup
    response[:code] ||= 200
    response[:url]  ||= data.delete( :url )
    response[:body] ||= data.delete( :body ) || ''

    request = (response[:request] || {}).dup
    request[:url] ||= response[:url]

    data[:cookie_jar] ||= []

    response[:request] = Arachni::HTTP::Request.new( request )
    data[:response]    = Arachni::HTTP::Response.new( response )

    new data
end
@param [HTTP::Response] response
HTTP response to parse.
@return [Page]
# File lib/arachni/page.rb, line 59
#
# Parses the given response with a new {Parser} and returns the parser's page.
#
# @param    [HTTP::Response]    response
#   HTTP response to parse.
def self.from_response( response )
    response_parser = Parser.new( response )
    response_parser.page
end
@param [Hash] data
{#to_rpc_data}
@return [Page]
# File lib/arachni/page.rb, line 573
#
# Restores a page from RPC data.
#
# The `'response'` entry is restored via `HTTP::Response.from_rpc_data`,
# entries named after {ELEMENTS} are restored element by element via the
# class returned by `Element.type_to_class`, and every other entry is passed
# through as is. Keys are converted to symbols before being given to `new`.
#
# The given Hash is copied first, so the caller's data keeps its `'dom'` entry.
#
# @param    [Hash]  data
#   {#to_rpc_data}
#
# @return   The new instance, with its `@dom` restored from the `'dom'` entry.
def self.from_rpc_data( data )
    data = data.dup
    dom  = data.delete( 'dom' )

    element_types   = ELEMENTS.map(&:to_s)
    normalized_data = {}

    data.each do |name, value|
        normalized_data[name.to_sym] =
            case name
                when 'response'
                    HTTP::Response.from_rpc_data( value )

                when *element_types
                    # The element type is the entry name minus its last
                    # character (the plural "s").
                    value.map do |e|
                        Element.type_to_class( name[0...-1].to_sym ).from_rpc_data( e )
                    end

                else
                    value
            end
    end

    instance = new( normalized_data )
    instance.instance_variable_set(
        '@dom', DOM.from_rpc_data( dom.merge( page: instance ) )
    )
    instance
end
@param [String] url
URL to fetch.
@param [Hash] opts
@option opts [Integer] :precision (2)
How many times to request the page and examine changes between requests. Used to identify nonce tokens etc.
@option opts [Hash] :http
HTTP {HTTP::Client#get request} options.
@param [Block] block
Block to which to pass the page object. If given, the request will be performed asynchronously. If no block is given, the page will be fetched synchronously and be returned by this method.
@return [Page]
# File lib/arachni/page.rb, line 37
#
# Requests `url` `:precision` times and builds a page from the collected
# responses via {.from_response}.
#
# The `opts` Hash is only read, never written to.
#
# @param    [String]    url
#   URL to fetch.
# @param    [Hash]  opts
# @option opts  [Integer]   :precision    (2)
#   How many times to request the page.
# @option opts  [Hash]  :http
#   Options passed to `HTTP::Client.get`.
# @param    [Block] block
#   Called with the page once all responses have been collected. When given,
#   `HTTP::Client.run` is not called by this method and `nil` is returned.
#
# @return   The page when no block has been given, `nil` otherwise.
def self.from_url( url, opts = {}, &block )
    # Use a local instead of storing the default in the caller's Hash.
    precision = opts[:precision] || 2
    responses = []

    precision.times do
        HTTP::Client.get( url, opts[:http] || {} ) do |res|
            responses << res
            next if responses.size != precision

            block.call( from_response( responses ) ) if block_given?
        end
    end

    return if block_given?

    HTTP::Client.run
    from_response( responses )
end
Needs either a `:parser` or a `:response` or user-provided data.
@param [Hash] options
Hash from which to set instance attributes.
@option options [Array<HTTP::Response>, HTTP::Response] :response
HTTP response of the page -- or array of responses for the page for content refinement.
@option options [Parser] :parser
An instantiated {Parser}.
# File lib/arachni/page.rb, line 143
#
# Sets up the page from the given options.
#
# `:parser` and `:do_not_audit_elements` are consumed directly; every other
# option is assigned through its `<name>=` writer with a (best-effort) copy
# of the value.
#
# @param    [Hash]  options
#   Hash from which to set instance attributes.
#
# @raise    [ArgumentError]
#   When `options` is empty or when no URL can be determined.
def initialize( options )
    fail ArgumentError, 'Options cannot be empty.' if options.empty?
    options = options.dup

    @do_not_audit_elements = options.delete( :do_not_audit_elements )

    given_parser = options.delete( :parser )
    @cache       = { parser: given_parser }
    @response    = given_parser.response if given_parser

    # Tracks which element types have been dynamically updated, in order to
    # optimize #dup and #hash operations.
    @has_custom_elements = Set.new

    @metadata ||= {}

    options.each do |name, value|
        send( "#{name}=", try_dup( value ) )
    end

    dom_options = options[:dom] || {}
    @dom = DOM.new( dom_options.merge( page: self ) )

    fail ArgumentError, 'No URL given!' if !url

    Platform::Manager.fingerprint( self )

    # The whitelist may have been assigned as any enumerable, store it as a Set.
    @element_audit_whitelist = Set.new( @element_audit_whitelist || [] )
end
Public Instance Methods
# File lib/arachni/page.rb, line 467
#
# Pages are considered equal when their {#hash} values match.
#
# @param    [#hash] other
# @return   [Boolean]
def ==( other )
    own_hash = hash
    own_hash == other.hash
end
# File lib/arachni/page.rb, line 601
#
# Marshals the (shallow, i.e. not duplicated) initialization options of
# this page.
#
# @param    _
#   Ignored.
# @return   [String]
def _dump( _ )
    options = to_initialization_options( false )
    Marshal.dump( options )
end
@param [Element::Capabilities::Auditable, Integer] element
Element or {Element::Capabilities::Auditable#coverage_hash}.
@return [Bool]
`true` if the element should be audited, `false` otherwise.
@see element_audit_whitelist
@see Check::Auditor#skip?
# File lib/arachni/page.rb, line 229
#
# Determines whether or not an element should be audited.
#
# @param    [#coverage_hash, Integer] element
#   Element, or its coverage hash.
#
# @return   [Boolean]
#   `false` if auditing has been disabled via {#do_not_audit_elements},
#   `true` if the whitelist is empty, otherwise whether or not the whitelist
#   includes the element's coverage hash.
def audit_element?( element )
    # Return an actual `false` (not `nil`), as documented.
    return false if @do_not_audit_elements
    return true  if @element_audit_whitelist.empty?

    coverage = element.is_a?( Integer ) ? element : element.coverage_hash
    @element_audit_whitelist.include?( coverage )
end
@return [String]
HTTP response body.
# File lib/arachni/page.rb, line 269
#
# @return   [String]
#   The assigned body if there is one, an empty string if there is neither a
#   body nor a response, otherwise the response body (which is then
#   remembered).
def body
    return @body if @body
    return '' if !@response

    @body = response.body
end
@param [String] string
Page body.
# File lib/arachni/page.rb, line 276
#
# Assigns a new body, stored as a frozen String, after clearing the cache
# and resetting the remembered {#has_script?} result.
#
# @param    [#to_s] string
#   Page body.
def body=( string )
    clear_cache
    @has_javascript = nil

    @body = string.to_s.freeze
end
@note Will preserve caches for elements which have been externally modified.
@return [Page]
`self` with caches cleared.
# File lib/arachni/page.rb, line 352
#
# Clears all cache entries except for those whose key is tracked in
# `@has_custom_elements`.
#
# @return   [Page]
#   `self`
def clear_cache
    disposable = ELEMENTS.reject { |type| @has_custom_elements.include?( type ) }

    disposable.each do |type|
        cached = @cache[type]
        next if !cached

        # Remove the association to this page before clearing the elements
        # from cache to make it easier on the GC.
        cached.each { |element| element.page = nil }
    end

    @cache.keep_if { |key, _| @has_custom_elements.include?( key ) }
    self
end
@return [Integer]
HTTP response status code (`0` when there is no response).
# File lib/arachni/page.rb, line 256
#
# @return
#   The assigned code if there is one, `0` if there is neither a code nor a
#   response, otherwise the response code (which is then remembered).
def code
    return @code if @code
    return 0 if !response

    @code = response.code
end
Forces {#audit_element?} to always return a falsy value.
# File lib/arachni/page.rb, line 238
#
# Raises the flag which {#audit_element?} checks before anything else.
#
# @return   [true]
def do_not_audit_elements
    @do_not_audit_elements = true
end
@return [Arachni::Parser::Document]
Parsed {#body HTML} document.
# File lib/arachni/page.rb, line 342
#
# @return
#   Cached document: the parser's document when a {#parser} is available,
#   otherwise the result of parsing the {#body}.
def document
    @cache[:document] ||= begin
        page_parser = parser

        if page_parser.nil?
            Arachni::Parser.parse( body )
        else
            page_parser.document
        end
    end
end
# File lib/arachni/page.rb, line 475
#
# @return
#   New instance of the same class, created from
#   {#to_initialization_options}.
def dup
    options = to_initialization_options
    self.class.new( options )
end
@return [Array<Element::Base>]
All page elements.
# File lib/arachni/page.rb, line 320
#
# @return   [Array]
#   Flattened results of calling each of the {ELEMENTS} readers.
def elements
    per_type = []
    ELEMENTS.each { |type| per_type << send( type ) }
    per_type.flatten
end
@return [Array<Element::Base>]
All page elements that are within the scope of the scan.
# File lib/arachni/page.rb, line 326
#
# @return   [Array]
#   Elements whose scope reports them as being in, limited to the element
#   types that `Options.audit.element?` accepts.
def elements_within_scope
    in_scope = ELEMENTS.each_with_object( [] ) do |type, lists|
        next if !Options.audit.element?( type )

        lists << send( type ).select { |element| element.scope.in? }
    end

    in_scope.flatten.compact
end
# File lib/arachni/page.rb, line 471
#
# Delegates to {#==}.
#
# @param    other
# @return   [Boolean]
def eql?( other )
    self == other
end
@param [String, Symbol,Array<String, Symbol>] tags
Element tag names.
@return [Boolean]
`true` if the page contains any of the given elements, `false` otherwise.
# File lib/arachni/page.rb, line 412
#
# Checks whether or not the page contains any of the given elements.
#
# The body is checked via `has_html_tag?` first and the document is only
# consulted for tags that pass that check.
#
# @param    [String, Symbol, Array<String, Symbol>] tags
#   Element tag names.
#
# @return   [Boolean]
#   `true` if the document has nodes for any of the tags, `false` otherwise
#   -- including when the page is not {#text?} or there is no {#document}.
def has_elements?( *tags )
    # Return an actual `false` (not `nil`), as documented.
    return false if !text?

    tags.flatten.each do |tag|
        tag = tag.to_s

        next if !body.has_html_tag?( tag )

        return false if !document
        return true  if document.nodes_by_name( tag ).any?
    end

    false
end
@return [Boolean]
`true` if the page contains client-side code, `false` otherwise.
# File lib/arachni/page.rb, line 384
#
# Checks the lower-cased body for `<script`, `javascript:` or any
# `<event>=` attribute, for events listed by `Browser::Javascript.events`.
# Only textual pages with a `text/html` content-type are examined and the
# result is remembered.
#
# @return   [Boolean]
def has_script?
    return @has_javascript if !@has_javascript.nil?

    is_html = response.headers.content_type.to_s.start_with?( 'text/html' )
    return @has_javascript = false if !is_html || !text?

    lowercase_body = body.downcase

    # First check, quick and simple.
    obvious = ['<script', 'javascript:'].any? do |needle|
        lowercase_body.include?( needle )
    end
    return @has_javascript = true if obvious

    # Check for event attributes, if there are any then there's JS to be
    # executed.
    @has_javascript = Browser::Javascript.events.flatten.any? do |event|
        lowercase_body.include?( "#{event}=" )
    end
end
# File lib/arachni/page.rb, line 463
#
# @return
#   `#hash` of the page's digest.
def hash
    page_digest = digest
    page_digest.hash
end
# File lib/arachni/page.rb, line 495 def import_metadata( other, metas = METADATA ) [metas].flatten.each do |meta| other.metadata.each do |element_type, data| @metadata[element_type] ||= {} @metadata[element_type][meta.to_s] ||= {} @metadata[element_type][meta.to_s].merge!( data[meta.to_s] ) end end reload_metadata self end
@return [String]
The request method that returned the page
# File lib/arachni/page.rb, line 335
#
# Without arguments, returns the method of the request which returned the
# page. With arguments, behaves like the inherited `#method`.
#
# @param    args
#   Arguments for the inherited `#method`.
def method( *args )
    args.any? ? super( *args ) : response.request.method
end
@return [Arachni::URI]
# File lib/arachni/page.rb, line 186
#
# @return
#   Result of passing the {#url} to `Arachni::URI()`.
def parsed_url
    page_url = url
    Arachni::URI( page_url )
end
@return [Parser]
# File lib/arachni/page.rb, line 191
#
# @return   [Parser, nil]
#   Cached parser, created for the response on first use. `nil` if there is
#   no response.
def parser
    return if !@response

    cached = @cache[:parser]
    return cached if cached

    fresh = Parser.new( @response )
    @cache[:parser] = fresh

    # The page may have a browser-assigned body, set it as the one to parse.
    fresh.body = body

    fresh
end
# File lib/arachni/page.rb, line 202
#
# Stores the given parser in the cache.
#
# @param    p
#   Parser to use.
def parser=( p )
    @cache.store( :parser, p )
end
@return [Array<String>]
Paths contained in this page.
@see Parser#paths
# File lib/arachni/page.rb, line 308
#
# @return
#   Cached paths, as reported by the {#parser}; an empty Array when there is
#   no parser.
def paths
    @cache[:paths] ||= begin
        page_parser = parser
        page_parser ? page_parser.paths : []
    end
end
@return [Object]
Object which performed the {#request} which led to this page.
# File lib/arachni/page.rb, line 181
#
# @return
#   The performer of the page's {#request}.
def performer
    page_request = request
    page_request.performer
end
# File lib/arachni/page.rb, line 459
#
# @return
#   `#persistent_hash` of the page's digest.
def persistent_hash
    page_digest = digest
    page_digest.persistent_hash
end
@return [Platform]
Applicable platforms for the page.
# File lib/arachni/page.rb, line 314
#
# @return
#   The `Platform::Manager` entry for the page's {#url}.
def platforms
    page_url = url
    Platform::Manager[page_url]
end
# File lib/arachni/page.rb, line 364
#
# Drops data which the code treats as unnecessary for reporting: the whole
# cache, the bodies of non-text pages, the cookie-jar contents and the DOM's
# digest and skip-states.
#
# @return   [Page]
#   `self`
def prepare_for_report
    # We want a hard clear, that's why we don't call #clear_cache.
    @cache.clear

    # If we're dealing with binary data remove it before storing.
    unless text?
        response.body = nil
        self.body     = nil
    end

    @cookie_jar.clear if @cookie_jar

    @dom.digest      = nil
    @dom.skip_states = nil

    self
end
@return [Hash]
{#url URL} query parameters.
# File lib/arachni/page.rb, line 263
#
# @return
#   Cached result of passing the {#url} to `uri_parse_query`.
def query_vars
    @cache[:query_vars] ||= begin
        page_url = url
        uri_parse_query( page_url )
    end
end
# File lib/arachni/page.rb, line 487
#
# Restores the metadata of every cached element from the page's metadata
# store.
def reload_metadata
    ELEMENTS.each do |type|
        cached = @cache[type]
        next if !cached

        cached.each { |element| restore_from_metadata( element ) }
    end
end
@return [HTTP::Request]
HTTP request.
# File lib/arachni/page.rb, line 244
#
# @return
#   The request of the page's response.
def request
    page_response = response
    page_response.request
end
@return [Scope]
# File lib/arachni/page.rb, line 175
#
# @return   [Scope]
#   Scope for this page; created on first use and then reused, instead of
#   being re-created on every call.
def scope
    @scope ||= Scope.new( self )
end
@return [Boolean]
`true` if the body of the page is text-based, `false` otherwise.
# File lib/arachni/page.rb, line 429
#
# @return
#   `false` if there is no response, otherwise the response's `#text?`.
def text?
    response ? response.text? : false
end
@return [String]
Title of the page.
# File lib/arachni/page.rb, line 436
#
# @return
#   Text of the first `title` node of the {#document}; `nil` if it could not
#   be retrieved, whatever the error.
def title
    title_nodes = document.nodes_by_name( 'title' )
    title_nodes.first.text
rescue StandardError
    nil
end
@return [Hash]
Converts the page data to a hash.
# File lib/arachni/page.rb, line 442
#
# @return   [Hash]
#   (Best-effort) copies of the instance variables, keyed by their names
#   without the `@`, merged with the cache contents -- minus the parser.
def to_h
    skip = [:@document, :@do_not_audit_elements, :@has_custom_elements, :@scope]

    data = {}
    (instance_variables - skip).each do |iv|
        key       = iv.to_s.gsub( '@', '' ).to_sym
        data[key] = try_dup( instance_variable_get( iv ) )
    end

    merged = data.merge( @cache )
    merged.delete :parser
    merged
end
# File lib/arachni/page.rb, line 509
#
# Collects the options needed to re-create this page via `new`.
#
# @param    [Boolean]   deep
#   Whether or not to (best-effort) duplicate the cookie-jar, audit
#   whitelist, metadata and DOM values.
#
# @return   [Hash]
def to_initialization_options( deep = true )
    options = {}

    options[:body] = @body if @body

    [:cookie_jar, :element_audit_whitelist, :metadata].each do |name|
        value = instance_variable_get( "@#{name}".to_sym )
        value = try_dup( value ) if deep

        options[name] = value if value
    end

    # Only element types tracked in @has_custom_elements are included.
    ELEMENTS.each do |type|
        next if !@has_custom_elements.include?( type )

        list = @cache[type]
        if !list || list.empty?
            options.delete( type )
            next
        end

        # Hand out copies that are not associated with any page.
        options[type] = list.map do |element|
            copy      = element.dup
            copy.page = nil
            copy
        end
    end

    options[:response]              = response
    options[:do_not_audit_elements] = @do_not_audit_elements

    options[:dom] = dom.to_h.keys.each_with_object( {} ) do |key, dom_options|
        value = dom.send( key )
        value = try_dup( value ) if deep

        dom_options[key] = value
    end

    options
end
@return [Hash]
Data representing this instance that are suitable for RPC transmission.
# File lib/arachni/page.rb, line 553
#
# @return   [Hash]
#   String-keyed initialization options, with the DOM, response and elements
#   (all types but `:headers`) converted via their `#to_rpc_data`, the audit
#   whitelist as an Array and the cookie-jar removed.
def to_rpc_data
    rpc = to_initialization_options( false ).my_stringify_keys( false )

    rpc['dom']                     = dom.to_rpc_data
    rpc['element_audit_whitelist'] = element_audit_whitelist.to_a
    rpc['response']                = rpc['response'].to_rpc_data

    ELEMENTS.each do |type|
        next if type == :headers

        key = type.to_s
        next if !rpc[key]

        rpc[key] = send( key ).map(&:to_rpc_data)
    end

    rpc.delete 'cookie_jar'
    rpc
end
# File lib/arachni/page.rb, line 454
#
# @return   [String]
#   Class, object ID, URL and DOM of the page.
def to_s
    klass = self.class
    "#<#{klass}:#{object_id} @url=#{@url.inspect} @dom=#{@dom}>"
end
@param [Array<Element::Capabilities::Auditable, Integer>] list
Audit whitelist based on {Element::Capabilities::Auditable elements} or {Element::Capabilities::Auditable#coverage_hash}s.
@return [Set] {#element_audit_whitelist}
@see element_audit_whitelist
@see Check::Auditor#skip?
# File lib/arachni/page.rb, line 214
#
# Adds the given elements, or coverage hashes, to the audit whitelist.
#
# @param    [Array<#coverage_hash, Integer>, #coverage_hash, Integer]    list
#   Elements or coverage hashes; a single item is accepted too.
#
# @return   [Set]
#   The updated whitelist.
def update_element_audit_whitelist( list )
    [list].flatten.each do |e|
        @element_audit_whitelist << (e.is_a?( Integer ) ? e : e.coverage_hash)
    end

    # Return the whitelist, as documented, rather than the flattened input.
    @element_audit_whitelist
end
# File lib/arachni/page.rb, line 479
#
# Stores the metadata of every cached element in the page's metadata store.
def update_metadata
    ELEMENTS.each do |type|
        cached = @cache[type]
        next if !cached

        cached.each { |element| store_to_metadata( element ) }
    end
end
@return [String]
URL of the page.
# File lib/arachni/page.rb, line 250
#
# @return   [String, nil]
#   The assigned URL if there is one, otherwise the URL of the response
#   (which is then remembered). `nil` if there is neither, instead of
#   raising a `NoMethodError`, so that callers can check for a missing URL.
def url
    @url ||= (@response ? @response.url : nil)
end
Private Instance Methods
# File lib/arachni/page.rb, line 629
#
# Associates every element with this page and syncs its metadata with the
# page's metadata store (store first, then restore).
#
# @param    [Enumerable]    list
#   Elements.
#
# @return   [Array]
#   Frozen Array holding the same elements.
def assign_page_to_elements( list )
    assigned = list.map do |element|
        element.page = self

        store_to_metadata( element )
        restore_from_metadata( element )

        element
    end

    assigned.freeze
end
# File lib/arachni/page.rb, line 611
#
# @return   [String]
#   Combination of the hashes of the DOM's playable transitions, the body
#   and the (sorted, unique) hashes of cached elements whose type is tracked
#   in `@has_custom_elements`.
def digest
    custom_hashes = ELEMENTS.inject( [] ) do |hashes, type|
        list = @cache[type] if @has_custom_elements.include?( type )

        list ? (hashes | list.map(&:hash)) : hashes
    end

    "#{dom.playable_transitions.hash}:#{body.hash}#{custom_hashes.sort}"
end
# File lib/arachni/page.rb, line 662
#
# Makes sure that the metadata store has a container for the given element
# type and metadata name.
#
# @param    [#type] element
# @param    [#to_s] meta
#   Metadata name.
#
# @return   [Hash]
#   The container for the element's type and the given name.
def ensure_metadata( element, meta )
    per_type = (@metadata[element.type.to_s] ||= {})
    per_type[meta.to_s] ||= {}
end
# File lib/arachni/page.rb, line 625
#
# Stores the given paths in the cache.
#
# @param    paths
#   Paths of the page.
def paths=( paths )
    @cache.store( :paths, paths )
end
# File lib/arachni/page.rb, line 650
#
# Assigns to the element the values held by the metadata store under its
# type and coverage hash, for each of the {METADATA} names that the element
# has a writer for.
#
# @param    element
def restore_from_metadata( element )
    METADATA.each do |meta|
        writer = "#{meta}="
        next if !element.respond_to?( writer )

        ensure_metadata( element, meta )

        stored = @metadata[element.type.to_s][meta.to_s]
        element.send( writer, stored[element.coverage_hash] )
    end
end
# File lib/arachni/page.rb, line 640
#
# Records in the metadata store, under the element's type and coverage
# hash, the element's value for each of the {METADATA} names it responds to.
# Values already present in the store are kept.
#
# @param    element
def store_to_metadata( element )
    METADATA.each do |meta|
        next if !element.respond_to?( meta )

        ensure_metadata( element, meta )

        stored = @metadata[element.type.to_s][meta.to_s]
        stored[element.coverage_hash] ||= element.send( meta )
    end
end
# File lib/arachni/page.rb, line 667
#
# Best-effort duplication.
#
# @param    v
#   Object to duplicate.
#
# @return
#   Duplicate of `v`, or `v` itself if duplicating it raised an error.
def try_dup( v )
    begin
        v.dup
    rescue StandardError
        v
    end
end