class Arachni::Trainer

Trainer class

Analyzes key HTTP responses looking for new auditable elements.

@author Tasos Laskos <tasos.laskos@arachni-scanner.com>

Constants

MAX_TRAININGS_PER_URL

Public Class Methods

new( framework ) click to toggle source

@param [Arachni::Framework] framework

# File lib/arachni/trainer.rb, line 31
# Sets up the trainer's state and registers its callbacks on the framework:
# one to track the page currently being audited and one to inspect every
# completed HTTP response.
#
# @param [Arachni::Framework] framework
def initialize( framework )
    super()

    @framework  = framework
    @updated    = false

    @seen_pages = Support::LookUp::HashSet.new

    # Counter of trainings keyed by URL; unseen URLs read as 0.
    @trainings_per_url = Hash.new( 0 )

    # get us setup using the page that is being audited as a seed page
    framework.on_page_audit { |page| self.page = page }

    framework.http.on_complete do |response|
        # Skip buffered requests and requests for which `train?` is false.
        next if response.request.buffered? || !response.request.train?

        if response.redirect?
            # Resolve the Location header against the seed page's URL or,
            # when no seed page has been set yet, the URL from the options.
            reference_url = @page ? @page.url : @framework.options.url
            redirect_url  = to_absolute( response.headers.location, reference_url )

            # Request the redirect target and train on its response instead.
            framework.http.get( redirect_url ) { |res| push res }
            next
        end

        # Buffered requests were already filtered out by the guard above,
        # so no second check is needed before pushing.
        push response
    end
end

Public Instance Methods

page=( page ) click to toggle source

Sets the current working page and {ElementFilter.update_from_page updates} the {ElementFilter}.

@param [Arachni::Page] page

# File lib/arachni/trainer.rb, line 88
# Makes `page` the current working (seed) page.
#
# The element filter is refreshed from the page before the page is stored,
# so if the filter update raises, the previously stored page is kept.
#
# @param [Arachni::Page] page
# @return [Arachni::Page] the page that was assigned
def page=( page )
    ElementFilter.update_from_page( page )
    @page = page
end
push( response ) click to toggle source

Passes the response on for analysis.

If the response contains new elements it creates a new page with those elements and pushes it to a buffer.

These new pages can then be retrieved by flushing the buffer (flush).

@param [Arachni::HTTP::Response] response

# File lib/arachni/trainer.rb, line 69
# Hands a response over for analysis, provided a seed page has been set and
# the response passes `analyze_response?`.
#
# Any StandardError raised along the way is reported through
# `print_exception` and swallowed.
#
# @param [Arachni::HTTP::Response] response
# @return [true, nil] `true` when the response was analyzed; `nil` when it
#   was skipped or an error was rescued
def push( response )
    unless @page
        print_debug 'No seed page assigned yet.'
        return
    end

    return unless analyze_response?( response )

    analyze( response )
    true
rescue => e
    print_exception e
    nil
end

Private Instance Methods

analyze( resource ) click to toggle source

Analyzes a response looking for new links, forms and cookies.

@param [HTTP::Response, Page] resource

# File lib/arachni/trainer.rb, line 98
# Inspects a response (or page) for new cookies, forms and links.
#
# All of the page's paths are pushed to the framework's URL queue. If any
# new elements were found, the per-URL training counter is incremented,
# `notify_on_new_page` is called and the page is pushed to the framework's
# page queue.
#
# @param [HTTP::Response, Page] resource
def analyze( resource )
    incoming_page =
        if resource.is_a?( Page )
            resource
        else
            resource.to_page
        end

    print_debug "Started for response with request ID: ##{resource.request.id}"

    found_new = has_new?( incoming_page, :cookies )

    # Same body, same URL and no new cookies compared to the seed page:
    # there is nothing further to analyze.
    unchanged = @page &&
        incoming_page.body == @page.body &&
        !found_new &&
        @page.url == incoming_page.url

    if unchanged
        incoming_page.clear_cache
        print_debug 'Page hasn\'t changed.'
        return
    end

    # Short-circuits: once something new has been found, the remaining
    # element types are not passed to has_new? at all.
    found_new ||= [ :forms, :links ].any? { |type| has_new?( incoming_page, type ) }

    incoming_page.paths.each { |path| @framework.push_to_url_queue( path ) }

    if found_new
        @trainings_per_url[incoming_page.url] += 1

        notify_on_new_page incoming_page
        @framework.push_to_page_queue( incoming_page )
    end

    incoming_page.clear_cache

    print_debug 'Training complete.'
end
analyze_response?( response ) click to toggle source
# File lib/arachni/trainer.rb, line 163
# Decides whether a response is worth analyzing.
#
# A response is rejected when the framework accepts no more pages, when it
# fails `within_scope?`, when an identical fingerprint (query parameter
# names, cookie names and body) has been seen before, or when it is not
# text. The fingerprint is recorded before the text check is made.
#
# @param [Arachni::HTTP::Response] response
# @return [true, false, nil] `true` to analyze; `false` for out-of-scope or
#   non-text responses; `nil` when no more pages are accepted or the
#   response has already been seen
def analyze_response?( response )
    unless @framework.accepts_more_pages?
        print_info 'No more pages accepted, skipping analysis.'
        return
    end

    return false unless within_scope?( response )

    query_names  = response.parsed_url.query_parameters.keys
    cookie_names = Cookie.from_headers( response.url, response.headers ).
        map { |cookie| cookie.name }

    fingerprint = "#{query_names.hash}:#{cookie_names.hash}:#{response.body}"

    # Naive optimization but it works a lot of the time. :)
    if @seen_pages.include?( fingerprint )
        print_debug "Already seen response for request ID: ##{response.request.id}"
        return
    end
    @seen_pages << fingerprint

    response.text? ? true : false
end
has_new?( incoming_page, element_type ) click to toggle source
# File lib/arachni/trainer.rb, line 133
# Feeds the page's elements of the given type to the matching
# `ElementFilter.update_<element_type>` method and reports whether the
# returned count is non-zero.
#
# @param [Page] incoming_page
# @param [Symbol] element_type name of both the page accessor and the
#   filter update suffix (called with :cookies, :forms and :links here)
# @return [true, nil] `true` (after logging the count) when the filter
#   returned a non-zero count, `nil` when it returned 0
def has_new?( incoming_page, element_type )
    updater  = "update_#{element_type}".to_sym
    elements = incoming_page.send( element_type )
    count    = ElementFilter.send( updater, elements )

    if count == 0
        nil
    else
        print_info "Found #{count} new #{element_type}."
        true
    end
end
within_scope?( response ) click to toggle source
# File lib/arachni/trainer.rb, line 145
# Checks whether a response may be used for training.
#
# A response is rejected, with a verbose message, when its URL has reached
# MAX_TRAININGS_PER_URL trainings, when its scope reports it as redundant,
# or when its scope reports it as out. The checks run in that order and
# only the first match is reported.
#
# @param [Arachni::HTTP::Response] response
# @return [Boolean]
def within_scope?( response )
    reason =
        if @trainings_per_url[response.url] >= MAX_TRAININGS_PER_URL
            "Reached maximum trainings (#{MAX_TRAININGS_PER_URL})"
        elsif response.scope.redundant?
            'Matched redundancy filters'
        elsif response.scope.out?
            'Matched exclusion criteria'
        end

    return true unless reason

    print_verbose "#{reason}, skipping: #{response.url}"
    false
end