class Arachni::BrowserCluster

@author Tasos “Zapotek” Laskos <tasos.laskos@arachni-scanner.com>

Attributes

pending_job_counter[R]

@return [Integer]

Number of pending jobs.
pool_size[R]

@return [Integer]

Amount of browser instances in the pool.
workers[R]

@return [Array<Worker>]

Worker pool.

Public Class Methods

add_to_total_job_time( time ) click to toggle source
# File lib/arachni/browser_cluster.rb, line 438
# Adds a duration to the class-wide running total of job time.
#
# @param [#to_f] time
#   Duration to add; it is coerced with `#to_f`.
#
# @return [Float]
#   The updated running total.
def self.add_to_total_job_time( time )
    @total_job_time = (@total_job_time || 0.0) + time.to_f
end
completed_job_count() click to toggle source
# File lib/arachni/browser_cluster.rb, line 430
# @return [Integer]
#   Class-wide number of completed jobs. The counter is read through `#to_i`,
#   so an unset (`nil`) counter is reported as `0`.
def self.completed_job_count
    @completed_job_count.to_i
end
increment_completed_job_count() click to toggle source
# File lib/arachni/browser_cluster.rb, line 420
# Bumps the class-wide completed-job counter by one, starting from 0 when
# the counter has not been set yet.
#
# @return [Integer]
#   The updated counter.
def self.increment_completed_job_count
    @completed_job_count = (@completed_job_count || 0) + 1
end
increment_queued_job_count() click to toggle source
# File lib/arachni/browser_cluster.rb, line 415
# Bumps the class-wide queued-job counter by one, starting from 0 when the
# counter has not been set yet.
#
# @return [Integer]
#   The updated counter.
def self.increment_queued_job_count
    @queued_job_count = (@queued_job_count || 0) + 1
end
increment_time_out_count() click to toggle source
# File lib/arachni/browser_cluster.rb, line 425
# Bumps the class-wide time-out counter by one, starting from 0 when the
# counter has not been set yet.
#
# @return [Integer]
#   The updated counter.
def self.increment_time_out_count
    @time_out_count = (@time_out_count || 0) + 1
end
new( options = {} ) click to toggle source

@param [Hash] options @option options [Integer] :pool_size (5)

Amount of {Worker browsers} to add to the pool.

@option options [Integer] :time_to_live (10)

Restricts each browser's lifetime to the given amount of pages.
When that number is exceeded the current process is killed and a new
one is pushed to the pool. Helps prevent memory leak issues.

@raise ArgumentError On missing `:handler` option.

Calls superclass method
# File lib/arachni/browser_cluster.rb, line 75
# Builds the cluster: applies the options, sets up the bookkeeping
# structures and finally spawns the workers.
#
# @param [Hash] options
#   Merged over a default `:pool_size` taken from
#   `Options.browser_cluster.pool_size`. Each entry is applied through its
#   `<name>=` writer when one is available, otherwise it is stored directly
#   in the `@<name>` instance variable.
def initialize( options = {} )
    super()

    defaults = { pool_size: Options.browser_cluster.pool_size }

    defaults.merge( options ).each_pair do |name, value|
        begin
            # The writer receives a copy produced by #try_dup...
            send( "#{name}=", try_dup( value ) )
        rescue NoMethodError
            # ...while the fallback stores the value as given.
            instance_variable_set( :"@#{name}", value )
        end
    end

    # Per-Job#id state shared between the workers to keep them in sync.
    @skip_states_per_job = {}

    # Per-Job#id callbacks. They live here because jobs get serialized and
    # off-loaded to disk, hence they can't carry Block or Proc objects.
    @job_callbacks = {}

    # Amount of pending jobs distributed across the cluster, by Job#id,
    # plus an overall counter.
    @pending_job_counter = 0
    @pending_jobs        = Hash.new(0)

    # Disk-backed job queue.
    @jobs = Support::Database::Queue.new.tap do |job_queue|
        job_queue.max_buffer_size = 10
    end

    # Pool of BrowserCluster::Worker instances.
    @workers = []

    @done_signal = Queue.new
    @mutex       = Monitor.new

    # Must come last, workers are handed this instance as their master.
    initialize_workers
end
seconds_per_job() click to toggle source
# File lib/arachni/browser_cluster.rb, line 410
# Average amount of job time per completed job, class-wide.
#
# @return [Float, Integer]
#   `total_job_time / completed_job_count`, or `0` when no job has been
#   completed yet. Guarding on the divisor (rather than only on `NaN`)
#   also prevents `Infinity` from being reported when job time has been
#   recorded but the completed counter is still 0.
def self.seconds_per_job
    completed = completed_job_count
    return 0 if completed.zero?

    total_job_time / Float( completed )
end
statistics() click to toggle source
# File lib/arachni/browser_cluster.rb, line 443
# Snapshot of the class-wide job statistics.
#
# @return [Hash{Symbol => Numeric}]
#   `:seconds_per_job`, `:total_job_time`, `:queued_job_count`,
#   `:completed_job_count` and `:time_out_count` (in that order); counters
#   which have not been set yet are reported as `0`.
def self.statistics
    stats = {
        seconds_per_job: seconds_per_job,
        total_job_time:  total_job_time
    }

    stats[:queued_job_count]    = @queued_job_count    || 0
    stats[:completed_job_count] = @completed_job_count || 0
    stats[:time_out_count]      = @time_out_count      || 0

    stats
end
total_job_time() click to toggle source
# File lib/arachni/browser_cluster.rb, line 434
# @return [Integer]
#   Class-wide accumulated job time. The stored total is read through
#   `#to_i`, so any fractional part is dropped and an unset (`nil`) total is
#   reported as `0`.
def self.total_job_time
    @total_job_time.to_i
end

Public Instance Methods

add_to_total_job_time( time ) click to toggle source
# File lib/arachni/browser_cluster.rb, line 404
# Forwards to the class-level method of the same name while holding this
# instance's lock (see #synchronize).
#
# @param [#to_f] time
#   Duration to add to the class-wide total.
def add_to_total_job_time( time )
    synchronize { self.class.add_to_total_job_time( time ) }
end
callback_for( job ) click to toggle source

@private

# File lib/arachni/browser_cluster.rb, line 382
# Looks up the callback stored in `@job_callbacks` under `job.id`.
#
# @param [Job] job
#
# @return [Object, nil]
#   Whatever `@job_callbacks[job.id]` holds for that ID.
def callback_for( job )
    @job_callbacks[job.id]
end
done?() click to toggle source

@return [Bool]

`true` if there are no resources to analyze and no running workers.
# File lib/arachni/browser_cluster.rb, line 275
# Checks (under the instance lock) whether the pending job counter is 0.
#
# @return [Bool]
#
# @raise [Error::AlreadyShutdown]
#   Via #fail_if_shutdown, which runs first.
def done?
    fail_if_shutdown

    synchronize do
        remaining = @pending_job_counter
        remaining == 0
    end
end
explore( resource, options = {}, cb = nil, &block ) click to toggle source

@param [Page, String, HTTP::Response] resource

Resource to explore; if given a `String` it will be treated as a URL
and will be loaded.

@param [Hash] options

See {Jobs::DOMExploration} accessors.

@param [Block] block

Callback to be passed the {Job::Result}.

@see Jobs::DOMExploration @see queue

# File lib/arachni/browser_cluster.rb, line 182
# Wraps the resource in a {Jobs::DOMExploration} job and queues it.
#
# @param [Object] resource
#   Stored under the `:resource` key of the job options.
# @param [Hash] options
#   Further job options; the given Hash is not modified.
# @param [#call, nil] cb
#   Callback, forwarded to #queue.
# @param [Block] block
#   Callback, forwarded to #queue.
#
# @return
#   Whatever #queue returns.
def explore( resource, options = {}, cb = nil, &block )
    job_options = options.merge( resource: resource )
    job         = Jobs::DOMExploration.new( job_options )

    queue( job, cb, &block )
end
handle_job_result( result ) click to toggle source

@param [Job::Result] result

@private

# File lib/arachni/browser_cluster.rb, line 254
# Hands a job result over to the callback registered for its job.
#
# Nothing happens once the cluster has been shut down or when the job has
# already been marked as done. The callback receives the result, followed
# by the job's args and this cluster -- flattened and with `nil`s removed.
#
# @param [Job::Result] result
#
# @return [nil]
#
# @private
def handle_job_result( result )
    return if @shutdown || job_done?( result.job )

    synchronize do
        print_debug "Got job result: #{result}"

        exception_jail( false ) do
            handler   = @job_callbacks[result.job.id]
            call_args = [result, result.job.args, self].flatten.compact

            handler.call( *call_args )
        end
    end

    nil
end
increment_completed_job_count() click to toggle source
# File lib/arachni/browser_cluster.rb, line 392
# Forwards to the class-level method of the same name while holding this
# instance's lock (see #synchronize).
def increment_completed_job_count
    synchronize { self.class.increment_completed_job_count }
end
increment_queued_job_count() click to toggle source
# File lib/arachni/browser_cluster.rb, line 386
# Forwards to the class-level method of the same name while holding this
# instance's lock (see #synchronize).
def increment_queued_job_count
    synchronize { self.class.increment_queued_job_count }
end
increment_time_out_count() click to toggle source
# File lib/arachni/browser_cluster.rb, line 398
# Forwards to the class-level method of the same name while holding this
# instance's lock (see #synchronize).
def increment_time_out_count
    synchronize { self.class.increment_time_out_count }
end
javascript_token() click to toggle source

@return [String]

Javascript token used to namespace the custom JS environment.
# File lib/arachni/browser_cluster.rb, line 117
# @return
#   The `Browser::Javascript::TOKEN` constant.
def javascript_token
    Browser::Javascript::TOKEN
end
job_done( job ) click to toggle source

@param [Job] job

Job to mark as done. Will remove any callbacks and associated
{Worker} states.
# File lib/arachni/browser_cluster.rb, line 211
# Marks one pending instance of `job` as finished.
#
# Under the instance lock it decrements the pending counters, updates the
# completed-job statistics with `job.time`, notifies the `on_job_done`
# observer and -- unless the job is never-ending -- forgets the job's skip
# states and callback. When the overall pending counter reaches 0, `nil`
# is pushed to `@done_signal`.
#
# @param [Job] job
def job_done( job )
    synchronize do
        print_debug "Job done: #{job}"

        @pending_job_counter  -= 1
        @pending_jobs[job.id] -= 1

        increment_completed_job_count
        add_to_total_job_time( job.time )

        notify_on_job_done job

        unless job.never_ending?
            [@skip_states_per_job, @job_callbacks].each do |registry|
                registry.delete job.id
            end
        end

        if @pending_job_counter == 0
            print_debug_level_2 'Pending job counter reached 0.'
            @done_signal << nil
        end
    end
end
job_done?( job, fail_if_not_found = true ) click to toggle source

@param [Job] job

@return [Bool]

`true` if the `job` has been marked as finished, `false` otherwise.

@raise [Error::JobNotFound] Raised when `job` could not be found.

# File lib/arachni/browser_cluster.rb, line 241
# Tells whether every queued instance of `job` has been marked as done.
#
# Never-ending jobs are never considered done. Jobs unknown to
# `@pending_jobs` either raise (the default) or are reported as not done.
#
# @param [Job] job
# @param [Bool] fail_if_not_found
#   When true, #fail_if_job_not_found is consulted first.
#
# @return [Bool]
#
# @raise [Error::JobNotFound]
#   Via #fail_if_job_not_found, when `fail_if_not_found` is set.
def job_done?( job, fail_if_not_found = true )
    return false if job.never_ending?

    synchronize do
        fail_if_job_not_found job if fail_if_not_found

        # The membership test comes first so that the Hash default never
        # gets consulted for unknown IDs.
        @pending_jobs.include?( job.id ) && @pending_jobs[job.id] == 0
    end
end
pop() click to toggle source

@return [Job]

Pops a job from the queue.

@see queue @private

# File lib/arachni/browser_cluster.rb, line 329
# Pops jobs off `@jobs` until one turns up that is not already done, tells
# the `on_pop` observer about it and returns it.
#
# @return [Job]
#
# @private
def pop
    print_debug 'Popping...'

    job = @jobs.pop
    # Skip over jobs which have been marked as done in the meantime.
    job = @jobs.pop while job_done?( job )

    print_debug "...popped: #{job}"

    notify_on_pop job

    job
end
queue( job, cb = nil, &block ) click to toggle source

@param [Job] job @param [Block] block

Callback to be passed the {Job::Result}.

@raise [AlreadyShutdown] @raise [Job::Error::AlreadyDone]

# File lib/arachni/browser_cluster.rb, line 140
# Queues a job and registers the callback that is to receive its results.
#
# The callback is validated *before* any bookkeeping takes place, so a
# rejected call leaves the pending-job counters, the queued-job statistic,
# the job queue and the `on_queue` observer untouched. (Bumping the pending
# counters for a job that never gets queued would keep #done? from ever
# becoming true.)
#
# @param [Job] job
# @param [#call, nil] cb
#   Callback; takes precedence over `block`.
# @param [Block] block
#   Callback used when `cb` is not given.
#
# @return [nil]
#
# @raise [Error::AlreadyShutdown]
#   Via #fail_if_shutdown.
# @raise [Job::Error::AlreadyDone]
#   Via #fail_if_job_done.
# @raise [ArgumentError]
#   When no callback is given and none is already registered under the
#   job's ID.
def queue( job, cb = nil, &block )
    fail_if_shutdown
    fail_if_job_done job

    @done_signal.clear

    callback = cb || block

    synchronize do
        # A callback registered by an earlier call for the same job ID is
        # good enough, only fail when there's none at all.
        if !callback && !@job_callbacks[job.id]
            fail ArgumentError, "No callback set for job ID #{job.id}."
        end

        print_debug "Queueing: #{job}"

        notify_on_queue job

        self.class.increment_queued_job_count

        @pending_job_counter  += 1
        @pending_jobs[job.id] += 1

        @job_callbacks[job.id] = callback if callback

        @jobs << job
    end

    nil
end
shutdown( wait = true ) click to toggle source

Shuts the cluster down.

# File lib/arachni/browser_cluster.rb, line 296
# Shuts the cluster down: flags it as shut down, empties the job queue,
# shuts down and forgets every worker and finally drops the per-job state.
#
# NOTE(review): nothing is pushed to `@done_signal` here and
# `@pending_job_counter` is left as it is -- confirm that no thread can
# still be blocked in #wait when this gets called.
#
# @param [Bool] wait
#   Passed through to each worker's `#shutdown`.
#
# @return [true]
def shutdown( wait = true )
    print_debug 'Shutting down...'
    # From here on #fail_if_shutdown raises and #handle_job_result bails out.
    @shutdown = true

    print_debug_level_2 'Clearing jobs...'
    # Clear the jobs -- don't forget this, it also removes the disk files for
    # the contained items.
    @jobs.clear
    print_debug_level_2 '...done.'

    print_debug_level_2 "Shutting down #{@workers.size} workers..."
    # Kill the browsers. Each shutdown is wrapped in its own exception_jail
    # call so the remaining workers are still visited.
    @workers.each { |b| exception_jail( false ) { b.shutdown wait } }
    @workers.clear
    print_debug_level_2 '...done.'

    print_debug_level_2 'Clearing data and state...'
    # Very important to leave these for last, they may contain data
    # necessary to cleanly handle interrupted jobs.
    @job_callbacks.clear
    @skip_states_per_job.clear
    @pending_jobs.clear
    print_debug_level_2 '...done.'

    print_debug '...shutdown complete.'
    true
end
skip_state( job_id, state ) click to toggle source

Used to sync operations between browser workers.

@param [Integer] job_id

Job ID.

@param [String] state

State to skip in the future.

@private

# File lib/arachni/browser_cluster.rb, line 364
# Records `state` in the skip-state collection of the given job, under the
# instance lock.
#
# @param [Integer] job_id
# @param [String] state
#
# @return
#   Whatever the collection's `#<<` returns.
#
# @private
def skip_state( job_id, state )
    synchronize do
        skip_states( job_id ) << state
    end
end
skip_state?( job_id, state ) click to toggle source

Used to sync operations between browser workers.

@param [Integer] job_id

Job ID.

@param [String] state

Should the given state be skipped?

@raise [Error::JobNotFound]

Raised when `job` could not be found.

@private

# File lib/arachni/browser_cluster.rb, line 350
# Asks the skip-state collection of the given job whether it includes
# `state`, under the instance lock.
#
# @param [Integer] job_id
# @param [String] state
#
# @return [Bool]
#
# @private
def skip_state?( job_id, state )
    synchronize { skip_states( job_id ).include?( state ) }
end
skip_states( id ) click to toggle source

@private

# File lib/arachni/browser_cluster.rb, line 374
# Returns the skip-state collection for the given job ID, creating (and
# remembering) a new `Support::LookUp::HashSet` on first access.
#
# @param [Integer] id
#   Job ID.
#
# @return [Support::LookUp::HashSet]
#
# @private
def skip_states( id )
    synchronize do
        known = @skip_states_per_job[id]
        next known if known

        @skip_states_per_job[id] =
            Support::LookUp::HashSet.new( hasher: :persistent_hash )
    end
end
trace_taint( resource, options = {}, cb = nil, &block ) click to toggle source

@param [Page, String, HTTP::Response] resource

Resource to load and whose environment to trace; if given a `String` it
will be treated as a URL and will be loaded.

@param [Hash] options

See {Jobs::TaintTrace} accessors.

@param [Block] block

Callback to be passed the {Job::Result}.

@see Jobs::TaintTrace @see queue

# File lib/arachni/browser_cluster.rb, line 200
# Wraps the resource in a {Jobs::TaintTrace} job and queues it.
#
# @param [Object] resource
#   Stored under the `:resource` key of the job options.
# @param [Hash] options
#   Further job options; the given Hash is not modified.
# @param [#call, nil] cb
#   Callback, forwarded to #queue.
# @param [Block] block
#   Callback, forwarded to #queue.
#
# @return
#   Whatever #queue returns.
def trace_taint( resource, options = {}, cb = nil, &block )
    job_options = options.merge( resource: resource )
    job         = Jobs::TaintTrace.new( job_options )

    queue( job, cb, &block )
end
update_skip_states( id, lookups ) click to toggle source

@private

# File lib/arachni/browser_cluster.rb, line 369
# Merges `lookups` into the skip-state collection of the given job, under
# the instance lock.
#
# @param [Integer] id
#   Job ID.
# @param [Object] lookups
#   Handed to the collection's `#merge` as-is.
#
# @return
#   Whatever the collection's `#merge` returns.
#
# @private
def update_skip_states( id, lookups )
    synchronize do
        skip_states( id ).merge( lookups )
    end
end
wait() click to toggle source

Blocks until all resources have been analyzed.

# File lib/arachni/browser_cluster.rb, line 285
# Blocks on `@done_signal` unless the cluster is already done.
#
# @return [BrowserCluster] `self`
#
# @raise [Error::AlreadyShutdown]
#   Via #fail_if_shutdown.
def wait
    fail_if_shutdown

    print_debug 'Waiting to finish...'
    # Only wait for the signal when there's still work pending.
    @done_signal.pop unless done?
    print_debug '...finish.'

    self
end
with_browser( *args, &block ) click to toggle source

@note Operates in non-blocking mode.

@param [Block] block

Block to which to pass a {Worker} as soon as one is available.
# File lib/arachni/browser_cluster.rb, line 125
# Queues a {Jobs::BrowserProvider} job built from the given arguments.
#
# @param [Array] args
#   Arguments for the job. A trailing `Method` object is removed from them
#   and used as the callback instead.
# @param [Block] block
#   Callback, forwarded to #queue.
#
# @return
#   Whatever #queue returns.
def with_browser( *args, &block )
    method_handler = args.pop if args.last.is_a?( Method )

    queue( Jobs::BrowserProvider.new( args ), method_handler, &block )
end

Private Instance Methods

fail_if_job_done( job ) click to toggle source
# File lib/arachni/browser_cluster.rb, line 476
# Raises when `job` has already been marked as done. Unknown jobs are
# tolerated (the lookup is performed with `fail_if_not_found` disabled).
#
# @param [Job] job
#
# @raise [Job::Error::AlreadyDone]
def fail_if_job_done( job )
    fail Job::Error::AlreadyDone, 'Job has been marked as done.' if job_done?( job, false )
end
fail_if_job_not_found( job ) click to toggle source
# File lib/arachni/browser_cluster.rb, line 481
# Raises unless `@pending_jobs` has an entry for the job's ID.
#
# @param [Job] job
#
# @raise [Error::JobNotFound]
def fail_if_job_not_found( job )
    known = @pending_jobs.include?( job.id )
    fail Error::JobNotFound, 'Job could not be found.' unless known
end
fail_if_shutdown() click to toggle source
# File lib/arachni/browser_cluster.rb, line 472
# Raises once the cluster has been flagged as shut down.
#
# @raise [Error::AlreadyShutdown]
def fail_if_shutdown
    return unless @shutdown

    fail Error::AlreadyShutdown, 'Cluster has been shut down.'
end
initialize_workers() click to toggle source
# File lib/arachni/browser_cluster.rb, line 490
# Resets `@workers` and fills it with `pool_size` freshly spawned {Worker}s,
# each with this cluster as its master and the screen dimensions taken from
# `Options.browser_cluster`. Progress is reported via `print_status`.
def initialize_workers
    print_status "Initializing #{pool_size} browsers..."

    @workers = []
    pool_size.times do |index|
        # Push right away, so that the pool is populated progressively.
        @workers << Worker.new(
            master: self,
            width:  Options.browser_cluster.screen_width,
            height: Options.browser_cluster.screen_height
        )

        print_status "Spawned ##{index.succ}."
    end

    print_status "Initialization completed with #{@workers.size} browsers in the pool."
end
notify_on_job_done( job ) click to toggle source
# File lib/arachni/browser_cluster.rb, line 460
# Calls the `@on_job_done` observer with the job, if one is set.
#
# @param [Job] job
#
# @return
#   The observer's return value, `nil` without an observer.
def notify_on_job_done( job )
    @on_job_done.call( job ) if @on_job_done
end
notify_on_pop( job ) click to toggle source
# File lib/arachni/browser_cluster.rb, line 466
# Calls the `@on_pop` observer with the job, if one is set.
#
# @param [Job] job
#
# @return
#   The observer's return value, `nil` without an observer.
def notify_on_pop( job )
    @on_pop.call( job ) if @on_pop
end
notify_on_queue( job ) click to toggle source
# File lib/arachni/browser_cluster.rb, line 455
# Calls the `@on_queue` observer with the job, if one is set.
#
# @param [Job] job
#
# @return
#   The observer's return value, `nil` without an observer.
def notify_on_queue( job )
    @on_queue.call( job ) if @on_queue
end
synchronize( &block ) click to toggle source
# File lib/arachni/browser_cluster.rb, line 486
# Runs the block via `@mutex.synchronize`, i.e. while holding the instance
# lock, and returns that call's result.
#
# @param [Block] block
#   Code to run under the lock.
def synchronize( &block )
    @mutex.synchronize( &block )
end