class Arachni::Data::Framework

Data for {Arachni::Framework}.

@author Tasos “Zapotek” Laskos <tasos.laskos@arachni-scanner.com>

Attributes

page_queue[R]

@return [Support::Database::Queue]

page_queue_total_size[RW]

@return [Integer]

rpc[RW]

@return [RPC]

sitemap[R]

@return [Hash<String, Integer>]

Map of crawled URLs to their HTTP codes.
url_queue[R]

@return [Support::Database::Queue]

url_queue_total_size[RW]

@return [Integer]

Public Class Methods

load( directory ) click to toggle source
# File lib/arachni/data/framework.rb, line 136
# Builds a new instance and populates it from the files found under
# `directory`, using the same file names that {#dump} writes.
#
# @note The files are decoded with `Marshal.load`, which reconstructs
#   whatever objects the data describes -- only point this at directories
#   whose contents you trust.
#
# @param    [String]    directory
#   Directory holding the dumped data.
#
# @return   [Framework]
#   The newly created and populated instance.
def self.load( directory )
    # Reads a file in binary mode and unmarshals its contents.
    unmarshal = lambda { |path| Marshal.load( IO.binread( path ) ) }

    framework     = new
    framework.rpc = RPC.load( "#{directory}/rpc/" )

    framework.sitemap.merge!( unmarshal.call( "#{directory}/sitemap" ) )

    # The page files are not read here; only their paths are appended to the
    # page queue's disk list.
    Dir["#{directory}/page_queue/*"].each do |stored_page|
        framework.page_queue.disk << stored_page
    end

    # URLs go straight into the URL queue's buffer.
    queued_urls = unmarshal.call( "#{directory}/url_queue" )
    queued_urls.each { |queued_url| framework.url_queue.buffer << queued_url }

    framework.page_queue_total_size =
        unmarshal.call( "#{directory}/page_queue_total_size" )
    framework.url_queue_total_size =
        unmarshal.call( "#{directory}/url_queue_total_size" )

    framework
end
new() click to toggle source
# File lib/arachni/data/framework.rb, line 46
# Starts out with a fresh RPC data object, an empty sitemap, and two new
# queues whose total-size counters are zero.
def initialize
    @rpc     = RPC.new
    @sitemap = {}

    # The page queue gets a max_buffer_size of 10.
    @page_queue = Support::Database::Queue.new.tap do |queue|
        queue.max_buffer_size = 10
    end
    @page_queue_total_size = 0

    # The URL queue gets an unlimited (infinite) max_buffer_size.
    @url_queue = Support::Database::Queue.new.tap do |queue|
        queue.max_buffer_size = Float::INFINITY
    end
    @url_queue_total_size = 0
end

Public Instance Methods

add_page_to_sitemap( page ) click to toggle source

@param [Page] page

Page with which to update the {#sitemap}.
# File lib/arachni/data/framework.rb, line 92
# Registers a single page with the {#sitemap}, mapping the URL of its DOM
# to its code, by delegating to {#update_sitemap}.
#
# @param    [Page]  page
#   Page with which to update the {#sitemap}.
#
# @return   [Object]
#   Whatever {#update_sitemap} returns.
def add_page_to_sitemap( page )
    url  = page.dom.url
    code = page.code

    update_sitemap( url => code )
end
clear() click to toggle source
# File lib/arachni/data/framework.rb, line 158
# Empties the RPC data, the sitemap and both queues, and resets both
# total-size counters to zero.
#
# @return   [Integer]
#   `0`, the value assigned to the counters.
def clear
    rpc.clear

    [@sitemap, @page_queue, @url_queue].each( &:clear )

    @page_queue_total_size = @url_queue_total_size = 0
end
dump( directory ) click to toggle source
# File lib/arachni/data/framework.rb, line 108
# Writes the current data to files under `directory`.
#
# Layout produced:
#
# * `rpc/` -- handed to the RPC data object's own `dump`.
# * `page_queue/` -- one file per queued page.
# * `url_queue`, `sitemap`, `page_queue_total_size`,
#   `url_queue_total_size` -- marshalled values.
#
# @param    [String]    directory
#   Destination directory; created if it does not exist.
def dump( directory )
    FileUtils.mkdir_p( directory )

    rpc.dump( "#{directory}/rpc/" )

    # Recreate the page queue directory from scratch so that no files from
    # an earlier dump linger in it.
    pages_dir = "#{directory}/page_queue/"
    FileUtils.rm_rf( pages_dir )
    FileUtils.mkdir_p( pages_dir )

    # Pages held in the buffer are serialized by the queue and stored under
    # their persistent hash.
    page_queue.buffer.each do |buffered_page|
        target = "#{pages_dir}/#{buffered_page.persistent_hash}"
        IO.binwrite( target, page_queue.serialize( buffered_page ) )
    end

    # Entries of the disk list are file paths; copy those files over as-is.
    page_queue.disk.each { |source| FileUtils.cp( source, "#{pages_dir}/" ) }

    # Only the URL queue's buffer gets written out.
    IO.binwrite( "#{directory}/url_queue", Marshal.dump( @url_queue.buffer ) )

    # Each remaining attribute is marshalled to a file named after it.
    %w(sitemap page_queue_total_size url_queue_total_size).each do |name|
        IO.binwrite( "#{directory}/#{name}", Marshal.dump( send(name) ) )
    end
end
push_to_page_queue( page ) click to toggle source

@note Increases the {#page_queue_total_size}.

@param [Page] page

Page to push to the {#page_queue}.
# File lib/arachni/data/framework.rb, line 75
# Queues a page, records it in the {#sitemap} and bumps the counter.
#
# @note Increases the {#page_queue_total_size}.
#
# @param    [Page]  page
#   Page to push to the {#page_queue}; what actually gets queued is the
#   return value of its `clear_cache`.
#
# @return   [Integer]
#   The updated {#page_queue_total_size}.
def push_to_page_queue( page )
    cache_free_page = page.clear_cache
    @page_queue << cache_free_page

    add_page_to_sitemap( page )

    @page_queue_total_size = @page_queue_total_size + 1
end
push_to_url_queue( url ) click to toggle source

@note Increases the {#url_queue_total_size}.

@param [String] url

URL to push to the {#url_queue}.
# File lib/arachni/data/framework.rb, line 85
# Appends a URL to the {#url_queue} and bumps the counter.
#
# @note Increases the {#url_queue_total_size}.
#
# @param    [String]    url
#   URL to push to the {#url_queue}.
#
# @return   [Integer]
#   The updated {#url_queue_total_size}.
def push_to_url_queue( url )
    @url_queue << url

    @url_queue_total_size = @url_queue_total_size + 1
end
statistics() click to toggle source
# File lib/arachni/data/framework.rb, line 60
# Summarizes the current state.
#
# @return   [Hash]
#   The RPC data's own statistics under `:rpc`, plus the sizes of the
#   sitemap and both queues and the two total-size counters.
def statistics
    stats = {}

    stats[:rpc]     = @rpc.statistics
    stats[:sitemap] = @sitemap.size

    stats[:page_queue]            = @page_queue.size
    stats[:page_queue_total_size] = @page_queue_total_size

    stats[:url_queue]            = @url_queue.size
    stats[:url_queue_total_size] = @url_queue_total_size

    stats
end
update_sitemap( entries ) click to toggle source
# File lib/arachni/data/framework.rb, line 96
# Merges URL/code pairs into the {#sitemap}, overwriting existing entries
# for the same URL.
#
# @param    [Hash<String, Integer>]    entries
#   URLs mapped to their codes.
#
# @return   [Hash<String, Integer>]
#   The updated {#sitemap}.
def update_sitemap( entries )
    entries.each do |url, code|
        # URLs that contain the random seed are left out -- according to the
        # original author these are feedback from the trainer and would
        # only add noise to the sitemap.
        @sitemap[url] = code unless url.include?( Utilities.random_seed )
    end

    @sitemap
end