class Arachni::OptionGroups::Scope

Scan scope options, maintains rules used to decide which resources should be considered for crawling/auditing/etc. during the scan.

@author Tasos “Zapotek” Laskos <tasos.laskos@arachni-scanner.com>

Constants

EXCLUDE_FILE_EXTENSIONS
EXCLUDE_MIME_TYPES

Attributes

auto_redundant_paths[RW]

@return [Integer]

Sets a limit to how many paths with identical query parameter names to
process. Helps avoid processing redundant/identical resources like
entries in calendars and catalogs.

@see URI::Scope#redundant? @see URI::Scope#auto_redundant?

directory_depth_limit[RW]

@note `nil` is infinite – default is `nil`.

@return [Integer]

How deep to go into the site's directory tree.

@see URI::Scope#too_deep?

dom_depth_limit[RW]

@note `nil` is infinite – default is `10`.

@return [Integer]

How deep to go into each page's DOM tree.

@see Page::Scope#dom_depth_limit_reached?

dom_event_inheritance_limit[RW]

@note `nil` is infinite – default is `nil`.

@return [Integer]

How many elements should inherit the DOM events of their parents.

@see Browser#trigger_events

dom_event_limit[RW]

@note `nil` is infinite – default is `nil`.

@return [Integer]

How many DOM events to trigger for each snapshot.

@see Browser#trigger_events

exclude_binaries[RW]

@note Default is `false`.

@return [Bool]

Exclude pages with binary content from the audit. Mainly used to avoid
having grep checks confused by random binary content.

@see HTTP::Response::Scope#exclude_as_binary?

exclude_binaries?[RW]

@note Default is `false`.

@return [Bool]

Exclude pages with binary content from the audit. Mainly used to avoid
having grep checks confused by random binary content.

@see HTTP::Response::Scope#exclude_as_binary?

exclude_content_patterns[RW]

@return [Array<Regexp>]

{Page}/{HTTP::Response} bodies matching any of these patterns will be ignored.

@see HTTP::Response::Scope#exclude_content?

exclude_file_extensions[RW]

@return [Array<String>]

Extension exclusion patterns, resources whose extension is in the list
will not be considered.

@see URI::Scope#exclude_file_extension?

exclude_path_patterns[RW]

@return [Array<Regexp>]

Path exclusion patterns, resources that match any of the specified
patterns will not be considered.

@see URI::Scope#exclude?

extend_paths[RW]

@return [Array<String>]

Paths to use in addition to crawling.

@see Framework#push_to_page_queue @see Framework#push_to_url_queue

https_only[RW]

@return [Bool]

If an HTTPS {Options#url} has been provided, **do not** downgrade to
an insecure link.

@see URI::Scope#follow_protocol?

https_only?[RW]

@return [Bool]

If an HTTPS {Options#url} has been provided, **do not** downgrade to
an insecure link.

@see URI::Scope#follow_protocol?

include_path_patterns[RW]

@return [Array<Regexp>]

Path inclusion patterns, only resources that match any of the specified
patterns will be considered.

@see URI::Scope#include?

include_subdomains[RW]

@note Default is `false`.

@return [Bool]

Take into consideration URLs pointing to different subdomains from the
{Options#url seed URL}.

@see URI::Scope#in_domain?

page_limit[RW]

@note `nil` is infinite – default is `nil`.

@return [Integer]

How many pages to consider (crawl/audit)?

@see Framework#push_to_page_queue @see Framework#push_to_url_queue @see Framework#audit_page @see Trainer#push

redundant_path_patterns[RW]

@return [Hash{Regexp => Integer}]

Filters for redundant paths in the form of `{ pattern => counter }`.
Once the `pattern` has matched a path `counter` amount of times, the
resource will be ignored from then on.

Useful when scanning pages that dynamically generate a large number of
pages like galleries and calendars.

@see URI::Scope#redundant?

restrict_paths[RW]

@return [Array<String>]

Paths to use instead of crawling.

@see Framework#push_to_url_queue

url_rewrites[RW]

@return [Hash{Regexp => String}]

Regular expression and substitution pairs, used to rewrite
{Element::Capabilities::Submittable#action}.

@see URI.rewrite @see URI#rewrite

Public Instance Methods

auto_redundant?() click to toggle source
# File lib/arachni/option_groups/scope.rb, line 245
# Reports whether automatic redundant-path detection is switched on.
#
# @return [Bool]
#   `true` when `@auto_redundant_paths`, coerced with `#to_i`, is a positive
#   number; `false` otherwise (including when it is `nil`).
def auto_redundant?
    limit = @auto_redundant_paths.to_i
    limit >= 1
end
auto_redundant_counter() click to toggle source
# File lib/arachni/option_groups/scope.rb, line 249
# Lazily-created counter store used for automatic redundancy tracking.
#
# @return [Hash]
#   The same Hash instance on every call; missing keys read as `0`.
def auto_redundant_counter
    return @auto_redundant_counter if @auto_redundant_counter

    @auto_redundant_counter = Hash.new( 0 )
end
crawl() click to toggle source
# File lib/arachni/option_groups/scope.rb, line 257
# Clears the page limit by setting {#page_limit} to `nil`, the value for which
# {#crawl?} returns `true`.
def crawl
    self.page_limit = nil
end
crawl?() click to toggle source
# File lib/arachni/option_groups/scope.rb, line 261
# Reports whether crawling is enabled.
#
# @return [Bool]
#   `false` only when {#page_limit} equals `0`; `true` for `nil` (no limit)
#   and for any other value.
def crawl?
    # A missing limit (nil) is never equal to 0, so one comparison covers
    # both the "no limit" and the "non-zero limit" cases.
    page_limit != 0
end
do_not_crawl() click to toggle source
# File lib/arachni/option_groups/scope.rb, line 253
# Sets {#page_limit} to `0`, the value for which {#crawl?} returns `false`.
def do_not_crawl
    self.page_limit = 0
end
dom_event_limit_reached?( count ) click to toggle source
# File lib/arachni/option_groups/scope.rb, line 269
# Checks a DOM event count against {#dom_event_limit}.
#
# @param [Integer] count
#   Number of events to compare against the limit.
#
# @return [Bool, nil]
#   `true` when `count` is greater than or equal to the limit, `false` when it
#   is below it, and the (falsy) limit itself when no limit has been set.
def dom_event_limit_reached?( count )
    limit = dom_event_limit
    return limit unless limit

    count >= limit
end
exclude_file_extensions=( ext ) click to toggle source
# File lib/arachni/option_groups/scope.rb, line 213
# Sets the file extensions to exclude, normalizing every entry to a
# lower-case String.
#
# @param [Set, Array, String, Symbol, nil] ext
#   One extension or a (possibly nested) collection of them. `nil` entries are
#   dropped. A falsy `ext` restores a copy of the default list taken from
#   `defaults[:exclude_file_extensions]`.
#
# @return [Set<String>]
#   The stored set.
def exclude_file_extensions=( ext )
    return @exclude_file_extensions =
        defaults[:exclude_file_extensions].dup if !ext

    # Array#flatten does not expand Sets, so unwrap them explicitly. This lets
    # Set input go through the same normalization as any other input and
    # avoids keeping a reference to the caller's object.
    entries = ext.is_a?( Set ) ? ext.to_a : [ext]

    @exclude_file_extensions = Set.new(
        entries.flatten.compact.map { |s| s.to_s.downcase }
    )
end
page_limit_reached?( count ) click to toggle source
# File lib/arachni/option_groups/scope.rb, line 265
# Checks a page count against {#page_limit}.
#
# @param [Integer] count
#   Number of pages to compare against the limit.
#
# @return [Bool, nil]
#   `true` when a positive limit is set and `count` has reached it; a falsy
#   value when there is no limit, or the limit is not positive.
def page_limit_reached?( count )
    limit = page_limit
    return limit unless limit

    # Compare against the same coerced value that the positivity check uses,
    # so a limit supplied as a numeric String does not raise on comparison.
    limit = limit.to_i
    limit > 0 && count >= limit
end
redundant_path_patterns=( filters ) click to toggle source

Sets the redundancy filters.

Filter example:

{
    # regexp           counter
    /calendar\.php/ => 5,
    'gallery\.php'  => '3'
}

@param [Hash] filters

# File lib/arachni/option_groups/scope.rb, line 283
# Stores the redundancy filters as a `{ Regexp => Integer }` Hash.
#
# Keys that are not already a Regexp are compiled from their String form as
# case-insensitive patterns; counters are converted with `Integer()`.
#
# @param [Hash, nil] filters
#   Pattern/counter pairs. `nil` restores a copy of
#   `defaults[:redundant_path_patterns]`.
#
# @raise [ArgumentError, TypeError]
#   When a counter cannot be converted by `Integer()`.
def redundant_path_patterns=( filters )
    return @redundant_path_patterns = defaults[:redundant_path_patterns].dup if filters.nil?

    @redundant_path_patterns =
        filters.each_with_object({}) do |(pattern, counter), normalized|
            if !pattern.is_a?( Regexp )
                pattern = Regexp.new( pattern.to_s, Regexp::IGNORECASE )
            end

            normalized[pattern] = Integer( counter )
        end
end
to_rpc_data() click to toggle source
Calls superclass method Arachni::OptionGroup#to_rpc_data
# File lib/arachni/option_groups/scope.rb, line 299
# Builds on the superclass' RPC data, replacing values under a fixed set of
# keys with plain Arrays, Hashes and Strings.
#
# * `exclude_file_extensions` is converted with `#to_a`.
# * `redundant_path_patterns` and `url_rewrites` have each key replaced by
#   its `#source`.
# * `exclude_path_patterns`, `exclude_content_patterns` and
#   `include_path_patterns` have each entry replaced by its `#source`.
#
# @return [Hash]
#   The data returned by `super`, with the above keys converted.
def to_rpc_data
    data = super

    data['exclude_file_extensions'] = data['exclude_file_extensions'].to_a

    %w(redundant_path_patterns url_rewrites).each do |option|
        data[option] =
            data[option].each_with_object({}) do |(pattern, value), converted|
                converted[pattern.source] = value
            end
    end

    %w(exclude_path_patterns exclude_content_patterns include_path_patterns).each do |option|
        data[option] = data[option].map { |pattern| pattern.source }
    end

    data
end
url_rewrites=( rules ) click to toggle source
# File lib/arachni/option_groups/scope.rb, line 201
# Stores the URL rewrite rules as a `{ Regexp => substitution }` Hash.
#
# Keys that are not already a Regexp are compiled from their String form as
# case-insensitive patterns; substitutions are stored untouched.
#
# @param [Hash, nil] rules
#   Pattern/substitution pairs. A falsy value restores a copy of
#   `defaults[:url_rewrites]`.
def url_rewrites=( rules )
    return @url_rewrites = defaults[:url_rewrites].dup if !rules

    compiled = {}
    rules.each do |pattern, substitution|
        key = if pattern.is_a?( Regexp )
            pattern
        else
            Regexp.new( pattern.to_s, Regexp::IGNORECASE )
        end

        compiled[key] = substitution
    end

    @url_rewrites = compiled
end