class Arachni::HTTP::Client::Dynamic404Handler

@author Tasos “Zapotek” Laskos <tasos.laskos@arachni-scanner.com>

Constants

CACHE_SIZE

Maximum size of the cache that holds 404 signatures.

PRECISION
SIGNATURE_THRESHOLD

Maximum allowed difference ratio when comparing custom 404 signatures. The fact that we refine the signatures allows us to set this threshold really low and still maintain good accuracy.

Public Class Methods

new() click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 28
def initialize
    @static     = Support::LookUp::HashSet.new
    @signatures = Hash.new
end

Private Class Methods

info() click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 582
def self.info
    { name: 'Dynamic404Handler' }
end

Public Instance Methods

_404?( response, &block ) click to toggle source

@param [Response] response

Checks whether or not the provided response means 'not found'.

@param [Block] block

To be passed `true` or `false` depending on the result of the analysis.
# File lib/arachni/http/client/dynamic_404_handler.rb, line 37
def _404?( response, &block )
    # This matters, the request URL may differ from the response one due to
    # redirections and we need to test the original.
    url = response.request.url

    # Easy pickins, well-behaved static 404 handler and a URL that doesn't
    # need advanced analysis.
    if checked_and_static?( url )
        result = (response.code == 404)
        print_debug "[static]: #{block} #{url} #{result}"
        block.call( result )
        return
    end

    # We've hit the cache, hopefully some preliminary signature will
    # match the response body and we'll get to avoid the advanced analysis.
    if checked?( url )
        result = matches_preliminary_signatures?( url, response.body )

        # If we've got a positive result that's all we need to know, return
        # it immediately.
        if result
            print_debug "[cached]: #{block} #{url} #{result}"
            return block.call( result )
        end

        # If the result was negative only return it if there's no need for
        # advanced analysis for this resource.
        if !needs_advanced_analysis?( url )
            print_debug "[cached]: #{block} #{url} #{result}"
            return block.call( result )
        end
    end

    # No need to go over this process for each caller for the same handler,
    # group them together and they'll get notified when the analysis is
    # complete.
    data_for( url )[:waiting] << [url, response.code, response.body, block]
    if data_for( url )[:in_progress]
        print_debug "[waiting]: #{url} #{block}"
        return
    end
    data_for( url )[:in_progress] = true

    # If it's already checked then preliminary analysis has been performed
    # and since its results can be shared across different resource checks
    # don't waste time redoing it, we can jump straight into the advanced
    # analysis.
    if checked?( url ) && needs_advanced_analysis?( url )
        print_debug "[checking-advanced]: #{url} #{block}"
        process_advanced_analysis_callers_for( url )
        return
    end

    print_debug "[checking]: #{url} #{block}"

    # So... we've got nothing cached for the handler for this URL, let's
    # start from scratch.
    preliminary_analysis( url ) do
        process_callers_for( url )
    end

    nil
end
checked?( url ) click to toggle source

@param [String] url

URL to check.

@return [Bool]

`true` if the `url` has been checked for the existence of a custom-404
handler, `false` otherwise.
# File lib/arachni/http/client/dynamic_404_handler.rb, line 108
def checked?( url )
    data_for( url )[:analyzed]
end
checked_and_static?( url ) click to toggle source

@param [String] url

URL to check.

@return [Bool]

`true` if the `url` has been checked for the existence of a custom-404
handler but none was identified, `false` otherwise.
# File lib/arachni/http/client/dynamic_404_handler.rb, line 118
def checked_and_static?( url )
    @static.include?( url_for( url ) ) && !needs_advanced_analysis?( url )
end
needs_check?( url ) click to toggle source

@param [String] url

URL to check.

@return [Bool]

`true` if the `url` needs to be checked for a {#_404?}, `false`
otherwise.
# File lib/arachni/http/client/dynamic_404_handler.rb, line 128
def needs_check?( url )
    !checked?( url ) || !checked_and_static?( url )
end
prune() click to toggle source

@private

# File lib/arachni/http/client/dynamic_404_handler.rb, line 138
def prune
    return if @signatures.size <= CACHE_SIZE

    @signatures.keys.each do |url|
        # If the path hasn't been analyzed yet skip it.
        next if !@signatures[url][:analyzed]

        # We've done enough...
        return if @signatures.size <= CACHE_SIZE

        @signatures.delete( url )
    end
end
signatures() click to toggle source

@private

# File lib/arachni/http/client/dynamic_404_handler.rb, line 133
def signatures
    @signatures
end

Private Instance Methods

advanced_analysis( url, &block ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 193
def advanced_analysis( url, &block )
    generators = advanced_probe_generators( url, PRECISION )

    if generators.empty?
        block.call
        return
    end

    corrupted           = false
    gathered_signatures = 0
    expected_signatures = generators.size

    generators.each.with_index do |generator, i|
        current_signature = (advanced_signatures_for( url )[i] ||= {})

        signature_from_url generator.call, current_signature do |c_res, status|
            next if corrupted

            print_debug "[gathering]: #{c_res.request.url} #{c_res.url} #{c_res.code} #{block}"

            gathered_signatures += 1

            if status == :done && gathered_signatures == expected_signatures
                block.call
            end

            if status == :corrupted
                print_debug "[corrupted]: #{url} #{block}"
                corrupted = true
            end
        end
    end
end
advanced_probe_generators( url, precision ) click to toggle source

@return [Array<Proc>]

# File lib/arachni/http/client/dynamic_404_handler.rb, line 437
def advanced_probe_generators( url, precision )
    uri                = uri_parse( url )
    up_to_path         = uri.up_to_path
    resource_name      = uri.resource_name.to_s.split('.').tap(&:pop).join('.')
    resource_extension = uri.resource_extension

    probes = []

    if !resource_name.empty?
        # Get an existing resource with a random extension.
        probes << proc {
            s = up_to_path.dup
            s << resource_name
            s << '.'
            s << random_string[0..precision]
            s
        }
    end

    if resource_extension
        # Get a random filename with an existing extension.
        probes << proc {
            s = up_to_path.dup
            s << random_string
            s << '.'
            s << resource_extension
            s
        }
    end

    # Some webapps do routing based on name resources with "-" as a separator.
    if uri.resource_name.include?( '-' )
        rn = uri.resource_name

        probes << proc {
            up_to_path.sub( rn, rn.gsub( '-', "#{random_string}-" ) )
        }

        probes << proc {
            up_to_path.sub( rn, rn.gsub( '-', "-#{random_string}" ) )
        }
    end

    if uri.resource_name.include?( '~' )
        probes << proc {
            up_to_path.sub(
                uri.resource_name,
                resource_name.gsub( '~', '~~' )
            )
        }
    end

    probes
end
advanced_signatures_for( url ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 532
def advanced_signatures_for( url )
    data_for( url )[:signatures][:advanced][url] ||= []
end
checked( url ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 540
def checked( url )
    data = data_for( url )
    data[:analyzed]    = true
    data[:in_progress] = false
end
clear_data_for( url ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 512
def clear_data_for( url )
    @signatures[url_for( url )] = signature_prototype
end
corrupted_response?( response ) click to toggle source

If this is neither a regular 404 nor a 202 the server probably freaked out – 500 errors under stress and the like.

In that case we should bail out to avoid corrupted signatures which can lead to FPs.

# File lib/arachni/http/client/dynamic_404_handler.rb, line 344
def corrupted_response?( response )
    !response.ok? || (response.code != 404 && response.code != 200)
end
data_for( url ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 508
def data_for( url )
    @signatures[url_for( url )] ||= signature_prototype
end
matches_advanced_signatures?( url, body ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 546
def matches_advanced_signatures?( url, body )
    # First try matching the signatures for the specific URL...
    advanced_signatures_for( url ).each do |signature|
        return true if signature[:rdiff].similar? signature[:body].refine( body )
    end

    false
end
matches_preliminary_signatures?( url, body ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 555
def matches_preliminary_signatures?( url, body )
    # First try matching the signatures for the specific URL...
    preliminary_signatures_for( url ).each do |signature|
        return true if signature[:rdiff].similar? signature[:body].refine( body )
    end

    # ...then try the rest for good measure.
    url = url_for( url )
    @signatures.each do |u, data|
        next if u == url || !data[:analyzed]

        data[:signatures][:preliminary].each do |signature|
            return true if signature[:rdiff].similar? signature[:body].refine( body )
        end
    end

    false
end
needs_advanced_analysis?( url ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 328
def needs_advanced_analysis?( url )
    uri = uri_parse( url )
    resource_name = uri.resource_name.to_s.split('.').tap(&:pop).join('.')
    !!(
        !resource_name.empty? ||
        uri.resource_extension ||
        uri.resource_name.to_s.include?( '~' ) ||
        uri.resource_name.to_s.include?( '-' )
    )
end
perform_advanced_analysis_if_necessary( url, body, &block ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 276
def perform_advanced_analysis_if_necessary( url, body, &block )
    result = matches_preliminary_signatures?( url, body )
    print_debug "[checked]: #{block} #{url} #{result}"

    if result
        print_debug "[notify]: #{block} #{url} #{result}"
        checked( url )
        block.call( result )
        return
    end

    advanced_analysis url do
        checked( url )

        # If the signatures match after advanced analysis has been performed,
        # then this handler will always require advanced analysis for each
        # URL.
        result = matches_advanced_signatures?( url, body )
        print_debug "[notify]: #{block} #{url} #{result}"
        block.call result

        # More callers may have been added to the waiting queue during
        # the advanced analysis.
        process_callers_for( url )
    end
end
preliminary_analysis( url, &block ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 154
def preliminary_analysis( url, &block )
    generators = preliminary_probe_generators( url, PRECISION )

    real_404s          = 0
    corrupted          = false
    gathered_signatures = 0
    expected_signatures = generators.size

    generators.each.with_index do |generator, i|
        current_signature = (preliminary_signatures_for( url )[i] ||= {})

        signature_from_url generator.call, current_signature do |c_res, status|
            next if corrupted

            if status == :done
                print_debug "[gathering]: #{c_res.request.url} #{c_res.url} #{c_res.code} #{block}"

                gathered_signatures += 1
                if c_res.code == 404
                    real_404s += 1
                end

                if gathered_signatures == expected_signatures
                    if real_404s == expected_signatures
                        @static << url_for( url )
                    end

                    block.call
                end
            end

            if status == :corrupted
                print_debug "[corrupted]: #{url} #{block}"
                corrupted = true
            end
        end
    end
end
preliminary_probe_generators( url, precision ) click to toggle source

@return [Array<Proc>]

Generators for URLs which should elicit 404 responses for different types
of scenarios.
# File lib/arachni/http/client/dynamic_404_handler.rb, line 372
def preliminary_probe_generators( url, precision )
    uri        = uri_parse( url )
    up_to_path = uri.up_to_path

    trv_back = File.dirname( Arachni::URI( up_to_path ).path )
    trv_back << '/' if trv_back[-1] != '/'

    parsed = uri.dup
    parsed.path   = trv_back
    parsed.query  = ''
    trv_back_url  = parsed.to_s

    g = [
        # Get a random path with an extension.
        proc {
            s = up_to_path.dup
            s << random_string
            s << '.'
            s << random_string[0..precision]
            s
        },

        # Get a random path without an extension.
        proc { up_to_path + random_string },

        # Get a random path without an extension with all caps.
        #
        # Yes, this is here due to a real use case...
        proc { up_to_path + random_string_alpha_capital },

        # Move up a dir and get a random file.
        proc { trv_back_url + random_string },

        proc { trv_back_url + random_string_alpha_capital },

        # Move up a dir and get a random file with an extension.
        proc {
            s = trv_back_url.dup
            s << random_string
            s << '.'
            s << random_string[0..precision]
            s
        },

        # Get a random directory.
        proc {
            s = up_to_path.dup
            s << random_string
            s << '/'
            s
        }
    ]

    if !(rn = uri.resource_name.to_s).empty?
        # Append a random string to the resource name.
        g << proc { url.gsub( rn, "#{rn}#{random_string[0..precision]}" ) }

        # Prepend a random string to the resource name.
        g << proc { url.gsub( rn, "#{random_string[0..precision]}#{rn}" ) }
    end

    g
end
preliminary_signatures_for( url ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 528
def preliminary_signatures_for( url )
    data_for( url )[:signatures][:preliminary]
end
process_advanced_analysis_callers_for( url ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 321
def process_advanced_analysis_callers_for( url )
    while (waiting = data_for( url )[:waiting].pop)
        curl, _, body, callback = waiting
        perform_advanced_analysis_if_necessary( curl, body, &callback )
    end
end
process_callers_for( url ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 303
def process_callers_for( url )
    if checked_and_static?( url )
        checked( url )
        process_static_callers_for( url )
    else
        process_advanced_analysis_callers_for( url )
    end
end
process_static_callers_for( url ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 312
def process_static_callers_for( url )
    while (waiting = data_for( url )[:waiting].pop)
        curl, code, _, callback = waiting
        result = (code == 404)
        print_debug "[notify]: #{callback} #{curl} #{result}"
        callback.call result
    end
end
random_string() click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 574
def random_string
    Digest::SHA1.hexdigest( rand( 9999999 ).to_s )
end
random_string_alpha_capital() click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 578
def random_string_alpha_capital
    random_string.gsub( /\d/, '' ).upcase
end
request( url, &block ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 492
def request( url, &block )
    Client.get( url,
        # This is important, helps us reduce waiting callers.
        high_priority:   true,

        # We're going to be checking for a lot of non-existent resources,
        # don't bother fingerprinting them
        fingerprint:     false,

        follow_location: true,

        performer:       self,
        &block
    )
end
signature_from_url( url, signature_data, precision = PRECISION, &block ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 227
def signature_from_url( url, signature_data, precision = PRECISION, &block )
    controlled_precision = precision * 2
    control_data         = {}

    corrupted          = false
    gathered_responses = 0

    controlled_precision.times do
        request( url ) do |response|
            next if corrupted

            signature = gathered_responses >= precision ?
                control_data : signature_data

            # Well, bad luck, bail out to avoid FPs.
            if corrupted_response?( response )
                block.call response, :corrupted
                corrupted = true
                next
            end

            gathered_responses += 1

            if signature[:body]
                signature[:rdiff] = signature[:body].refine( response.body )

                if gathered_responses == controlled_precision

                    # Both attempts yielded in the same result, the webapp
                    # was stable during the process and the signature can be
                    # considered accurate.
                    if control_data[:rdiff].similar? signature_data[:rdiff]
                        block.call response, :done

                    # Coo-coo for cocoa puffs, can't work with it.
                    else
                        block.call response, :corrupted
                    end

                end
            else
                signature[:body] = Support::Signature.new(
                    response.body, threshold: SIGNATURE_THRESHOLD
                )
            end
        end
    end
end
signature_prototype() click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 516
def signature_prototype
    {
        analyzed:    false,
        in_progress: false,
        waiting:     [],
        signatures:  {
            preliminary: [],
            advanced:    {}
        }
    }
end
signatures_for( url ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 536
def signatures_for( url )
    preliminary_signatures_for( url ) + advanced_signatures_for( url )
end
url_for( url ) click to toggle source
# File lib/arachni/http/client/dynamic_404_handler.rb, line 348
def url_for( url )
    parsed = Arachni::URI( url )

    # If we're dealing with a file resource, then its parent directory will
    # be the applicable custom-404 handler...
    if parsed.resource_extension
        trv_back = Arachni::URI( parsed.up_to_path ).path

    # ...however, if we're dealing with a directory, the applicable handler
    # will be its parent directory.
    else
        trv_back = File.dirname( Arachni::URI( parsed.up_to_path ).path )
    end

    trv_back += '/' if trv_back[-1] != '/'

    parsed = parsed.dup
    parsed.path = trv_back
    parsed.to_s
end