class Arachni::HTTP::Request

HTTP Request representation.

@author Tasos “Zapotek” Laskos <tasos.laskos@arachni-scanner.com>

Constants

ENCODE_CACHE
MODES

Supported modes of operation.

REDIRECT_LIMIT

Default redirect limit, RFC says 5 max.

Attributes

cookies[R]

@return [Hash]

Cookies set for this request.
effective_body[RW]

@note Available only via completed {Response#request}.

@return [String]

Transmitted HTTP request body.
follow_location[RW]

@return [Bool]

Follow `Location` headers.
headers_string[RW]

@note Available only via completed {Response#request}.

@return [String]

Transmitted HTTP request headers.
high_priority[RW]

@return [Bool]

id[RW]

@return [Integer]

Auto-incremented ID for this request (set by {Client#request}).
max_redirects[RW]

@return [Integer]

Maximum number of redirects to follow.

@see follow_location

mode[R]

@return [Symbol]

Mode of operation for the request.

@see MODES

parameters[R]

@return [Hash]

Request parameters.
password[RW]

@return [String]

HTTP password.
performer[RW]

Entity which performed the request – mostly used to track which response was a result of which submitted element.

proxy[RW]

@return [String]

`host:port`
proxy_type[RW]

@return [String]

proxy_user_password[RW]

@return [String]

`user:password`
raw_cookies[R]

@return [Array<Element::Cookie>]

raw_parameters[RW]

@return [Array]

Parameters which should not be encoded, by name.
response[RW]

@return [Response]

response_body_buffer[RW]

@private

response_max_size[RW]

@return [Integer]

Maximum HTTP response size to accept, in bytes.
timeout[RW]

@return [Integer]

Timeout in milliseconds.
username[RW]

@return [String]

HTTP username.

Public Class Methods

encode( string ) click to toggle source
# File lib/arachni/http/request.rb, line 790
# Escapes `string` for transmission, going through {ENCODE_CACHE} first.
#
# @param [#to_s] string
#   Value to escape, converted via `#to_s`.
#
# @return [String]
#   Cached value or, on a cache miss, the result of `Ethon::Easy#escape`.
def encode( string )
    plain = string.to_s

    # One handle is kept around and reused for its #escape method.
    @easy ||= Ethon::Easy.new( url: 'www.example.com' )

    ENCODE_CACHE.fetch( plain ) do
        @easy.escape( plain )
    end
end
encode_hash( hash, skip = [] ) click to toggle source
# File lib/arachni/http/request.rb, line 770
# Encodes every key and value of `hash`.
#
# @param [Hash] hash
#   Data to encode.
# @param [Array] skip
#   Keys whose pairs should only get their null-bytes encoded.
#
# @return [Hash]
#   New hash holding the encoded pairs.
def encode_hash( hash, skip = [] )
    hash.each_with_object( {} ) do |(name, value), encoded|
        if skip.include?( name )
            # Null-bytes can't be transported at all, so they get encoded
            # even for pairs which are otherwise to be left alone.
            # If we don't Typhoeus/Ethon will raise errors.
            encoded[encode_null_byte( name )] = encode_null_byte( value )
        else
            encoded[encode( name )] = encode( value )
        end
    end
end
encode_null_byte( string ) click to toggle source
# File lib/arachni/http/request.rb, line 786
# Replaces every null-byte of `string` with its `%00` representation.
#
# @param [#to_s] string
#
# @return [String]
def encode_null_byte( string )
    string.to_s.gsub( "\0" ) { '%00' }
end
from_rpc_data( data ) click to toggle source

@param [Hash] data Data as returned by {#to_rpc_data}.

@return [Request]

# File lib/arachni/http/request.rb, line 737
# Restores an instance from a hash of instance variable names and values,
# bypassing #initialize.
#
# @param [Hash] data
#   Instance variable names (without the `@`) mapped to their values.
#
# @return [Request]
def from_rpc_data( data )
    # These are stored as Symbols but arrive in a form responding to #to_sym.
    symbolized = %w(method mode)

    data.each_with_object( allocate ) do |(name, value), instance|
        value = value.to_sym if symbolized.include?( name )
        instance.instance_variable_set( "@#{name}", value )
    end
end
new( options = {} ) click to toggle source

@param [Hash] options

Request options.

@option options [String] :url

URL.

@option options [Hash] :parameters ({})

Request parameters.

@option options [String] :body ({})

Request body.

@option options [Bool] :train (false)

Force Arachni to analyze the response looking for new elements.

@option options [Symbol] :mode (:async)

Mode in which to perform the request:

* `:async` -- Asynchronous (non-blocking) (Default).
* `:sync` -- Synchronous (blocking).

@option options [Hash] :headers ({})

Extra HTTP request headers.

@option options [Hash] :cookies ({})

Cookies for the request.
Calls superclass method Arachni::HTTP::Message::new
# File lib/arachni/http/request.rb, line 141
# Sets up the request and fills in defaults for whatever was left unset
# once the superclass has processed `options`.
#
# @param [Hash] options
#   Request options, `:method` defaults to `:get`.
def initialize( options = {} )
    options[:method] ||= :get

    super( options )

    # Boolean flags only get their default when they are still unset.
    {
        train:           false,
        fingerprint:     true,
        update_cookies:  false,
        follow_location: false
    }.each do |flag, default|
        ivar = "@#{flag}"
        instance_variable_set( ivar, default ) if instance_variable_get( ivar ).nil?
    end

    # NOTE(review): assigned unconditionally, so a value already stored in
    # @max_redirects by the superclass constructor would be overwritten --
    # confirm this is intended.
    @max_redirects = (Options.http.request_redirect_limit || REDIRECT_LIMIT)

    # Callbacks always start out empty.
    @on_headers, @on_body, @on_body_line, @on_body_lines, @on_complete =
        Array.new( 5 ) { [] }

    @raw_parameters ||= []
    @timeout        ||= Options.http.request_timeout
    @mode           ||= :async
    @parameters     ||= {}
    @cookies        ||= {}
    @raw_cookies    ||= []
end
parse_body( body ) click to toggle source

Parses an HTTP request body generated by submitting a form.

@param [String] body

@return [Hash]

Parameters.
# File lib/arachni/http/request.rb, line 760
# Splits a `name=value&name2=value2` style body into its parameters,
# decoding names and values via `Form.decode`.
#
# @param [String] body
#
# @return [Hash]
#   Decoded parameters, empty if `body` is blank.
def parse_body( body )
    return {} if body.to_s.empty?

    body.split( '&' ).each_with_object( {} ) do |pair, params|
        # Only split on the first `=`, values may contain more.
        name, value = pair.split( '=', 2 )
        params[Form.decode( name.to_s )] = Form.decode( value )
    end
end

Public Instance Methods

==( other ) click to toggle source
# File lib/arachni/http/request.rb, line 680
def ==( other )
    hash == other.hash
end
asynchronous?() click to toggle source

@return [Boolean]

`true` if {#mode} is `:async`, `false` otherwise.
# File lib/arachni/http/request.rb, line 203
# @return [Boolean]
#   `true` if {#mode} is `:async`, `false` otherwise.
def asynchronous?
    :async == mode
end
blocking?() click to toggle source

@return [Boolean]

`true` if {#mode} is `:sync`, `false` otherwise.
# File lib/arachni/http/request.rb, line 209
# @return [Boolean]
#   `true` if {#mode} is `:sync`, `false` otherwise.
def blocking?
    :sync == mode
end
body_parameters() click to toggle source
# File lib/arachni/http/request.rb, line 270
# Parameters carried by the body of a `POST` request.
#
# @return [Hash]
#   * Empty for anything other than `POST`.
#   * {#parameters} when any have been set.
#   * Otherwise the parsed body, either as multipart data (when the
#     content-type says so and includes a boundary) or as a regular form.
def body_parameters
    return {}         unless method == :post
    return parameters if parameters.any?

    content_type = headers.content_type

    # Anything that isn't multipart is treated as a regular form submission.
    unless content_type.to_s.start_with?( 'multipart/form-data' )
        return self.class.parse_body( body )
    end

    # Multipart bodies can't be parsed without their boundary.
    return {} unless content_type.include?( 'boundary=' )

    boundary = content_type.match( /boundary=(.*)/i )[1].to_s
    Form.parse_data( body, boundary )
end
buffered?() click to toggle source
# File lib/arachni/http/request.rb, line 367
# @return [Bool]
#   `true` if any {#on_body}, {#on_body_line} or {#on_body_lines} callbacks
#   have been registered, `false` otherwise.
def buffered?
    [@on_body, @on_body_line, @on_body_lines].any?( &:any? )
end
clear_callbacks() click to toggle source

Clears all registered callbacks ({#on_complete}, {#on_body}, {#on_headers}, {#on_body_line} and {#on_body_lines}).

# File lib/arachni/http/request.rb, line 339
# Removes every registered callback: {#on_complete}, {#on_body},
# {#on_headers}, {#on_body_line} and {#on_body_lines}.
#
# @return [Array]
#   The (now empty) list of {#on_body_lines} callbacks.
def clear_callbacks
    [@on_complete, @on_body, @on_headers, @on_body_line].each( &:clear )

    # Kept last so that its result remains the return value.
    @on_body_lines.clear
end
cookies=( cookies ) click to toggle source

@note All keys and values will be recursively converted to strings.

@param [Hash] cookies

Cookies to assign to this request.

@return [Hash] Normalized cookies.

# File lib/arachni/http/request.rb, line 184
# Stores the result of `cookies.stringify_recursively_and_freeze` as the
# cookies of this request.
#
# @param [Hash] cookies
#
# @return [Hash]
#   Normalized cookies.
def cookies=( cookies )
    normalized = cookies.stringify_recursively_and_freeze
    @cookies   = normalized
end
effective_cookies() click to toggle source
# File lib/arachni/http/request.rb, line 244
# Combines all cookie sources of the request, in order of precedence:
# {#cookies}, the `Cookie` header and, lastly, {#raw_cookies}.
#
# @return [Hash]
#   Cookie names mapped to values; {#cookies} itself is left untouched.
def effective_cookies
    merged = self.cookies.dup

    unless headers['Cookie'].to_s.empty?
        # Header cookies only fill in names which aren't already set.
        Cookie.from_string( url, headers['Cookie'] ).each do |cookie|
            merged[cookie.name] ||= cookie.value
        end
    end

    # Same goes for the raw ones.
    @raw_cookies.each do |cookie|
        merged[cookie.raw_name] ||= cookie.raw_value
    end

    merged
end
effective_parameters() click to toggle source
# File lib/arachni/http/request.rb, line 263
# Parameters of the URL query, overridden by {#parameters} where both define
# the same name.
#
# @return [Hash]
def effective_parameters
    query = Utilities.uri_parse_query( url )
    parameters.empty? ? query : query.merge!( parameters )
end
fingerprint?() click to toggle source

@return [Bool]

`true` if the {Response} should be {Platform::Manager.fingerprint fingerprinted}
for platforms, `false` otherwise.
# File lib/arachni/http/request.rb, line 356
# @return [Bool]
#   `true` if the {Response} should be fingerprinted for platforms, `false`
#   otherwise.
def fingerprint?
    # Coerced so that an unset or non-boolean flag still yields a strict
    # boolean, same as #follow_location? and #high_priority? do.
    !!@fingerprint
end
follow_location?() click to toggle source

@return [Bool]

`true` if redirects should be followed, `false` otherwise.
# File lib/arachni/http/request.rb, line 349
# @return [Bool]
#   `true` if redirects should be followed, `false` otherwise.
def follow_location?
    @follow_location ? true : false
end
hash() click to toggle source
# File lib/arachni/http/request.rb, line 684
# @return [Integer]
#   Hash of the {#to_h} representation of the request.
def hash
    attributes = to_h
    attributes.hash
end
high_priority?() click to toggle source
# File lib/arachni/http/request.rb, line 174
# @return [Bool]
#   `true` if the request has been flagged as high priority, `false`
#   otherwise.
def high_priority?
    @high_priority ? true : false
end
inspect() click to toggle source
# File lib/arachni/http/request.rb, line 292
def inspect
    s = "#<#{self.class} "
    s << "@id=#{id} "
    s << "@mode=#{mode} "
    s << "@method=#{method} "
    s << "@url=#{url.inspect} "
    s << "@parameters=#{parameters.inspect} "
    s << "@high_priority=#{high_priority} "
    s << "@performer=#{performer.inspect}"
    s << '>'
end
marshal_dump() click to toggle source
# File lib/arachni/http/request.rb, line 688
# Dumps the instance variables of the request into a Hash, keyed by variable
# name without the `@` prefix.
#
# The performer, response, raw cookies and all callbacks are blanked out for
# the duration of the dump and restored afterwards, so they show up in the
# result as `nil` or empty arrays.
#
# @return [Hash]
def marshal_dump
    # Remember the state which is about to be blanked out.
    raw_cookies   = @raw_cookies.dup
    callbacks     = @on_complete.dup
    on_body       = @on_body.dup
    on_headers    = @on_headers.dup
    on_body_line  = @on_body_line.dup
    on_body_lines = @on_body_lines.dup
    performer     = @performer
    response      = @response

    @performer     = nil
    @response      = nil
    @raw_cookies   = []
    @on_complete   = []
    @on_body       = []
    @on_body_line  = []
    @on_body_lines = []
    @on_headers    = []

    instance_variables.inject( {} ) do |h, iv|
        # @scope is the only variable left out of the dump entirely.
        next h if iv == :@scope
        h[iv.to_s.gsub('@','')] = instance_variable_get( iv )
        h
    end
ensure
    # Put everything back, even if the dump failed.
    @response      = response
    @raw_cookies   = raw_cookies
    @on_complete   = callbacks
    @on_body       = on_body
    @on_body_line  = on_body_line
    @on_body_lines = on_body_lines
    @on_headers    = on_headers
    @performer     = performer
end
marshal_load( h ) click to toggle source
# File lib/arachni/http/request.rb, line 723
# Restores instance variables from a Hash of variable names (without the
# `@` prefix) and values.
#
# @param [Hash] h
#
# @return [Hash]
#   `h` itself.
def marshal_load( h )
    h.each do |name, value|
        instance_variable_set( "@#{name}", value )
    end
end
method( *args ) click to toggle source

@return [Symbol]

HTTP method.
Calls superclass method
# File lib/arachni/http/request.rb, line 215
# @return [Symbol]
#   HTTP method when called without (truthy) arguments; otherwise the call
#   is forwarded to the superclass implementation.
def method( *args )
    # Preserve Object#method.
    args.any? ? super( *args ) : @method
end
method=( verb ) click to toggle source

@note Method will be normalized to a lower-case symbol.

Sets the request HTTP method.

@param [#to_s] verb

HTTP method.

@return [Symbol]

HTTP method.
# File lib/arachni/http/request.rb, line 229
# Sets the HTTP method, normalized to a lower-case Symbol.
#
# @param [#to_s] verb
#
# @return [Symbol]
def method=( verb )
    normalized = verb.to_s.downcase
    @method    = normalized.to_sym
end
mode=( v ) click to toggle source
# File lib/arachni/http/request.rb, line 233
# Sets the mode of operation.
#
# @param [#downcase] v
#   One of {MODES}, case-insensitive.
#
# @return [Symbol]
#
# @raise [ArgumentError]
#   If the mode isn't included in {MODES}.
def mode=( v )
    requested = v.downcase.to_sym

    unless MODES.include?( requested )
        raise ArgumentError,
              "Invalid mode, supported modes are: #{MODES.join( ', ' )}"
    end

    @mode = requested
end
on_body( &block ) click to toggle source
# File lib/arachni/http/request.rb, line 320
# Registers a callback with the {#on_body} list.
#
# @param [Block] block
#
# @return [Request]
#   `self`
#
# @raise [RuntimeError]
#   If no block has been given.
def on_body( &block )
    raise 'Block is missing.' unless block
    @on_body.push( block )
    self
end
on_body_line( &block ) click to toggle source
# File lib/arachni/http/request.rb, line 326
# Registers a callback with the {#on_body_line} list.
#
# @param [Block] block
#
# @return [Request]
#   `self`
#
# @raise [RuntimeError]
#   If no block has been given.
def on_body_line( &block )
    raise 'Block is missing.' unless block
    @on_body_line.push( block )
    self
end
on_body_lines( &block ) click to toggle source
# File lib/arachni/http/request.rb, line 332
# Registers a callback with the {#on_body_lines} list.
#
# @param [Block] block
#
# @return [Request]
#   `self`
#
# @raise [RuntimeError]
#   If no block has been given.
def on_body_lines( &block )
    raise 'Block is missing.' unless block
    @on_body_lines.push( block )
    self
end
on_complete( &block ) click to toggle source

@note Can be invoked multiple times.

@param [Block] block

Callback to be passed the {Response response}.
# File lib/arachni/http/request.rb, line 314
# Registers a callback with the {#on_complete} list; can be invoked
# multiple times.
#
# @param [Block] block
#
# @return [Request]
#   `self`
#
# @raise [RuntimeError]
#   If no block has been given.
def on_complete( &block )
    raise 'Block is missing.' unless block
    @on_complete.push( block )
    self
end
on_headers( &block ) click to toggle source
# File lib/arachni/http/request.rb, line 304
# Registers a callback with the {#on_headers} list.
#
# @param [Block] block
#
# @return [Request]
#   `self`
#
# @raise [RuntimeError]
#   If no block has been given.
def on_headers( &block )
    raise 'Block is missing.' unless block
    @on_headers.push( block )
    self
end
parameters=( params ) click to toggle source

@note All keys and values will be recursively converted to strings.

@param [Hash] params

Parameters to assign to this request.
If performing a GET request and the URL has parameters of its own they
will be merged and overwritten.

@return [Hash]

Normalized parameters.
# File lib/arachni/http/request.rb, line 197
# Stores the result of `params.stringify_recursively_and_freeze` as the
# parameters of this request.
#
# @param [Hash] params
#
# @return [Hash]
#   Normalized parameters.
def parameters=( params )
    normalized  = params.stringify_recursively_and_freeze
    @parameters = normalized
end
prepare_headers() click to toggle source
# File lib/arachni/http/request.rb, line 797
# Finalizes {#headers} prior to transmission: fills in defaults, encodes
# the header values and builds the `Cookie` header out of {#cookies},
# {#raw_cookies} and any pre-existing `Cookie` header.
#
# @return [Object]
#   The updated {#headers}.
def prepare_headers
    # Defaults are only applied to headers which haven't already been set.
    headers['User-Agent']      ||= Options.http.user_agent
    headers['Accept']          ||= 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    headers['From']            ||= Options.authorized_by if Options.authorized_by
    headers['Accept-Language'] ||= 'en-US,en;q=0.8,he;q=0.6'

    headers.each { |k, v| headers[k] = Header.encode( v ) if v }

    final_cookies_hash = self.cookies
    final_raw_cookies  = self.raw_cookies

    if headers['Cookie']
        # Cookies of the existing header are kept as raw ones, unless their
        # name is already taken by #cookies or #raw_cookies.
        final_raw_cookies_set = Set.new( final_raw_cookies.map(&:name) )
        final_raw_cookies |= Cookie.from_string( url, headers['Cookie'] ).reject do |c|
            final_cookies_hash.include?( c.name ) ||
                final_raw_cookies_set.include?( c.name )
        end
    end

    # Regular cookies get encoded...
    headers['Cookie'] = final_cookies_hash.
        map { |k, v| "#{Cookie.encode( k )}=#{Cookie.encode( v )}" }.join( ';' )

    if !headers['Cookie'].empty? && final_raw_cookies.any?
        headers['Cookie'] += ';'
    end

    # ...while raw ones are appended via their #to_s representation.
    headers['Cookie'] += final_raw_cookies.map { |c| c.to_s }.join( ';' )

    # Don't send an empty Cookie header.
    headers.delete( 'Cookie' ) if headers['Cookie'].empty?

    headers
end
raw_parameters=( names ) click to toggle source
# File lib/arachni/http/request.rb, line 166
# Sets the names of the parameters which should not be encoded.
#
# @param [Array, nil] names
#   Parameter names; a falsy value empties the current list instead.
#
# @return [Array]
def raw_parameters=( names )
    return @raw_parameters.clear unless names
    @raw_parameters = names
end
run() click to toggle source

@note Will call {#on_complete} callbacks.

Performs the {Request} without going through {HTTP::Client}.

@return [Response]

# File lib/arachni/http/request.rb, line 394
# Performs the request by delegating to the private #client_run.
#
# @return [Object]
#   Whatever #client_run returns.
def run
    client_run
end
set_response_data( typhoeus_response ) click to toggle source
# File lib/arachni/http/request.rb, line 656
# Updates the request with the data that was actually transmitted and
# (re)builds its {#response} out of `typhoeus_response`.
#
# @param [Object] typhoeus_response
#   Response object handed over by Typhoeus.
#
# @return [Object]
#   Result of `Response#update_from_typhoeus`.
def set_response_data( typhoeus_response )
    fill_in_data_from_typhoeus_response( typhoeus_response )

    response_options = {
        normalize_url: @normalize_url,
        request:       self
    }
    self.response = Response.from_typhoeus( typhoeus_response, response_options )

    response.update_from_typhoeus( typhoeus_response )
end
to_h() click to toggle source
# File lib/arachni/http/request.rb, line 668
# @return [Hash]
#   The URL, parameters, headers, transmitted headers and body, body and
#   HTTP method of the request, keyed by attribute name.
def to_h
    attributes = [
        :url, :parameters, :headers, :headers_string, :effective_body,
        :body, :method
    ]

    attributes.each_with_object( {} ) do |attribute, h|
        h[attribute] = send( attribute )
    end
end
to_rpc_data() click to toggle source

@return [Hash]

Data representing this instance that are suitable for RPC transmission.
# File lib/arachni/http/request.rb, line 729
# @return [Hash]
#   RPC representation of the request, which is the same data as produced
#   by {#marshal_dump}.
def to_rpc_data
    marshal_dump
end
to_s() click to toggle source

@return [String]

HTTP request string.
# File lib/arachni/http/request.rb, line 288
def to_s
    "#{headers_string}#{effective_body}"
end
to_typhoeus() click to toggle source

@return [Typhoeus::Request]

`self` converted to a `Typhoeus::Request`.
# File lib/arachni/http/request.rb, line 400
# Converts `self` to a `Typhoeus::Request`, carrying over the request
# options (authentication, SSL, proxy, size and redirect limits, timeout)
# and wiring up the {#on_headers}, {#on_body}, {#on_body_line},
# {#on_body_lines} and {#on_complete} callbacks.
#
# Any user callback can return `:abort`; from that point on every handler
# registered here returns `:abort` and the {#on_complete} callbacks are
# skipped.
#
# @return [Typhoeus::Request]
def to_typhoeus
    prepare_headers

    if (userpwd = (@username || Options.http.authentication_username))
        if (passwd = (@password || Options.http.authentication_password))
            userpwd += ":#{passwd}"
        end
    end

    max_size = @response_max_size || Options.http.response_max_size
    # Weird I know, for some reason 0 gets ignored.
    max_size = 1   if max_size == 0
    # Negative means no limit; a missing limit is left as is.
    max_size = nil if max_size && max_size < 0

    ep = self.class.encode_hash( self.effective_parameters, @raw_parameters )

    eb = self.body
    if eb.is_a?( Hash )
        eb = self.class.encode_hash( eb, @raw_parameters )
    end

    options = {
        method:          method,
        headers:         headers,

        body:            eb,
        params:          ep,

        userpwd:         userpwd,

        followlocation:  follow_location?,
        maxredirs:       @max_redirects,

        ssl_verifypeer:  !!Options.http.ssl_verify_peer,
        ssl_verifyhost:  Options.http.ssl_verify_host ? 2 : 0,
        sslcert:         Options.http.ssl_certificate_filepath,
        sslcerttype:     Options.http.ssl_certificate_type,
        sslkey:          Options.http.ssl_key_filepath,
        sslkeytype:      Options.http.ssl_key_type,
        sslkeypasswd:    Options.http.ssl_key_password,
        cainfo:          Options.http.ssl_ca_filepath,
        capath:          Options.http.ssl_ca_directory,
        sslversion:      Options.http.ssl_version,

        accept_encoding: 'gzip, deflate',
        nosignal:        true,

        # If Content-Length is missing this option will have no effect, so
        # we'll also stream the body to make sure that we can at least abort
        # the reading of the response body if it exceeds this limit.
        maxfilesize:     max_size,

        # Reusing connections for blocking requests used to cause FD leaks
        # but doesn't appear to do so anymore.
        #
        # Let's allow reuse for all request types again but keep an eye on it.
        # forbid_reuse:    blocking?,

        # Enable debugging messages in order to capture raw traffic data.
        verbose:         true,

        # We're going to be escaping **a lot** of the same strings during
        # the scan, so bypass Ethon's encoding and do our own cache-based
        # encoding.
        escape:          false
    }

    options[:timeout_ms] = timeout if timeout

    # This will allow GSS-Negotiate to work out of the box but shouldn't
    # have any adverse effects.
    if !options[:userpwd] && !parsed_url.user
        options[:userpwd]  = ':'
        options[:httpauth] = :gssnegotiate
    else
        options[:httpauth] = Options.http.authentication_type.to_sym
    end

    if proxy
        options.merge!(
            proxy:     proxy,
            proxytype: (proxy_type || :http).to_sym
        )

        if proxy_user_password
            options[:proxyuserpwd] = proxy_user_password
        end

    elsif Options.http.proxy_host && Options.http.proxy_port
        options.merge!(
            proxy:     "#{Options.http.proxy_host}:#{Options.http.proxy_port}",
            proxytype: (Options.http.proxy_type || :http).to_sym
        )

        if Options.http.proxy_username && Options.http.proxy_password
            options[:proxyuserpwd] =
                "#{Options.http.proxy_username}:#{Options.http.proxy_password}"
        end
    end

    typhoeus_request = Typhoeus::Request.new( url.split( '?').first, options )

    # Shared by all handlers below, set to :abort once any callback asks for
    # the request to be aborted.
    aborted = nil

    # Always set this because we'll be streaming most of the time, so we
    # should set @response so that there'll be a response available for the
    # #on_body and #on_body_line callbacks.
    typhoeus_request.on_headers do |typhoeus_response|
        next aborted if aborted

        set_response_data typhoeus_response

        @on_headers.each do |on_header|
            exception_jail false do
                if on_header.call( self.response ) == :abort
                    break aborted = :abort
                end
            end
        end

        # Needs to be the result of the handler itself (rather than of the
        # iteration above) in order for the abort to be reported, same as
        # with the body handlers.
        next aborted if aborted
    end

    if @on_body.any?
        typhoeus_request.on_body do |chunk|
            next aborted if aborted

            @on_body.each do |b|
                exception_jail false do
                    chunk.recode!
                    if b.call( chunk, self.response ) == :abort
                        break aborted = :abort
                    end
                end
            end

            next aborted if aborted
        end
    end

    if @on_body_line.any?
        line_buffer = ''
        typhoeus_request.on_body do |chunk|
            next aborted if aborted

            chunk.recode!
            line_buffer << chunk

            lines = line_buffer.lines

            # Everything buffered so far now lives in `lines`, the only thing
            # to be carried over to the next chunk is an incomplete last
            # line (see below).
            line_buffer = ''

            @response_body_buffer = nil

            # Incomplete last line, we've either read everything or were cut
            # short, but we can't know which.
            if lines.any? && !lines.last.index( /[\n\r]/, -1 )
                last_line = lines.pop

                # Set it as the generic body buffer in order to be accessible
                # via #on_complete in case this was indeed the end of the
                # response.
                @response_body_buffer = last_line.dup

                # Also push it back to our own buffer in case there's more
                # to read in order to complete the line.
                line_buffer = last_line
            end

            lines.each do |line|
                @on_body_line.each do |b|
                    exception_jail false do
                        if b.call( line, self.response ) == :abort
                            break aborted = :abort
                        end
                    end
                end

                break aborted if aborted
            end

            next aborted if aborted
        end
    end

    if @on_body_lines.any?
        lines_buffer = ''
        typhoeus_request.on_body do |chunk|
            next aborted if aborted

            chunk.recode!
            lines_buffer << chunk

            lines, middle, remnant = lines_buffer.rpartition( /[\r\n]/ )
            lines << middle

            # Only what follows the last line terminator (possibly nothing)
            # is carried over, otherwise lines which have already been
            # delivered would be delivered again along with the next chunk.
            lines_buffer = remnant

            @response_body_buffer = nil

            # Incomplete last line, we've either read everything or were cut
            # short, but we can't know which.
            if !remnant.empty?
                # Set it as the generic body buffer in order to be accessible
                # via #on_complete in case this was indeed the end of the
                # response.
                @response_body_buffer = remnant.dup
            end

            @on_body_lines.each do |b|
                exception_jail false do
                    if b.call( lines, self.response ) == :abort
                        break aborted = :abort
                    end
                end
            end

            next aborted if aborted
        end
    end

    if @on_complete.any?
        # No need to set our own reader in order to enforce max response size
        # if the response is already been read bit by bit via other callbacks.
        if typhoeus_request.options[:maxfilesize] && @on_body.empty? &&
            @on_body_line.empty? && @on_body_lines.empty?

            @response_body_buffer = ''
            set_body_reader( typhoeus_request, @response_body_buffer )
        end

        typhoeus_request.on_complete do |typhoeus_response|
            next aborted if aborted

            # Set either by the default body reader or is a remnant from
            # a user specified callback like #on_body, #on_body_line, etc.
            if @response_body_buffer
                typhoeus_response.options[:response_body] =
                    @response_body_buffer
            end

            set_response_data typhoeus_response

            @on_complete.each do |b|
                exception_jail false do
                    b.call self.response
                end
            end
        end
    end

    typhoeus_request
end
train() click to toggle source

Flags that the response should be analyzed by the {Trainer} for new elements.

# File lib/arachni/http/request.rb, line 373
# Sets the flag reported by {#train?}.
#
# @return [true]
def train
    @train = true
end
train?() click to toggle source

@return [Bool]

`true` if the {Response} should be analyzed by the {Trainer}
for new elements, `false` otherwise.
# File lib/arachni/http/request.rb, line 363
# @return [Bool]
#   `true` if the {Response} should be analyzed by the {Trainer}
#   for new elements, `false` otherwise.
def train?
    # Coerced so that an unset or non-boolean flag still yields a strict
    # boolean, same as #follow_location? and #high_priority? do.
    !!@train
end
update_cookies() click to toggle source

Flags that the {CookieJar} should be updated with the {Response} cookies.

# File lib/arachni/http/request.rb, line 385
# Sets the flag reported by {#update_cookies?}.
#
# @return [true]
def update_cookies
    @update_cookies = true
end
update_cookies?() click to toggle source

@return [Bool]

`true` if the {CookieJar} should be updated with the {Response} cookies,
`false` otherwise.
# File lib/arachni/http/request.rb, line 380
# @return [Bool]
#   `true` if the {CookieJar} should be updated with the {Response} cookies,
#   `false` otherwise.
def update_cookies?
    # Coerced so that an unset or non-boolean flag still yields a strict
    # boolean, same as #follow_location? and #high_priority? do.
    !!@update_cookies
end

Private Instance Methods

client_run() click to toggle source
# File lib/arachni/http/request.rb, line 832
# Performs the request in a blocking manner, using a `Typhoeus::Hydra`
# which is kept per thread.
#
# @return [Response]
#   {#response}, once the hydra has finished running.
def client_run
    # #to_typhoeus only sets the #response upon completion when there is at
    # least one #on_complete callback, so make sure that there is one -- but
    # don't pile up yet another no-op callback each time we get called.
    on_complete {} if @on_complete.empty?

    treq = self.to_typhoeus

    hydra = (Thread.current[:client_run_hydra] ||= Typhoeus::Hydra.new)
    hydra.queue treq
    hydra.run

    self.response
end
fill_in_data_from_typhoeus_response( response ) click to toggle source
# File lib/arachni/http/request.rb, line 845
# Stores the headers and body that were actually transmitted, as captured
# by the debugging info of `response`.
#
# @param [#debug_info] response
#
# @return [Object]
#   The transmitted body.
def fill_in_data_from_typhoeus_response( response )
    debug_info = response.debug_info

    # Only grab the last data.
    # In case of CONNECT calls for HTTPS via proxy the first data will be
    # the proxy-related stuff.
    @headers_string = debug_info.header_out.last
    @effective_body = debug_info.data_out.last
end
set_body_reader( typhoeus_request, buffer ) click to toggle source
# File lib/arachni/http/request.rb, line 853
# Registers a body handler which accumulates the response body in `buffer`
# and gives up once the buffer has reached the `:maxfilesize` option of the
# request.
#
# Does nothing when the request has no `:maxfilesize`.
#
# @param [#options, #on_body] typhoeus_request
# @param [String] buffer
#   Receives the body chunks, gets emptied on abort.
def set_body_reader( typhoeus_request, buffer )
    return unless typhoeus_request.options[:maxfilesize]

    outcome = nil
    typhoeus_request.on_body do |chunk|
        if outcome
            # Already given up, keep saying so.
            outcome
        elsif buffer.size >= typhoeus_request.options[:maxfilesize]
            buffer.clear
            outcome = :abort
        else
            buffer << chunk
            true
        end
    end
end