class Kcar::Parser
Constants
- CHUNK_MAX
The maximum size of a single chunk when using chunked transfer encoding. This is only a theoretical maximum used to detect errors in clients; it is highly unlikely to encounter clients that send more than several kilobytes at once.
- LENGTH_MAX
The maximum size of the body as specified by Content-Length. This is only a theoretical maximum; the actual limit is subject to the limits of the file system used for Dir.tmpdir.
Public Class Methods
Creates a new parser.
call-seq:
parser.reset => parser
Resets the parser so it can be reused by another client
/*
 * Runs http_parser_init() on the parser state attached to +self+ and
 * returns +self+.
 */
static VALUE initialize(VALUE self)
{
  struct http_parser *hp = data_get(self);

  http_parser_init(hp);

  return self;
}
Public Instance Methods
Returns the number of bytes left to run through Parser#filter_body. This will initially be the value of the “Content-Length” HTTP header after header parsing is complete and will decrease in value as Parser#filter_body is called for each chunk. This should return zero for responses with no body.
This will return nil on “Transfer-Encoding: chunked” responses as well as HTTP/1.0 responses where Content-Length is not set
/*
 * Reports how many body bytes remain according to hp->len.content.
 *
 * Returns nil when the body is chunked or when the stored content
 * length is negative; otherwise returns the stored length as a Ruby
 * Integer.
 */
static VALUE body_bytes_left(VALUE self)
{
  struct http_parser *hp = data_get(self);

  /* neither chunked bodies nor negative lengths have a byte count */
  if (hp->chunked || hp->len.content < 0)
    return Qnil;

  return OFFT2NUM(hp->len.content);
}
Sets the number of bytes left to download for HTTP responses with “Content-Length”. This raises RuntimeError for chunked responses.
/*
 * Overwrites the remaining body length with +bytes+.
 *
 * Raises RuntimeError if the parser is handling a chunked body.
 * Storing a length of zero also marks the body EOF as seen.
 * Returns +bytes+ unchanged.
 */
static VALUE body_bytes_left_set(VALUE self, VALUE bytes)
{
  struct http_parser *hp = data_get(self);

  if (hp->chunked)
    rb_raise(rb_eRuntimeError,
             "body_bytes_left= is not for chunked bodies");

  hp->len.content = NUM2OFFT(bytes);

  /* nothing left to read means the end of the body was reached */
  if (!hp->len.content)
    hp->body_eof_seen = 1;

  return bytes;
}
Detects if we're done filtering the body or not. This can be used to detect when to stop calling Parser#filter_body
.
/*
 * Tells whether body filtering is finished.
 *
 * The checks are evaluated in this order:
 *  1. no header flag on a persistent parser   -> true
 *  2. chunked body                            -> result of chunked_eof()
 *  3. no body                                 -> true
 *  4. otherwise                               -> remaining length is zero
 */
static VALUE body_eof(VALUE self)
{
  struct http_parser *hp = data_get(self);
  int done;

  if (!hp->has_header && hp->persistent)
    done = 1;
  else if (hp->chunked)
    done = chunked_eof(hp) ? 1 : 0;
  else if (!hp->has_body)
    done = 1;
  else
    done = (hp->len.content == 0);

  return done ? Qtrue : Qfalse;
}
This is used to detect if a response uses chunked Transfer-Encoding or not.
/*
 * Returns true if the parser's chunked flag is set, false otherwise.
 */
static VALUE chunked(VALUE self)
{
  struct http_parser *hp = data_get(self);

  if (hp->chunked)
    return Qtrue;

  return Qfalse;
}
extract trailers that were set in the header object as an array of arrays
parser.extract_trailers(hdr) => [ [ 'Content-MD5', '1B2M2Y8AsgTpgAmY7PhCfg==' ] ]
# File lib/kcar/parser.rb, line 8
#
# Extracts the trailers that were set in the header object +hdr+ and
# returns them as an array of [ name, value ] pairs.
#
# +hdr+ may be either:
# * an Array of [ key, value ] pairs: every pair that follows a
#   "Trailer" header and whose key is one of the names that header
#   declared (compared case-insensitively) is returned, keeping the
#   key exactly as it appears in +hdr+
# * an object indexed by header name (e.g. a Hash): the names listed in
#   hdr['Trailer'] are looked up directly in +hdr+
#
# Values holding several newline-separated entries yield one pair per
# entry.  An empty array is returned when there is nothing to extract.
def extract_trailers(hdr)
  trailers = []

  if hdr.kind_of?(Array)
    # downcased names declared by "Trailer" headers seen so far
    declared = {}

    # the HTTP spec (and our parser) guarantees trailers will appear
    # after the "Trailer" header is inserted in the array
    hdr.each do |key, value|
      if key =~ %r{\ATrailer\z}i
        value.split(/\s*,+\s*/).each do |name|
          declared[name.downcase] = true
        end
      elsif !declared.empty? && declared.include?(key.downcase)
        # names are compared literally: interpolating them into a
        # regexp would let characters such as "." or "|" in a declared
        # name match unrelated headers
        trailers.concat(value.split(/\n+/).map! { |v| [ key, v ] })
      end
    end
  elsif t = hdr['Trailer']
    t.split(/\s*[,\n]+\s*/).each do |k|
      value = hdr[k] or next
      trailers.concat(value.split(/\n+/).map! { |v| [ k, v ] })
    end
  end

  trailers
end
Takes a String of src, will modify src if dechunking is done. Returns nil if there is more src left to process. Returns dst if body processing is complete. When returning dst, it may modify src so the start of the string points to where the body ended so that trailer processing can begin.
Raises ParserError if there are dechunking errors. Basically this is a glorified memcpy(3) that copies src into dst while filtering it through the dechunker.
/*
 * Copies body data from +src+ into +dst+, dechunking it when the body
 * is chunked.
 *
 * Identity (non-chunked) bodies are only handled for requests: up to
 * the remaining content length is copied into +dst+, the copied bytes
 * are removed from the front of +src+ and +dst+ is returned.  For a
 * non-chunked response RuntimeError is raised.
 *
 * For chunked bodies +src+ is run through the parser and advanced past
 * the consumed bytes.  +dst+ is returned only when the end of the
 * chunked body has been reached and this call produced no body bytes
 * (or the end had already been reached on entry); otherwise nil is
 * returned and the dechunked bytes are left in the caller's +dst+.
 *
 * Raises TypeError if +src+ is not a String and ParserError if the
 * parser ends up in its error state.
 */
static VALUE filter_body(VALUE self, VALUE dst, VALUE src)
{
  struct http_parser *hp = data_get(self);
  char *sptr;
  long slen;

  /*
   * RSTRING_PTR/RSTRING_LEN are only valid on a real String, so reject
   * anything else before touching the buffer (request() performs the
   * same check on its input buffer).
   */
  Check_Type(src, T_STRING);
  sptr = RSTRING_PTR(src);
  slen = RSTRING_LEN(src);
  check_buffer_size(slen);

  StringValue(dst);
  rb_str_modify(dst);
  OBJ_TAINT(dst); /* keep weirdo $SAFE users happy */

  /*
   * for now, only support filter_body for identity requests,
   * not responses; it's rather inefficient to blindly memcpy
   * giant request bodies; on the other hand, it simplifies
   * server-side code.
   */
  if (hp->is_request && !hp->chunked) {
    /* no need to enter the Ragel machine for unchunked transfers */
    assert(hp->len.content >= 0 && "negative Content-Length");
    if (hp->len.content > 0) {
      long nr = MIN(slen, hp->len.content);

      rb_str_resize(dst, nr);
      memcpy(RSTRING_PTR(dst), sptr, nr);
      hp->len.content -= nr;
      if (hp->len.content == 0)
        hp->body_eof_seen = 1;
      advance_str(src, nr);
    }
    return dst;
  }

  if (!hp->chunked)
    rb_raise(rb_eRuntimeError, "filter_body is only for chunked bodies");

  rb_str_resize(dst, slen); /* we can never copy more than slen bytes */
  if (!chunked_eof(hp)) {
    hp->s.dest_offset = 0;
    http_parser_execute(hp, dst, sptr, slen);
    if (hp->cs == http_parser_error)
      rb_raise(eParserError, "Invalid HTTP format, parsing fails.");

    assert(hp->s.dest_offset <= hp->offset &&
           "destination buffer overflow");
    advance_str(src, hp->offset);
    rb_str_set_len(dst, hp->s.dest_offset);

    if (RSTRING_LEN(dst) == 0 && chunked_eof(hp)) {
      assert(hp->len.chunk == 0 && "chunk at EOF but more to parse");
    } else {
      /* body bytes were produced or more input is needed */
      dst = Qnil;
    }
  }
  hp->offset = 0; /* for trailer parsing */
  return dst;
}
Takes a Hash and a String of data, parses the String of data, filling in the Hash, and returns the Hash if parsing is finished, nil otherwise. When returning the hdr Hash, it may modify data to point to where body processing should begin.
Raises ParserError if there are parsing errors.
/*
 * Feeds +data+ to the parser, filling in +hdr+.
 *
 * Raises RuntimeError if this parser has already been used for a
 * request.  Returns nil while parsing is incomplete.  Once the parser
 * reaches its final state or the start of a chunked body, +data+ is
 * advanced past the consumed bytes and either +hdr+ (when parsing
 * trailers) or a two-element array of [ status, hdr ] is returned.
 */
static VALUE headers(VALUE self, VALUE hdr, VALUE data)
{
  struct http_parser *hp = data_get(self);
  int finished;

  if (hp->is_request)
    rb_raise(rb_eRuntimeError,
             "parser is handling a request, not response");

  parser_execute(hp, hdr, data);
  VALIDATE_MAX_LENGTH(hp->offset, HEADER);

  finished = (hp->cs == http_parser_first_final ||
              hp->cs == http_parser_en_ChunkedBody);
  if (!finished)
    return Qnil;

  advance_str(data, hp->offset + 1);
  hp->offset = 0;

  return hp->in_trailer ? hdr : rb_ary_new3(2, hp->v.status, hdr);
}
This should be used to detect if a request can really handle keepalives and pipelining. Currently, the rules are:
- MUST be HTTP/1.1 or HTTP/1.0 with “Connection: keep-alive”
- MUST NOT have “Connection: close” set
- If there is a response body, either a) Content-Length is set or b) chunked encoding is used
/*
 * Decides whether the connection may be kept alive.
 *
 * Returns false unless the parser's persistent flag is set.  With no
 * header or no body the answer is true.  With a body, the length must
 * be known (chunked or a non-negative content length); responses then
 * yield true, while requests yield true only once the end of the body
 * has been seen.
 */
static VALUE keepalive(VALUE self)
{
  struct http_parser *hp = data_get(self);

  if (!hp->persistent)
    return Qfalse;

  /* 100 Continue, 304 Not Modified, etc... */
  if (!hp->has_header || !hp->has_body)
    return Qtrue;

  /* unknown Content-Length and not chunked, we must assume close */
  if (!hp->chunked && hp->len.content < 0)
    return Qfalse;

  if (!hp->is_request)
    return Qtrue;

  return hp->body_eof_seen ? Qtrue : Qfalse;
}
/*
 * Marks the parser as handling a request and feeds +buf+ to it,
 * filling in +env+.
 *
 * Raises TypeError if +buf+ is not a String.  Returns nil while
 * parsing is incomplete.  Once the parser reaches its final state or
 * the start of a chunked body, +buf+ is advanced past the consumed
 * bytes and +env+ is returned; if trailers were being parsed, the body
 * EOF is recorded as seen.
 */
static VALUE request(VALUE self, VALUE env, VALUE buf)
{
  struct http_parser *hp = data_get(self);

  hp->is_request = 1;
  Check_Type(buf, T_STRING);
  parser_execute(hp, env, buf);

  if (hp->cs != http_parser_first_final &&
      hp->cs != http_parser_en_ChunkedBody)
    return Qnil; /* incomplete */

  advance_str(buf, hp->offset + 1);
  hp->offset = 0;
  if (hp->in_trailer)
    hp->body_eof_seen = 1;

  return env;
}
Resets the parser so it can be reused by another client
/*
 * Runs http_parser_init() on the parser state attached to +self+ so
 * the parser can be reused, then returns +self+.
 */
static VALUE initialize(VALUE self)
{
  struct http_parser *state = data_get(self);

  http_parser_init(state);

  return self;
}
Takes a Hash and a String of data, parses the String of data, filling in the Hash, and returns the Hash if parsing is finished, nil otherwise. When returning the hdr Hash, it may modify data to point to where body processing should begin.
Raises ParserError if there are parsing errors.