module Gumbo

Public Class Methods

Gumbo::parse(input) {|document| ...} click to toggle source
Gumbo::parse(input) → document

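Parse an HTML document from a string. If the document cannot be created, a runtime error is raised. When a block is given, the document is yielded to it and the block's return value is returned; otherwise the document itself is returned.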

The input string must be UTF-8 encoded; otherwise an ArgumentError is raised.

/*
 * call-seq:
 *   Gumbo::parse(input) {|document| ...}
 *   Gumbo::parse(input) -> document
 *
 * Parse an HTML document from a string.
 *
 * module - receiver of the singleton method (unused).
 * input  - Ruby String whose encoding must be UTF-8.
 *
 * Returns the converted document, or the block's return value when a block
 * is given (the document is yielded to the block).
 *
 * Raises TypeError if +input+ is not a String, ArgumentError if it is not
 * UTF-8 encoded, and RuntimeError if the parser returns no output.
 */
VALUE
r_gumbo_parse(VALUE module, VALUE input) {
    GumboOutput *output;
    VALUE r_document;

    rb_check_type(input, T_STRING);

    if (rb_enc_get_index(input) != rb_utf8_encindex())
        rb_raise(rb_eArgError, "input is not UTF-8 encoded");

    /* The parser receives an explicit byte length, so the buffer does not
     * need to be a NUL-terminated C string. Using RSTRING_PTR (instead of
     * StringValueCStr) avoids a spurious ArgumentError on input that
     * contains embedded NUL bytes. */
    output = gumbo_parse_with_options(&kGumboDefaultOptions,
                                      RSTRING_PTR(input),
                                      RSTRING_LEN(input));
    if (!output)
        rb_raise(rb_eRuntimeError, "cannot parse input");

    /* Convert the parse tree to Ruby objects; the ensure handler releases
     * the parser output whether the conversion succeeds or raises. */
    r_document = rb_ensure(r_gumbo_node_to_value, (VALUE)output->document,
                           r_gumbo_destroy_output, (VALUE)output);

    /* Keep the String reachable while its raw buffer may still be in use:
     * the parser output presumably points into the input buffer until it is
     * destroyed above, and the conversion allocates Ruby objects (so GC may
     * run in the meantime). */
    RB_GC_GUARD(input);

    return rb_block_given_p() ? rb_yield(r_document) : r_document;
}