module Nokogumbo
Constants
- DEFAULT_MAX_ATTRIBUTES
The default maximum number of attributes per element.
- DEFAULT_MAX_ERRORS
The default maximum number of errors for parsing a document or a fragment.
- DEFAULT_MAX_TREE_DEPTH
The default maximum depth of the DOM tree produced by parsing a document or fragment.
- LINE_SUPPORTED
- VERSION
Public Class Methods
fragment(doc_fragment, tags, ctx, max_attributes, max_errors, max_depth)
click to toggle source
/*
 * Parse +tags+ as an HTML fragment relative to a context element and pass the
 * result on to fragment_continue (cleanup is guaranteed via rb_ensure).
 *
 * self           - receiver (unused here).
 * doc_fragment   - object whose +document+ supplies the doctype used for the
 *                  quirks-mode computation; also forwarded to
 *                  fragment_continue as .url_or_frag.
 * tags           - the input handed to perform_parse.
 * ctx            - the fragment context. One of:
 *                    nil     -> an HTML "body" element;
 *                    String  -> a tag name, optionally prefixed with "svg:",
 *                               "html:" or "math:" (case-insensitive); a bare
 *                               "svg" or "math" is placed in its own
 *                               namespace;
 *                    other   -> a node object responding to +name+ (and
 *                               optionally +parent+, +element?+, +[]+).
 * max_attributes - Integer, copied to options.max_attributes.
 * max_errors     - Integer, copied to options.max_errors.
 * max_depth      - Integer; negative values are passed through as -1,
 *                  otherwise one is added to account for the HTML element.
 *
 * Raises ArgumentError when a String context carries an unknown namespace
 * prefix. Returns nil.
 *
 * NOTE(review): `parent` and `internal_subset` are identifiers defined outside
 * this function (presumably file-level IDs) -- confirm against the full file.
 */
static VALUE fragment (
  VALUE self,
  VALUE doc_fragment,
  VALUE tags,
  VALUE ctx,
  VALUE max_attributes,
  VALUE max_errors,
  VALUE max_depth
) {
  ID name = rb_intern_const("name");
  const char *ctx_tag;
  GumboNamespaceEnum ctx_ns;
  GumboQuirksModeEnum quirks_mode;
  bool form = false;
  const char *encoding = NULL;

  if (NIL_P(ctx)) {
    ctx_tag = "body";
    ctx_ns = GUMBO_NAMESPACE_HTML;
  } else if (TYPE(ctx) == T_STRING) {
    ctx_tag = StringValueCStr(ctx);
    ctx_ns = GUMBO_NAMESPACE_HTML;
    size_t len = RSTRING_LEN(ctx);
    const char *colon = memchr(ctx_tag, ':', len);
    if (colon) {
      // The prefix length selects which namespace names are candidates.
      switch (colon - ctx_tag) {
      case 3:
        if (st_strncasecmp(ctx_tag, "svg", 3) != 0)
          goto error;
        ctx_ns = GUMBO_NAMESPACE_SVG;
        break;
      case 4:
        if (st_strncasecmp(ctx_tag, "html", 4) == 0)
          ctx_ns = GUMBO_NAMESPACE_HTML;
        else if (st_strncasecmp(ctx_tag, "math", 4) == 0)
          ctx_ns = GUMBO_NAMESPACE_MATHML;
        else
          goto error;
        break;
      default:
      error:
        // "%.*s" (precision) prints only the prefix before the colon;
        // "%*s" would be a minimum width and print the whole string.
        rb_raise(rb_eArgError, "Invalid context namespace '%.*s'", (int)(colon - ctx_tag), ctx_tag);
      }
      ctx_tag = colon+1;
    } else {
      // For convenience, put 'svg' and 'math' in their namespaces.
      if (len == 3 && st_strncasecmp(ctx_tag, "svg", 3) == 0)
        ctx_ns = GUMBO_NAMESPACE_SVG;
      else if (len == 4 && st_strncasecmp(ctx_tag, "math", 4) == 0)
        ctx_ns = GUMBO_NAMESPACE_MATHML;
    }

    // Check if it's a form.
    form = ctx_ns == GUMBO_NAMESPACE_HTML && st_strcasecmp(ctx_tag, "form") == 0;
  } else {
    ID element_ = rb_intern_const("element?");

    // Context fragment name.
    VALUE tag_name = rb_funcall(ctx, name, 0);
    assert(RTEST(tag_name));
    Check_Type(tag_name, T_STRING);
    ctx_tag = StringValueCStr(tag_name);

    // Context fragment namespace.
    ctx_ns = lookup_namespace(ctx, true);

    // Check for a form ancestor, including self.
    for (VALUE node = ctx;
         !NIL_P(node);
         node = rb_respond_to(node, parent) ? rb_funcall(node, parent, 0) : Qnil) {
      if (!RTEST(rb_funcall(node, element_, 0)))
        continue;
      VALUE element_name = rb_funcall(node, name, 0);
      // The length is already known to be 4, so compare exactly those bytes
      // rather than relying on RSTRING_PTR being NUL-terminated.
      if (RSTRING_LEN(element_name) == 4
          && !st_strncasecmp(RSTRING_PTR(element_name), "form", 4)
          && lookup_namespace(node, false) == GUMBO_NAMESPACE_HTML) {
        form = true;
        break;
      }
    }

    // Encoding: only read the "encoding" attribute of an annotation-xml context.
    if (RSTRING_LEN(tag_name) == 14
        && !st_strcasecmp(ctx_tag, "annotation-xml")) {
      VALUE enc = rb_funcall(ctx, rb_intern_const("[]"),
                             rb_utf8_str_new_static("encoding", 8));
      if (RTEST(enc)) {
        Check_Type(enc, T_STRING);
        encoding = StringValueCStr(enc);
      }
    }
  }

  // Quirks mode: derived from the owning document's internal subset, if any.
  VALUE doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0);
  VALUE dtd = rb_funcall(doc, internal_subset, 0);
  if (NIL_P(dtd)) {
    quirks_mode = GUMBO_DOCTYPE_NO_QUIRKS;
  } else {
    VALUE dtd_name = rb_funcall(dtd, name, 0);
    VALUE pubid = rb_funcall(dtd, rb_intern_const("external_id"), 0);
    VALUE sysid = rb_funcall(dtd, rb_intern_const("system_id"), 0);
    quirks_mode = gumbo_compute_quirks_mode (
      NIL_P(dtd_name)? NULL:StringValueCStr(dtd_name),
      NIL_P(pubid)? NULL:StringValueCStr(pubid),
      NIL_P(sysid)? NULL:StringValueCStr(sysid)
    );
  }

  // Perform a fragment parse.
  int depth = NUM2INT(max_depth);
  GumboOptions options = kGumboDefaultOptions;
  options.max_attributes = NUM2INT(max_attributes);
  options.max_errors = NUM2INT(max_errors);
  // Add one to account for the HTML element.
  options.max_tree_depth = depth < 0 ? -1 : (depth + 1);
  options.fragment_context = ctx_tag;
  options.fragment_namespace = ctx_ns;
  options.fragment_encoding = encoding;
  options.quirks_mode = quirks_mode;
  options.fragment_context_has_form_ancestor = form;

  GumboOutput *output = perform_parse(&options, tags);
  ParseArgs args = {
    .output = output,
    .input = tags,
    .url_or_frag = doc_fragment,
    .doc = (xmlDocPtr)extract_xml_node(doc),
  };
  VALUE parse_args = wrap_parse_args(&args);
  // rb_ensure runs parse_cleanup whether or not fragment_continue raises.
  rb_ensure(fragment_continue, parse_args, parse_cleanup, parse_args);
  return Qnil;
}
parse(input, url, max_attributes, max_errors, max_depth)
click to toggle source
/*
 * Parse +input+ as a complete document.
 *
 * self           - receiver (unused here).
 * input          - the input handed to perform_parse.
 * url            - forwarded to parse_continue as .url_or_frag.
 * max_attributes - Integer, copied to options.max_attributes.
 * max_errors     - Integer, copied to options.max_errors.
 * max_depth      - Integer, copied to options.max_tree_depth.
 *
 * Returns whatever rb_ensure yields for parse_continue; parse_cleanup is
 * registered as the ensure handler for the wrapped arguments.
 */
static VALUE parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth) {
  // Convert the Ruby integers up front, in the same order as they are stored.
  const int attribute_limit = NUM2INT(max_attributes);
  const int error_limit = NUM2INT(max_errors);
  const int depth_limit = NUM2INT(max_depth);

  // Start from the library defaults and override only the three limits.
  GumboOptions opts = kGumboDefaultOptions;
  opts.max_attributes = attribute_limit;
  opts.max_errors = error_limit;
  opts.max_tree_depth = depth_limit;

  GumboOutput *parsed = perform_parse(&opts, input);

  // No document exists yet for a full parse, hence .doc = NIL.
  ParseArgs call_args = {
    .output = parsed,
    .input = input,
    .url_or_frag = url,
    .doc = NIL,
  };
  VALUE wrapped = wrap_parse_args(&call_args);

  return rb_ensure(parse_continue, wrapped, parse_cleanup, wrapped);
}