Gumbo 0.9.2
A C library for parsing HTML.
Loading...
Searching...
No Matches
gumbo.h
Go to the documentation of this file.
1// Copyright 2010 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Author: jdtang@google.com (Jonathan Tang)
16//
17// We use Gumbo as a prefix for types, gumbo_ as a prefix for functions, and
18// GUMBO_ as a prefix for enum constants (static constants get the Google-style
19// kGumbo prefix).
20
42#ifndef GUMBO_GUMBO_H_
43#define GUMBO_GUMBO_H_
44
45#ifdef _MSC_VER
46#define _CRT_SECURE_NO_WARNINGS
47#define fileno _fileno
48#endif
49
50#include <stdbool.h>
51#include <stddef.h>
52
53#ifdef __cplusplus
54extern "C" {
55#endif
56
67typedef struct {
68 unsigned int line;
69 unsigned int column;
70 unsigned int offset;
72
78
88typedef struct {
90 const char* data;
91
93 size_t length;
95
98
104 const GumboStringPiece* str1, const GumboStringPiece* str2);
105
111 const GumboStringPiece* str1, const GumboStringPiece* str2);
112
122typedef struct {
126 void** data;
127
129 unsigned int length;
130
132 unsigned int capacity;
134
// Declaration of a constant GumboVector with external linkage; the object
// itself is defined in another translation unit.
// NOTE(review): by its name this is presumably a vector with no elements,
// usable as a default value -- confirm against the definition.
136extern const GumboVector kGumboEmptyVector;
137
// Takes a vector and an element pointer and returns an int.
// NOTE(review): judging by the name, this reports the position of `element`
// within `vector`; presumably the match is by pointer identity and a negative
// value means "not found" -- confirm against the implementation.
142int gumbo_vector_index_of(GumboVector* vector, const void* element);
143
// Enumeration of tag identifiers. The individual tag constants are not written
// out here: they are pulled in from tag_enum.h, and two extra enumerators,
// GUMBO_TAG_UNKNOWN and GUMBO_TAG_LAST, are appended after them.
156typedef enum {
157// Load all the tags from an external source, generated from tag.in.
158#include "tag_enum.h"
159 // Used for all tags that don't have special handling in HTML. Add new tags
160 // to the end of tag.in so as to preserve backwards-compatibility.
161 GUMBO_TAG_UNKNOWN,
162 // A marker value to indicate the end of the enum, for iterating over it.
163 // Also used as the terminator for varargs functions that take tags.
// Must stay the final enumerator for either of those uses to work.
164 GUMBO_TAG_LAST,
165} GumboTag;
166
173
185
199
// Two lookups that take a tag name as text and return a GumboTag.
// gumbo_tagn_enum additionally receives an explicit length for the name.
// NOTE(review): presumably gumbo_tag_enum expects a NUL-terminated string
// while gumbo_tagn_enum reads exactly `length` characters, and an unrecognised
// name presumably yields GUMBO_TAG_UNKNOWN -- confirm against the
// implementation.
204GumboTag gumbo_tag_enum(const char* tagname);
205GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length);
206
212typedef enum {
213 GUMBO_ATTR_NAMESPACE_NONE,
214 GUMBO_ATTR_NAMESPACE_XLINK,
215 GUMBO_ATTR_NAMESPACE_XML,
216 GUMBO_ATTR_NAMESPACE_XMLNS,
218
224typedef struct {
232
237 const char* name;
238
244
251 const char* value;
252
262
265
272
275
279
// Takes a vector of attributes and an attribute name, and returns a pointer to
// a GumboAttribute.
// NOTE(review): presumably `attrs` is a vector of GumboAttribute* (as the
// element `attributes` field is annotated) and the result is NULL when no
// attribute matches; how names are compared is not visible here -- confirm
// against the implementation.
285GumboAttribute* gumbo_get_attribute(const GumboVector* attrs, const char* name);
286
291typedef enum {
311
// Forward declaration: introduces GumboNode as the name for
// struct GumboInternalNode, so that declarations appearing before the struct
// body (for example GumboNode* fields) can refer to the type by pointer.
316typedef struct GumboInternalNode GumboNode;
317
321typedef enum {
322 GUMBO_DOCTYPE_NO_QUIRKS,
323 GUMBO_DOCTYPE_QUIRKS,
324 GUMBO_DOCTYPE_LIMITED_QUIRKS
326
334typedef enum {
335 GUMBO_NAMESPACE_HTML,
336 GUMBO_NAMESPACE_SVG,
337 GUMBO_NAMESPACE_MATHML
339
348typedef enum {
354
362
375
376 // Value 1 << 2 was for a flag that has since been removed.
377
383
391
394
397
404
407
410
417
421typedef struct {
427 GumboVector /* GumboNode* */ children;
428
429 // True if there was an explicit doctype token as opposed to it being omitted.
430 bool has_doctype;
431
432 // Fields from the doctype token, copied verbatim.
433 const char* name;
434 const char* public_identifier;
435 const char* system_identifier;
436
443
448typedef struct {
453 const char* text;
454
460
466} GumboText;
467
472typedef struct {
477 GumboVector /* GumboNode* */ children;
478
481
484
492
499
502
505
510 GumboVector /* GumboAttribute* */ attributes;
512
520
522 GumboNode* parent;
523
526
533
535 union {
536 GumboDocument document; // For GUMBO_NODE_DOCUMENT.
537 GumboElement element; // For GUMBO_NODE_ELEMENT.
538 GumboText text; // For everything else.
539 } v;
540};
541
548// TODO(jdtang): Add checks throughout the codebase for out-of-memory conditions.
// Function-pointer type for an allocation callback: it receives an opaque
// `userdata` pointer and a requested `size`, and returns a void*.
// NOTE(review): presumably `userdata` is the value stored in the options
// struct's `userdata` field, and the return value is the newly allocated
// block -- confirm against the callers.
549typedef void* (*GumboAllocatorFunction)(void* userdata, size_t size);
550
// Function-pointer type for the matching release callback: it receives the
// same kind of opaque `userdata` pointer plus the pointer `ptr` to release,
// and returns nothing.
555typedef void (*GumboDeallocatorFunction)(void* userdata, void* ptr);
556
563typedef struct GumboInternalOptions {
566
569
574 void* userdata;
575
581
587
596
611
620
623
625typedef struct GumboInternalOutput {
630 GumboNode* document;
631
636 GumboNode* root;
637
645 GumboVector /* GumboError */ errors;
647
// Takes a character buffer and returns a pointer to a GumboOutput.
// NOTE(review): no length is passed, so `buffer` is presumably a
// NUL-terminated string parsed with kGumboDefaultOptions, and the result is
// presumably released with gumbo_destroy_output -- confirm against the
// implementation.
655GumboOutput* gumbo_parse(const char* buffer);
656
662 const GumboOptions* options, const char* buffer, size_t buffer_length);
663
// Takes the options and a GumboOutput pointer; returns nothing.
// NOTE(review): by its name this releases a parse result; presumably
// `options` must be the same options (in particular the same deallocator and
// userdata) that were used to produce `output` -- confirm against the
// implementation.
665void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output);
666
667#ifdef __cplusplus
668}
669#endif
670
671#endif // GUMBO_GUMBO_H_
GumboAttributeNamespaceEnum
Definition gumbo.h:212
void(* GumboDeallocatorFunction)(void *userdata, void *ptr)
Definition gumbo.h:555
GumboOutput * gumbo_parse(const char *buffer)
int gumbo_vector_index_of(GumboVector *vector, const void *element)
const char * gumbo_normalize_svg_tagname(const GumboStringPiece *tagname)
GumboNamespaceEnum
Definition gumbo.h:334
GumboTag
Definition gumbo.h:156
void *(* GumboAllocatorFunction)(void *userdata, size_t size)
Definition gumbo.h:549
bool gumbo_string_equals_ignore_case(const GumboStringPiece *str1, const GumboStringPiece *str2)
GumboNodeType
Definition gumbo.h:291
@ GUMBO_NODE_CDATA
Definition gumbo.h:299
@ GUMBO_NODE_TEXT
Definition gumbo.h:297
@ GUMBO_NODE_DOCUMENT
Definition gumbo.h:293
@ GUMBO_NODE_WHITESPACE
Definition gumbo.h:303
@ GUMBO_NODE_COMMENT
Definition gumbo.h:301
@ GUMBO_NODE_TEMPLATE
Definition gumbo.h:309
@ GUMBO_NODE_ELEMENT
Definition gumbo.h:295
GumboParseFlags
Definition gumbo.h:348
@ GUMBO_INSERTION_ADOPTION_AGENCY_CLONED
Definition gumbo.h:406
@ GUMBO_INSERTION_NORMAL
Definition gumbo.h:353
@ GUMBO_INSERTION_CONVERTED_FROM_END_TAG
Definition gumbo.h:390
@ GUMBO_INSERTION_IMPLICIT_END_TAG
Definition gumbo.h:374
@ GUMBO_INSERTION_FOSTER_PARENTED
Definition gumbo.h:415
@ GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT
Definition gumbo.h:403
@ GUMBO_INSERTION_FROM_ISINDEX
Definition gumbo.h:393
@ GUMBO_INSERTION_ADOPTION_AGENCY_MOVED
Definition gumbo.h:409
@ GUMBO_INSERTION_BY_PARSER
Definition gumbo.h:361
@ GUMBO_INSERTION_IMPLIED
Definition gumbo.h:382
@ GUMBO_INSERTION_FROM_IMAGE
Definition gumbo.h:396
void gumbo_destroy_output(const GumboOptions *options, GumboOutput *output)
GumboTag gumbo_tag_enum(const char *tagname)
bool gumbo_string_equals(const GumboStringPiece *str1, const GumboStringPiece *str2)
const char * gumbo_normalized_tagname(GumboTag tag)
const GumboStringPiece kGumboEmptyString
const GumboVector kGumboEmptyVector
GumboQuirksModeEnum
Definition gumbo.h:321
const GumboSourcePosition kGumboEmptySourcePosition
void gumbo_tag_from_original_text(GumboStringPiece *text)
GumboAttribute * gumbo_get_attribute(const GumboVector *attrs, const char *name)
GumboOutput * gumbo_parse_with_options(const GumboOptions *options, const char *buffer, size_t buffer_length)
const GumboOptions kGumboDefaultOptions
Definition gumbo.h:224
GumboSourcePosition name_start
Definition gumbo.h:264
GumboSourcePosition name_end
Definition gumbo.h:271
GumboStringPiece original_value
Definition gumbo.h:261
GumboSourcePosition value_end
Definition gumbo.h:277
GumboStringPiece original_name
Definition gumbo.h:243
GumboAttributeNamespaceEnum attr_namespace
Definition gumbo.h:231
GumboSourcePosition value_start
Definition gumbo.h:274
const char * name
Definition gumbo.h:237
const char * value
Definition gumbo.h:251
Definition gumbo.h:421
GumboQuirksModeEnum doc_type_quirks_mode
Definition gumbo.h:441
GumboVector children
Definition gumbo.h:427
Definition gumbo.h:472
GumboStringPiece original_end_tag
Definition gumbo.h:498
GumboNamespaceEnum tag_namespace
Definition gumbo.h:483
GumboSourcePosition start_pos
Definition gumbo.h:501
GumboVector attributes
Definition gumbo.h:510
GumboTag tag
Definition gumbo.h:480
GumboVector children
Definition gumbo.h:477
GumboStringPiece original_tag
Definition gumbo.h:491
GumboSourcePosition end_pos
Definition gumbo.h:504
Definition gumbo.h:517
GumboNode * parent
Definition gumbo.h:522
GumboNodeType type
Definition gumbo.h:519
GumboParseFlags parse_flags
Definition gumbo.h:532
size_t index_within_parent
Definition gumbo.h:525
union GumboInternalNode::@0 v
Definition gumbo.h:563
GumboTag fragment_context
Definition gumbo.h:610
bool stop_on_first_error
Definition gumbo.h:586
GumboAllocatorFunction allocator
Definition gumbo.h:565
void * userdata
Definition gumbo.h:574
GumboDeallocatorFunction deallocator
Definition gumbo.h:568
int tab_stop
Definition gumbo.h:580
GumboNamespaceEnum fragment_namespace
Definition gumbo.h:618
int max_errors
Definition gumbo.h:595
Definition gumbo.h:625
GumboNode * root
Definition gumbo.h:636
GumboVector errors
Definition gumbo.h:645
GumboNode * document
Definition gumbo.h:630
Definition gumbo.h:67
Definition gumbo.h:88
const char * data
Definition gumbo.h:90
size_t length
Definition gumbo.h:93
Definition gumbo.h:448
GumboStringPiece original_text
Definition gumbo.h:459
const char * text
Definition gumbo.h:453
GumboSourcePosition start_pos
Definition gumbo.h:465
Definition gumbo.h:122
void ** data
Definition gumbo.h:126
unsigned int capacity
Definition gumbo.h:132
unsigned int length
Definition gumbo.h:129