<html>
<head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> <title>HTMLparser: interface for an HTML 4.0 non-verifying parser</title> <meta name="generator" content="Libxml2 devhelp stylesheet"/> <link rel="start" href="index.html" title="libxml2 Reference Manual"/> <link rel="up" href="general.html" title="API"/> <link rel="stylesheet" href="style.css" type="text/css"/> <link rel="chapter" href="general.html" title="API"/> </head> <body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"> <table class="navigation" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2"> <tr valign="middle"> <td> <a accesskey="p" href="libxml2-DOCBparser.html"> <img src="left.png" width="24" height="24" border="0" alt="Prev"/> </a> </td> <td> <a accesskey="u" href="general.html"> <img src="up.png" width="24" height="24" border="0" alt="Up"/> </a> </td> <td> <a accesskey="h" href="index.html"> <img src="home.png" width="24" height="24" border="0" alt="Home"/> </a> </td> <td> <a accesskey="n" href="libxml2-HTMLtree.html"> <img src="right.png" width="24" height="24" border="0" alt="Next"/> </a> </td> <th width="100%" align="center">libxml2 Reference Manual</th> </tr> </table> <h2> <span class="refentrytitle">HTMLparser</span> </h2> <p>HTMLparser - interface for an HTML 4.0 non-verifying parser</p> <p>this module implements an HTML 4.0 non-verifying parser with API compatible with the XML parser ones. It should be able to parse "real world" HTML, even if severely broken from a specification point of view. </p> <p>Author(s): Daniel Veillard </p> <div class="refsynopsisdiv"> <h2>Synopsis</h2> <pre class="synopsis">#define <a href="#htmlDefaultSubelement">htmlDefaultSubelement</a>(elt);
#define <a href="#htmlElementAllowedHereDesc">htmlElementAllowedHereDesc</a>(parent, elt); #define <a href="#htmlRequiredAttrs">htmlRequiredAttrs</a>(elt); typedef <a href="libxml2-parser.html#xmlParserNodeInfo">xmlParserNodeInfo</a> <a href="#htmlParserNodeInfo">htmlParserNodeInfo</a>; typedef <a href="libxml2-tree.html#xmlParserInput">xmlParserInput</a> <a href="#htmlParserInput">htmlParserInput</a>; typedef <a href="libxml2-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> <a href="#htmlParserCtxtPtr">htmlParserCtxtPtr</a>; typedef struct _htmlEntityDesc <a href="#htmlEntityDesc">htmlEntityDesc</a>; typedef <a href="libxml2-tree.html#xmlDocPtr">xmlDocPtr</a> <a href="#htmlDocPtr">htmlDocPtr</a>; typedef <a href="libxml2-tree.html#xmlSAXHandlerPtr">xmlSAXHandlerPtr</a> <a href="#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a>; typedef enum <a href="#htmlStatus">htmlStatus</a>; typedef <a href="libxml2-tree.html#xmlNodePtr">xmlNodePtr</a> <a href="#htmlNodePtr">htmlNodePtr</a>; typedef <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * <a href="#htmlElemDescPtr">htmlElemDescPtr</a>; typedef struct _htmlElemDesc <a href="#htmlElemDesc">htmlElemDesc</a>; typedef <a href="libxml2-tree.html#xmlSAXHandler">xmlSAXHandler</a> <a href="#htmlSAXHandler">htmlSAXHandler</a>; typedef <a href="libxml2-tree.html#xmlParserInputPtr">xmlParserInputPtr</a> <a href="#htmlParserInputPtr">htmlParserInputPtr</a>; typedef enum <a href="#htmlParserOption">htmlParserOption</a>; typedef <a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * <a href="#htmlEntityDescPtr">htmlEntityDescPtr</a>; typedef <a href="libxml2-tree.html#xmlParserCtxt">xmlParserCtxt</a> <a href="#htmlParserCtxt">htmlParserCtxt</a>; int <a href="#htmlIsScriptAttribute">htmlIsScriptAttribute</a> (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * name); int <a href="#htmlHandleOmittedElem">htmlHandleOmittedElem</a> (int val); <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a 
href="#htmlReadFd">htmlReadFd</a> (int fd, <br/> const char * URL, <br/> const char * encoding, <br/> int options); <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlReadIO">htmlReadIO</a> (<a href="libxml2-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread, <br/> <a href="libxml2-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose, <br/> void * ioctx, <br/> const char * URL, <br/> const char * encoding, <br/> int options); <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlParseFile">htmlParseFile</a> (const char * filename, <br/> const char * encoding); <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlCtxtReadDoc">htmlCtxtReadDoc</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * URL, <br/> const char * encoding, <br/> int options); int <a href="#htmlAutoCloseTag">htmlAutoCloseTag</a> (<a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * name, <br/> <a href="libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem); int <a href="#htmlParseChunk">htmlParseChunk</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const char * chunk, <br/> int size, <br/> int terminate); const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * <a href="#htmlTagLookup">htmlTagLookup</a> (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * tag); <a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="#htmlCreateMemoryParserCtxt">htmlCreateMemoryParserCtxt</a> (const char * buffer, <br/> int size); void <a href="#htmlCtxtReset">htmlCtxtReset</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); int <a href="#htmlElementAllowedHere">htmlElementAllowedHere</a> (const 
<a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * parent, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * elt); <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlCtxtReadIO">htmlCtxtReadIO</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> <a href="libxml2-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread, <br/> <a href="libxml2-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose, <br/> void * ioctx, <br/> const char * URL, <br/> const char * encoding, <br/> int options); <a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="#htmlCreatePushParserCtxt">htmlCreatePushParserCtxt</a> (<a href="libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, <br/> void * user_data, <br/> const char * chunk, <br/> int size, <br/> const char * filename, <br/> <a href="libxml2-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc); <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlReadMemory">htmlReadMemory</a> (const char * buffer, <br/> int size, <br/> const char * URL, <br/> const char * encoding, <br/> int options); int <a href="#htmlIsAutoClosed">htmlIsAutoClosed</a> (<a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, <br/> <a href="libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem); int <a href="#htmlParseCharRef">htmlParseCharRef</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlReadDoc">htmlReadDoc</a> (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * URL, <br/> const char * encoding, <br/> int options); int <a href="#htmlEncodeEntities">htmlEncodeEntities</a> (unsigned char * out, <br/> int * outlen, <br/> const unsigned char * in, <br/> int * inlen, <br/> int quoteChar); <a 
href="libxml2-HTMLparser.html#htmlStatus">htmlStatus</a> <a href="#htmlNodeStatus">htmlNodeStatus</a> (const <a href="libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> node, <br/> int legacy); <a href="libxml2-HTMLparser.html#htmlStatus">htmlStatus</a> <a href="#htmlAttrAllowed">htmlAttrAllowed</a> (const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * elt, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * attr, <br/> int legacy); <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlSAXParseFile">htmlSAXParseFile</a> (const char * filename, <br/> const char * encoding, <br/> <a href="libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, <br/> void * userData); const <a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * <a href="#htmlParseEntityRef">htmlParseEntityRef</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> ** str); <a href="libxml2-HTMLparser.html#htmlStatus">htmlStatus</a> <a href="#htmlElementStatusHere">htmlElementStatusHere</a> (const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * parent, <br/> const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * elt); const <a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * <a href="#htmlEntityValueLookup">htmlEntityValueLookup</a> (unsigned int value); void <a href="#htmlParseElement">htmlParseElement</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); int <a href="#UTF8ToHtml">UTF8ToHtml</a> (unsigned char * out, <br/> int * outlen, <br/> const unsigned char * in, <br/> int * inlen); const <a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * <a href="#htmlEntityLookup">htmlEntityLookup</a> (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * name); void <a href="#htmlFreeParserCtxt">htmlFreeParserCtxt</a> (<a 
href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlCtxtReadMemory">htmlCtxtReadMemory</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const char * buffer, <br/> int size, <br/> const char * URL, <br/> const char * encoding, <br/> int options); <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlCtxtReadFd">htmlCtxtReadFd</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> int fd, <br/> const char * URL, <br/> const char * encoding, <br/> int options); <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlReadFile">htmlReadFile</a> (const char * filename, <br/> const char * encoding, <br/> int options); <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlCtxtReadFile">htmlCtxtReadFile</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const char * filename, <br/> const char * encoding, <br/> int options); int <a href="#htmlParseDocument">htmlParseDocument</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); <a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="#htmlNewParserCtxt">htmlNewParserCtxt</a> (void); <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlSAXParseDoc">htmlSAXParseDoc</a> (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * encoding, <br/> <a href="libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, <br/> void * userData); int <a href="#htmlCtxtUseOptions">htmlCtxtUseOptions</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> int options); <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlParseDoc">htmlParseDoc</a> (const <a 
href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * encoding); </pre>
</div> <div class="refsect1" lang="en"> <h2>Description</h2> </div> <div class="refsect1" lang="en"> <h2>Details</h2> <div class="refsect2" lang="en"> <div class="refsect2" lang="en"><h3><a name="htmlDefaultSubelement">Macro </a>htmlDefaultSubelement</h3><pre class="programlisting">#define <a href="#htmlDefaultSubelement">htmlDefaultSubelement</a>(elt);
</pre><p>Returns the default subelement for this
element</p><div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">elt
:</span></td><td>HTML
element</td></tr></tbody></table></div>
</div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlElementAllowedHereDesc">Macro </a>htmlElementAllowedHereDesc</h3><pre class="programlisting">#define <a href="#htmlElementAllowedHereDesc">htmlElementAllowedHereDesc</a>(parent, elt);
</pre><p>Checks whether an HTML element description may be a
direct child of the specified element. Returns 1 if allowed; 0
otherwise.</p><div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">parent
:</span></td><td>HTML
parent element</td></tr><tr><td><span
class="term">elt
:</span></td><td>HTML
element</td></tr></tbody></table></div>
</div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlRequiredAttrs">Macro </a>htmlRequiredAttrs</h3><pre class="programlisting">#define <a href="#htmlRequiredAttrs">htmlRequiredAttrs</a>(elt);
</pre><p>Returns the attributes required for the specified
element.</p><div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">elt
:</span></td><td>HTML
element</td></tr></tbody></table></div>
</div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlDocPtr">Typedef </a>htmlDocPtr</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlDocPtr">xmlDocPtr</a> htmlDocPtr;
</pre><p/> </div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlElemDesc">Structure </a>htmlElemDesc</h3><pre class="programlisting">struct _htmlElemDesc { const char * name : The tag name char startTag : Whether the start tag can be implied char endTag : Whether the end tag can be implied char saveEndTag : Whether the end tag should be saved char empty : Is this an empty element ? char depr : Is this a deprecated element ? char dtd : 1: only in Loose DTD, 2: only Frameset one char isinline : is this a block 0 or inline 1 element const char * desc : the description NRK Jan.2003 * New fields encapsulating HTML structure const char ** subelts : allowed sub-elements of this element const char * defaultsubelt : subelement for suggested auto-repair if necessary or NULL const char ** attrs_opt : Optional Attributes const char ** attrs_depr : Additional deprecated attributes const char ** attrs_req : Required attributes
} htmlElemDesc; </pre><p/> </div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlElemDescPtr">Typedef </a>htmlElemDescPtr</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * htmlElemDescPtr;
</pre><p/> </div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlEntityDesc">Structure </a>htmlEntityDesc</h3><pre class="programlisting">struct _htmlEntityDesc { unsigned int value : the UNICODE value for the character const char * name : The entity name const char * desc : the description
} htmlEntityDesc; </pre><p/> </div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlEntityDescPtr">Typedef </a>htmlEntityDescPtr</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * htmlEntityDescPtr;
</pre><p/> </div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlNodePtr">Typedef </a>htmlNodePtr</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlNodePtr">xmlNodePtr</a> htmlNodePtr;
</pre><p/> </div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlParserCtxt">Typedef </a>htmlParserCtxt</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlParserCtxt">xmlParserCtxt</a> htmlParserCtxt;
</pre><p/> </div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlParserCtxtPtr">Typedef </a>htmlParserCtxtPtr</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> htmlParserCtxtPtr;
</pre><p/> </div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlParserInput">Typedef </a>htmlParserInput</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlParserInput">xmlParserInput</a> htmlParserInput;
</pre><p/> </div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlParserInputPtr">Typedef </a>htmlParserInputPtr</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlParserInputPtr">xmlParserInputPtr</a> htmlParserInputPtr;
</pre><p/> </div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlParserNodeInfo">Typedef </a>htmlParserNodeInfo</h3><pre class="programlisting"><a href="libxml2-parser.html#xmlParserNodeInfo">xmlParserNodeInfo</a> htmlParserNodeInfo;
</pre><p/> </div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlParserOption">Enum </a>htmlParserOption</h3><pre class="programlisting">enum <a href="#htmlParserOption">htmlParserOption</a> { <a name="HTML_PARSE_RECOVER">HTML_PARSE_RECOVER</a> = 1 Relaxed parsing <a name="HTML_PARSE_NODEFDTD">HTML_PARSE_NODEFDTD</a> = 4 do not default a doctype if not found <a name="HTML_PARSE_NOERROR">HTML_PARSE_NOERROR</a> = 32 /* suppress error reports */ <a name="HTML_PARSE_NOWARNING">HTML_PARSE_NOWARNING</a> = 64 /* suppress warning reports */ <a name="HTML_PARSE_PEDANTIC">HTML_PARSE_PEDANTIC</a> = 128 /* pedantic error reporting */ <a name="HTML_PARSE_NOBLANKS">HTML_PARSE_NOBLANKS</a> = 256 /* remove blank nodes */ <a name="HTML_PARSE_NONET">HTML_PARSE_NONET</a> = 2048 /* Forbid network access */ <a name="HTML_PARSE_NOIMPLIED">HTML_PARSE_NOIMPLIED</a> = 8192 /* Do not add implied html/body... elements */ <a name="HTML_PARSE_COMPACT">HTML_PARSE_COMPACT</a> = 65536 /* compact small text nodes */ <a name="HTML_PARSE_IGNORE_ENC">HTML_PARSE_IGNORE_ENC</a> = 2097152 /* ignore internal document encoding hint */
}; </pre><p/> </div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlSAXHandler">Typedef </a>htmlSAXHandler</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlSAXHandler">xmlSAXHandler</a> htmlSAXHandler;
</pre><p/> </div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlSAXHandlerPtr">Typedef </a>htmlSAXHandlerPtr</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlSAXHandlerPtr">xmlSAXHandlerPtr</a> htmlSAXHandlerPtr;
</pre><p/> </div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlStatus">Enum </a>htmlStatus</h3><pre class="programlisting">enum <a href="#htmlStatus">htmlStatus</a> { <a name="HTML_NA">HTML_NA</a> = 0 /* something we don't check at all */ <a name="HTML_INVALID">HTML_INVALID</a> = 1 <a name="HTML_DEPRECATED">HTML_DEPRECATED</a> = 2 <a name="HTML_VALID">HTML_VALID</a> = 4 <a name="HTML_REQUIRED">HTML_REQUIRED</a> = 12 /* VALID bit set so ( & HTML_VALID ) is TRUE */
}; </pre><p/> </div>
<hr/> <div class="refsect2" lang="en"><h3><a name="UTF8ToHtml"/>UTF8ToHtml ()</h3><pre class="programlisting">int UTF8ToHtml (unsigned char * out, <br/> int * outlen, <br/> const unsigned char * in, <br/> int * inlen)<br/>
</pre><p>Take a block of UTF-8 chars in and try to convert it
to an ASCII plus HTML entities block of chars out.</p> <div
class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">out
:</span></td><td>a
pointer to an array of bytes to store the
result</td></tr><tr><td><span
class="term">outlen
:</span></td><td>the
length of @out</td></tr><tr><td><span
class="term">in
:</span></td><td>a
pointer to an array of UTF-8
chars</td></tr><tr><td><span
class="term">inlen
:</span></td><td>the
length of @in</td></tr><tr><td><span
class="term">Returns
:</span></td><td>0
if success, -2 if the transcoding fails, or -1 otherwise. The value of
@inlen after return is the number of octets consumed if the return value is
positive, else unpredictable. The value of @outlen after return is the
number of octets
produced.</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlAttrAllowed"/>htmlAttrAllowed ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlStatus">htmlStatus</a> htmlAttrAllowed (const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * elt, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * attr, <br/> int legacy)<br/>
</pre><p>Checks whether an <a
href="libxml2-SAX.html#attribute">attribute</a> is valid for an
element. Has full knowledge of Required and Deprecated attributes.</p>
<div class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">elt
:</span></td><td>HTML
element</td></tr><tr><td><span
class="term">attr
:</span></td><td>HTML
<a
href="libxml2-SAX.html#attribute">attribute</a></td></tr><tr><td><span
class="term">legacy
:</span></td><td>whether
to allow deprecated
attributes</td></tr><tr><td><span
class="term">Returns
:</span></td><td>one
of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, <a
href="libxml2-HTMLparser.html#HTML_INVALID">HTML_INVALID</a></td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlAutoCloseTag"/>htmlAutoCloseTag ()</h3><pre class="programlisting">int htmlAutoCloseTag (<a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * name, <br/> <a href="libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem)<br/>
</pre><p>The HTML DTD allows a tag to implicitly close other
tags. The list is kept in htmlStartClose array. This function checks if the
element or one of its children would autoclose the given
tag.</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">doc
:</span></td><td>the
HTML document</td></tr><tr><td><span
class="term">name
:</span></td><td>The
tag name</td></tr><tr><td><span
class="term">elem
:</span></td><td>the
HTML element</td></tr><tr><td><span
class="term">Returns
:</span></td><td>1
if autoclose, 0
otherwise</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlCreateMemoryParserCtxt"/>htmlCreateMemoryParserCtxt ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreateMemoryParserCtxt (const char * buffer, <br/> int size)<br/>
</pre><p>Create a parser context for an HTML in-memory
document.</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">buffer
:</span></td><td>a
pointer to a char array</td></tr><tr><td><span
class="term">size
:</span></td><td>the
size of the array</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
new parser context or
NULL</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlCreatePushParserCtxt"/>htmlCreatePushParserCtxt ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreatePushParserCtxt (<a href="libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, <br/> void * user_data, <br/> const char * chunk, <br/> int size, <br/> const char * filename, <br/> <a href="libxml2-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc)<br/>
</pre><p>Create a parser context for using the HTML parser in
push mode. The value of @filename is used for fetching external entities and
error/warning reports.</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">sax
:</span></td><td>a
SAX handler</td></tr><tr><td><span
class="term">user_data
:</span></td><td>The
user data returned on SAX
callbacks</td></tr><tr><td><span
class="term">chunk
:</span></td><td>a
pointer to an array of
chars</td></tr><tr><td><span
class="term">size
:</span></td><td>number
of chars in the array</td></tr><tr><td><span
class="term">filename
:</span></td><td>an
optional file name or URI</td></tr><tr><td><span
class="term">enc
:</span></td><td>an
optional encoding</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
new parser context or
NULL</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlCtxtReadDoc"/>htmlCtxtReadDoc ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadDoc (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
</pre><p>Parse an HTML in-memory document and build a tree. This
reuses the existing @ctxt parser context.</p> <div
class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">ctxt
:</span></td><td>an
HTML parser context</td></tr><tr><td><span
class="term">cur
:</span></td><td>a
pointer to a zero terminated
string</td></tr><tr><td><span
class="term">URL
:</span></td><td>the
base URL to use for the
document</td></tr><tr><td><span
class="term">encoding
:</span></td><td>the
document encoding, or
NULL</td></tr><tr><td><span
class="term">options
:</span></td><td>a
combination of
htmlParserOption(s)</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
resulting document
tree</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlCtxtReadFd"/>htmlCtxtReadFd ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadFd (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> int fd, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
</pre><p>Parse an HTML document from a file descriptor and build a tree.
This reuses the existing @ctxt parser context.</p> <div
class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">ctxt
:</span></td><td>an
HTML parser context</td></tr><tr><td><span
class="term">fd
:</span></td><td>an
open file descriptor</td></tr><tr><td><span
class="term">URL
:</span></td><td>the
base URL to use for the
document</td></tr><tr><td><span
class="term">encoding
:</span></td><td>the
document encoding, or
NULL</td></tr><tr><td><span
class="term">options
:</span></td><td>a
combination of
htmlParserOption(s)</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
resulting document
tree</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlCtxtReadFile"/>htmlCtxtReadFile ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadFile (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const char * filename, <br/> const char * encoding, <br/> int options)<br/>
</pre><p>Parse an HTML file from the filesystem or the network.
This reuses the existing @ctxt parser context.</p> <div
class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">ctxt
:</span></td><td>an
HTML parser context</td></tr><tr><td><span
class="term">filename
:</span></td><td>a
file or URL</td></tr><tr><td><span
class="term">encoding
:</span></td><td>the
document encoding, or
NULL</td></tr><tr><td><span
class="term">options
:</span></td><td>a
combination of
htmlParserOption(s)</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
resulting document
tree</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlCtxtReadIO"/>htmlCtxtReadIO ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadIO (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> <a href="libxml2-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread, <br/> <a href="libxml2-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose, <br/> void * ioctx, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
</pre><p>parse an HTML document from I/O functions and source
and build a tree. This reuses the existing @ctxt parser context</p>
<div class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">ctxt
:</span></td><td>an
HTML parser context</td></tr><tr><td><span
class="term">ioread
:</span></td><td>an
I/O read function</td></tr><tr><td><span
class="term">ioclose
:</span></td><td>an
I/O close function</td></tr><tr><td><span
class="term">ioctx
:</span></td><td>an
I/O handler</td></tr><tr><td><span
class="term">URL
:</span></td><td>the
base URL to use for the
document</td></tr><tr><td><span
class="term">encoding
:</span></td><td>the
document encoding, or
NULL</td></tr><tr><td><span
class="term">options
:</span></td><td>a
combination of
htmlParserOption(s)</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
resulting document
tree</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlCtxtReadMemory"/>htmlCtxtReadMemory ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadMemory (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const char * buffer, <br/> int size, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
</pre><p>Parse an HTML in-memory document and build a tree. This
reuses the existing @ctxt parser context.</p> <div
class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">ctxt
:</span></td><td>an
HTML parser context</td></tr><tr><td><span
class="term">buffer
:</span></td><td>a
pointer to a char array</td></tr><tr><td><span
class="term">size
:</span></td><td>the
size of the array</td></tr><tr><td><span
class="term">URL
:</span></td><td>the
base URL to use for the
document</td></tr><tr><td><span
class="term">encoding
:</span></td><td>the
document encoding, or
NULL</td></tr><tr><td><span
class="term">options
:</span></td><td>a
combination of
htmlParserOption(s)</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
resulting document
tree</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlCtxtReset"/>htmlCtxtReset ()</h3><pre class="programlisting">void htmlCtxtReset (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt)<br/>
</pre><p>Reset a parser context</p> <div
class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">ctxt
:</span></td><td>an
HTML parser
context</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlCtxtUseOptions"/>htmlCtxtUseOptions ()</h3><pre class="programlisting">int htmlCtxtUseOptions (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> int options)<br/>
</pre><p>Applies the options to the parser context</p>
<div class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">ctxt
:</span></td><td>an
HTML parser context</td></tr><tr><td><span
class="term">options
:</span></td><td>a
combination of
htmlParserOption(s)</td></tr><tr><td><span
class="term">Returns
:</span></td><td>0
in case of success, the set of unknown or unimplemented options in case of
error.</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlElementAllowedHere"/>htmlElementAllowedHere ()</h3><pre class="programlisting">int htmlElementAllowedHere (const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * parent, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * elt)<br/>
</pre><p>Checks whether an HTML element may be a direct child
of a parent element. Note - doesn't check for deprecated
elements</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">parent
:</span></td><td>HTML
parent element</td></tr><tr><td><span
class="term">elt
:</span></td><td>HTML
element</td></tr><tr><td><span
class="term">Returns
:</span></td><td>1
if allowed; 0
otherwise.</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlElementStatusHere"/>htmlElementStatusHere ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlStatus">htmlStatus</a> htmlElementStatusHere (const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * parent, <br/> const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * elt)<br/>
</pre><p>Checks whether an HTML element may be a direct child
of a parent element, and if so whether it is valid or deprecated.</p>
<div class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">parent
:</span></td><td>HTML
parent element</td></tr><tr><td><span
class="term">elt
:</span></td><td>HTML
element</td></tr><tr><td><span
class="term">Returns
:</span></td><td>one
of HTML_VALID, HTML_DEPRECATED, <a
href="libxml2-HTMLparser.html#HTML_INVALID">HTML_INVALID</a></td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlEncodeEntities"/>htmlEncodeEntities ()</h3><pre class="programlisting">int htmlEncodeEntities (unsigned char * out, <br/> int * outlen, <br/> const unsigned char * in, <br/> int * inlen, <br/> int quoteChar)<br/>
</pre><p>Take a block of UTF-8 chars in and try to convert it
to an ASCII plus HTML entities block of chars out.</p> <div
class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">out
:</span></td><td>a
pointer to an array of bytes to store the
result</td></tr><tr><td><span
class="term">outlen
:</span></td><td>the
length of @out</td></tr><tr><td><span
class="term">in
:</span></td><td>a
pointer to an array of UTF-8
chars</td></tr><tr><td><span
class="term">inlen
:</span></td><td>the
length of @in</td></tr><tr><td><span
class="term">quoteChar
:</span></td><td>the
quote character to escape (' or ") or
zero.</td></tr><tr><td><span
class="term">Returns
:</span></td><td>0
if success, -2 if the transcoding fails, or -1 otherwise. The value of
@inlen after return is the number of octets consumed if the return value is
positive, else unpredictable. The value of @outlen after return is the
number of octets
produced.</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlEntityLookup"/>htmlEntityLookup ()</h3><pre class="programlisting">const <a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * htmlEntityLookup (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * name)<br/>
</pre><p>Lookup the given entity in EntitiesTable. TODO: the
linear scan is really ugly, a hash table is really needed.</p>
<div class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">name
:</span></td><td>the
entity name</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
associated <a
href="libxml2-HTMLparser.html#htmlEntityDescPtr">htmlEntityDescPtr</a>
if found, NULL
otherwise.</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlEntityValueLookup"/>htmlEntityValueLookup ()</h3><pre class="programlisting">const <a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * htmlEntityValueLookup (unsigned int value)<br/>
</pre><p>Lookup the given entity in EntitiesTable. TODO: the
linear scan is really ugly, a hash table is really needed.</p>
<div class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">value
:</span></td><td>the
entity's Unicode
value</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
associated <a
href="libxml2-HTMLparser.html#htmlEntityDescPtr">htmlEntityDescPtr</a>
if found, NULL
otherwise.</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlFreeParserCtxt"/>htmlFreeParserCtxt ()</h3><pre class="programlisting">void htmlFreeParserCtxt (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt)<br/>
</pre><p>Free all the memory used by a parser context. However,
the parsed document in ctxt-&gt;myDoc is not freed.</p> <div
class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">ctxt
:</span></td><td>an
HTML parser
context</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlHandleOmittedElem"/>htmlHandleOmittedElem ()</h3><pre class="programlisting">int htmlHandleOmittedElem (int val)<br/>
</pre><p>Set and return the previous value for handling HTML
omitted tags.</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">val
:</span></td><td>int
0 or 1</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
last value: 0 for no handling, 1 for auto
insertion.</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlIsAutoClosed"/>htmlIsAutoClosed ()</h3><pre class="programlisting">int htmlIsAutoClosed (<a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, <br/> <a href="libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem)<br/>
</pre><p>The HTML DTD allows a tag to implicitly close other
tags. The list is kept in htmlStartClose array. This function checks if a
tag is autoclosed by one of its children.</p> <div
class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">doc
:</span></td><td>the
HTML document</td></tr><tr><td><span
class="term">elem
:</span></td><td>the
HTML element</td></tr><tr><td><span
class="term">Returns
:</span></td><td>1
if autoclosed, 0
otherwise</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlIsScriptAttribute"/>htmlIsScriptAttribute ()</h3><pre class="programlisting">int htmlIsScriptAttribute (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * name)<br/>
</pre><p>Check if an <a
href="libxml2-SAX.html#attribute">attribute</a> is of content type
Script</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">name
:</span></td><td>an
<a href="libxml2-SAX.html#attribute">attribute</a>
name</td></tr><tr><td><span
class="term">Returns
:</span></td><td>1
if the <a href="libxml2-SAX.html#attribute">attribute</a> is a
script, 0
otherwise</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlNewParserCtxt"/>htmlNewParserCtxt ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlNewParserCtxt (void)<br/>
</pre><p>Allocate and initialize a new parser
context.</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">Returns
:</span></td><td>the
<a
href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a>
or NULL in case of allocation
error</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlNodeStatus"/>htmlNodeStatus ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlStatus">htmlStatus</a> htmlNodeStatus (const <a href="libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> node, <br/> int legacy)<br/>
</pre><p>Checks whether the tree node is valid. Experimental
(the author only uses the HTML enhancements in a SAX parser)</p>
<div class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">node
:</span></td><td>an
<a href="libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr</a>
in a tree</td></tr><tr><td><span
class="term">legacy
:</span></td><td>whether
to allow deprecated elements (YES is faster here for Element
nodes)</td></tr><tr><td><span
class="term">Returns
:</span></td><td>for
Element nodes, a return from <a
href="libxml2-HTMLparser.html#htmlElementAllowedHere">htmlElementAllowedHere</a>
(if legacy allowed) or <a
href="libxml2-HTMLparser.html#htmlElementStatusHere">htmlElementStatusHere</a>
(otherwise); for Attribute nodes, a return from <a
href="libxml2-HTMLparser.html#htmlAttrAllowed">htmlAttrAllowed</a>;
for other nodes, <a
href="libxml2-HTMLparser.html#HTML_NA">HTML_NA</a> (no checks
performed)</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlParseCharRef"/>htmlParseCharRef ()</h3><pre class="programlisting">int htmlParseCharRef (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt)<br/>
</pre><p>parse Reference declarations [66] CharRef ::=
'&amp;#' [0-9]+ ';' | '&amp;#x'
[0-9a-fA-F]+ ';'</p> <div
class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">ctxt
:</span></td><td>an
HTML parser context</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
value parsed (as an
int)</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlParseChunk"/>htmlParseChunk ()</h3><pre class="programlisting">int htmlParseChunk (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const char * chunk, <br/> int size, <br/> int terminate)<br/>
</pre><p>Parse a chunk of memory</p> <div
class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">ctxt
:</span></td><td>an
HTML parser context</td></tr><tr><td><span
class="term">chunk
:</span></td><td>a
char array</td></tr><tr><td><span
class="term">size
:</span></td><td>the
size in bytes of the chunk</td></tr><tr><td><span
class="term">terminate
:</span></td><td>last
chunk indicator</td></tr><tr><td><span
class="term">Returns
:</span></td><td>zero
if no error, the <a
href="libxml2-xmlerror.html#xmlParserErrors">xmlParserErrors</a>
otherwise.</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlParseDoc"/>htmlParseDoc ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseDoc (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * encoding)<br/>
</pre><p>parse an HTML in-memory document and build a
tree.</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">cur
:</span></td><td>a
pointer to an array of <a
href="libxml2-xmlstring.html#xmlChar">xmlChar</a></td></tr><tr><td><span
class="term">encoding
:</span></td><td>a
free form C string describing the HTML document encoding, or
NULL</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
resulting document
tree</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlParseDocument"/>htmlParseDocument ()</h3><pre class="programlisting">int htmlParseDocument (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt)<br/>
</pre><p>parse an HTML document (and build a tree if using the
standard SAX interface).</p> <div
class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">ctxt
:</span></td><td>an
HTML parser context</td></tr><tr><td><span
class="term">Returns
:</span></td><td>0,
or -1 in case of error. The parser context is augmented as a result of the
parsing.</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlParseElement"/>htmlParseElement ()</h3><pre class="programlisting">void htmlParseElement (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt)<br/>
</pre><p>parse an HTML element; this is highly recursive and
is kept for compatibility with previous code versions. [39] element ::=
EmptyElemTag | STag content ETag [41] Attribute ::= Name Eq
AttValue</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">ctxt
:</span></td><td>an
HTML parser
context</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlParseEntityRef"/>htmlParseEntityRef ()</h3><pre class="programlisting">const <a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * htmlParseEntityRef (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> ** str)<br/>
</pre><p>parse an HTML ENTITY reference [68] EntityRef ::=
'&amp;' Name ';'</p> <div
class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">ctxt
:</span></td><td>an
HTML parser context</td></tr><tr><td><span
class="term">str
:</span></td><td>location
to store the entity name</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
associated <a
href="libxml2-HTMLparser.html#htmlEntityDescPtr">htmlEntityDescPtr</a>
if found, or NULL otherwise; if non-NULL, *str will have to be freed by the
caller.</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlParseFile"/>htmlParseFile ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseFile (const char * filename, <br/> const char * encoding)<br/>
</pre><p>parse an HTML file and build a tree. Automatic support
for ZLIB/Compress compressed documents is provided by default if found at
compile-time.</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">filename
:</span></td><td>the
filename</td></tr><tr><td><span
class="term">encoding
:</span></td><td>a
free form C string describing the HTML document encoding, or
NULL</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
resulting document
tree</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlReadDoc"/>htmlReadDoc ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadDoc (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
</pre><p>parse an HTML in-memory document and build a
tree.</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">cur
:</span></td><td>a
pointer to a zero terminated
string</td></tr><tr><td><span
class="term">URL
:</span></td><td>the
base URL to use for the
document</td></tr><tr><td><span
class="term">encoding
:</span></td><td>the
document encoding, or
NULL</td></tr><tr><td><span
class="term">options
:</span></td><td>a
combination of
htmlParserOption(s)</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
resulting document
tree</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlReadFd"/>htmlReadFd ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadFd (int fd, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
</pre><p>parse an HTML document from a file descriptor and build a
tree.</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">fd
:</span></td><td>an
open file descriptor</td></tr><tr><td><span
class="term">URL
:</span></td><td>the
base URL to use for the
document</td></tr><tr><td><span
class="term">encoding
:</span></td><td>the
document encoding, or
NULL</td></tr><tr><td><span
class="term">options
:</span></td><td>a
combination of
htmlParserOption(s)</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
resulting document
tree</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlReadFile"/>htmlReadFile ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadFile (const char * filename, <br/> const char * encoding, <br/> int options)<br/>
</pre><p>parse an HTML file from the filesystem or the
network.</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">filename
:</span></td><td>a
file or URL</td></tr><tr><td><span
class="term">encoding
:</span></td><td>the
document encoding, or
NULL</td></tr><tr><td><span
class="term">options
:</span></td><td>a
combination of
htmlParserOption(s)</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
resulting document
tree</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlReadIO"/>htmlReadIO ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadIO (<a href="libxml2-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread, <br/> <a href="libxml2-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose, <br/> void * ioctx, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
</pre><p>parse an HTML document from I/O functions and source
and build a tree.</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">ioread
:</span></td><td>an
I/O read function</td></tr><tr><td><span
class="term">ioclose
:</span></td><td>an
I/O close function</td></tr><tr><td><span
class="term">ioctx
:</span></td><td>an
I/O handler</td></tr><tr><td><span
class="term">URL
:</span></td><td>the
base URL to use for the
document</td></tr><tr><td><span
class="term">encoding
:</span></td><td>the
document encoding, or
NULL</td></tr><tr><td><span
class="term">options
:</span></td><td>a
combination of
htmlParserOption(s)</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
resulting document
tree</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlReadMemory"/>htmlReadMemory ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadMemory (const char * buffer, <br/> int size, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
</pre><p>parse an HTML in-memory document and build a
tree.</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">buffer
:</span></td><td>a
pointer to a char array</td></tr><tr><td><span
class="term">size
:</span></td><td>the
size of the array</td></tr><tr><td><span
class="term">URL
:</span></td><td>the
base URL to use for the
document</td></tr><tr><td><span
class="term">encoding
:</span></td><td>the
document encoding, or
NULL</td></tr><tr><td><span
class="term">options
:</span></td><td>a
combination of
htmlParserOption(s)</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
resulting document
tree</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlSAXParseDoc"/>htmlSAXParseDoc ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseDoc (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * encoding, <br/> <a href="libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, <br/> void * userData)<br/>
</pre><p>Parse an HTML in-memory document. If sax is not NULL,
use the SAX callbacks to handle parse events. If sax is NULL, fall back to
the default DOM behavior and return a tree.</p> <div
class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">cur
:</span></td><td>a
pointer to an array of <a
href="libxml2-xmlstring.html#xmlChar">xmlChar</a></td></tr><tr><td><span
class="term">encoding
:</span></td><td>a
free form C string describing the HTML document encoding, or
NULL</td></tr><tr><td><span
class="term">sax
:</span></td><td>the
SAX handler block</td></tr><tr><td><span
class="term">userData
:</span></td><td>if
using SAX, this pointer will be provided on
callbacks.</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
resulting document tree unless SAX is NULL or the document is not well
formed.</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlSAXParseFile"/>htmlSAXParseFile ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseFile (const char * filename, <br/> const char * encoding, <br/> <a href="libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, <br/> void * userData)<br/>
</pre><p>parse an HTML file and build a tree. Automatic support
for ZLIB/Compress compressed documents is provided by default if found at
compile-time. It uses the given SAX function block to handle the parsing
callbacks. If sax is NULL, fall back to the default DOM tree building
routines.</p> <div class="variablelist"><table
border="0"><col
align="left"/><tbody><tr><td><span
class="term">filename
:</span></td><td>the
filename</td></tr><tr><td><span
class="term">encoding
:</span></td><td>a
free form C string describing the HTML document encoding, or
NULL</td></tr><tr><td><span
class="term">sax
:</span></td><td>the
SAX handler block</td></tr><tr><td><span
class="term">userData
:</span></td><td>if
using SAX, this pointer will be provided on
callbacks.</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
resulting document tree unless SAX is NULL or the document is not well
formed.</td></tr></tbody></table></div></div>
<hr/> <div class="refsect2" lang="en"><h3><a name="htmlTagLookup"/>htmlTagLookup ()</h3><pre class="programlisting">const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * htmlTagLookup (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * tag)<br/>
</pre><p>Lookup the HTML tag in the ElementTable</p>
<div class="variablelist"><table border="0"><col
align="left"/><tbody><tr><td><span
class="term">tag
:</span></td><td>The
tag name in lowercase</td></tr><tr><td><span
class="term">Returns
:</span></td><td>the
related <a
href="libxml2-HTMLparser.html#htmlElemDescPtr">htmlElemDescPtr</a>
or NULL if not
found.</td></tr></tbody></table></div></div>
<hr/> </div> </div> </body>
</html>