* Summary: interface for an HTML 4.0 non-verifying parser
* Description: this module implements an HTML 4.0 non-verifying parser
*              with API compatible with the XML parser ones. It should
*              be able to parse "real world" HTML, even if severely
*              broken from a specification point of view.
*
* Copy: See Copyright for the status of this software.
*
* Author: Patrick Monnerat <pm@datasphere.ch>, DATASPHERE S.A.

/if not defined(HTML_PARSER_H__)
/define HTML_PARSER_H__

/include "libxmlrpg/xmlversion"

/if defined(LIBXML_HTML_ENABLED)

/include "libxmlrpg/xmlTypesC"
/include "libxmlrpg/parser"

* Most of the back-end structures from XML and HTML are shared.

d htmlParserCtxtPtr… d s based(######typedef######) d like(xmlParserCtxtPtr)

d htmlParserCtxt ds based(htmlParserCtxtPtr) d likeds(xmlParserCtxt)

d htmlParserNodeInfoPtr… d s based(######typedef######) d like(xmlParserNodeInfoPtr)

d htmlParserNodeInfo… d ds based(htmlParserNodeInfoPtr) d likeds(xmlParserNodeInfo)

d htmlSAXHandlerPtr… d s based(######typedef######) d like(xmlSAXHandlerPtr)

d htmlSAXHandler ds based(htmlSAXHandlerPtr) d likeds(xmlSAXHandler)

d htmlParserInputPtr… d s based(######typedef######) d like(xmlParserInputPtr)

d htmlParserInput… d ds based(htmlParserInputPtr) d likeds(xmlParserInput)

d htmlDocPtr s based(######typedef######) d like(xmlDocPtr)

d htmlNodePtr s based(######typedef######) d like(xmlNodePtr)

* Internal description of an HTML element, representing HTML 4.01
* and XHTML 1.0 (which share the same structure).

d htmlElemDescPtr… d s * based(######typedef######)

d htmlElemDesc ds based(htmlElemDescPtr) d align qualified d name * const char * d startTag like(xmlCchar) Start tag implied ? d endTag like(xmlCchar) End tag implied ? d saveEndTag like(xmlCchar) Save end tag ? d empty like(xmlCchar) Empty element ? d depr like(xmlCchar) Deprecated element ? d dtd like(xmlCchar) Loose DTD/Frameset d isinline like(xmlCchar) Block 0/inline elem? d desc * const char *

*
* New fields encapsulating HTML structure
*
* Bugs:
*      This is a very limited representation.  It fails to tell us when
*      an element *requires* subelements (we only have whether they're
*      allowed or not), and it doesn't tell us where CDATA and PCDATA
*      are allowed.  Some element relationships are not fully represented:
*      these are flagged with the word MODIFIER
*

d subelts * const char * * d defaultsubelt * const char * d attrs_opt * const char * * d attrs_depr * const char * * d attrs_req * const char * *

* Internal description of an HTML entity.

d htmlEntityDescPtr… d s * based(######typedef######)

d htmlEntityDesc… d ds based(htmlEntityDescPtr) d align qualified d value like(xmlCuint) d name * const char * d desc * const char *

* There is only few public functions.

d htmlTagLookup pr extproc('htmlTagLookup') d like(htmlElemDescPtr) const d tag * value options(*string) const xmlChar *

d htmlEntityLookup… d pr extproc('htmlEntityLookup') d like(htmlEntityDescPtr) const d name * value options(*string) const xmlChar *

d htmlEntityValueLookup… d pr extproc('htmlEntityValueLookup') d like(htmlEntityDescPtr) const d value value like(xmlCuint)

d htmlIsAutoClosed… d pr extproc('htmlIsAutoClosed') d like(xmlCint) d doc value like(htmlDocPtr) d elem value like(htmlNodePtr)

d htmlAutoCloseTag… d pr extproc('htmlAutoCloseTag') d like(xmlCint) d doc value like(htmlDocPtr) d name * value options(*string) const xmlChar * d elem value like(htmlNodePtr)

d htmlParseEntityRef… d pr extproc('htmlParseEntityRef') d like(htmlEntityDescPtr) const d ctxt value like(htmlParserCtxtPtr) d str * const xmlChar *(*)

d htmlParseCharRef… d pr extproc('htmlParseCharRef') d like(xmlCint) d ctxt value like(htmlParserCtxtPtr)

d htmlParseElement… d pr extproc('htmlParseElement') d ctxt value like(htmlParserCtxtPtr)

d htmlNewParserCtxt… d pr extproc('htmlNewParserCtxt') d like(htmlParserCtxtPtr)

d htmlCreateMemoryParserCtxt… d pr extproc('htmlCreateMemoryParserCtxt') d like(htmlParserCtxtPtr) d buffer * value options(*string) const char * d size value like(xmlCint)

d htmlParseDocument… d pr extproc('htmlParseDocument') d like(xmlCint) d ctxt value like(htmlParserCtxtPtr)

d htmlSAXParseDoc… d pr extproc('htmlSAXParseDoc') d like(htmlDocPtr) d cur * value options(*string) xmlChar * d encoding * value options(*string) const char * d sax value like(htmlSAXHandlerPtr) d userData * value void *

d htmlParseDoc pr extproc('htmlParseDoc') d like(htmlDocPtr) d cur * value options(*string) xmlChar * d encoding * value options(*string) const char *

d htmlSAXParseFile… d pr extproc('htmlSAXParseFile') d like(htmlDocPtr) d filename * value options(*string) const char * d encoding * value options(*string) const char * d sax value like(htmlSAXHandlerPtr) d userData * value void *

d htmlParseFile pr extproc('htmlParseFile') d like(htmlDocPtr) d filename * value options(*string) const char * d encoding * value options(*string) const char *

d UTF8ToHtml pr extproc('UTF8ToHtml') d like(xmlCint) d out 65535 options(*varsize) unsigned char [] d outlen like(xmlCint) d in * value options(*string) const unsigned char* d inlen like(xmlCint)

d htmlEncodeEntities… d pr extproc('htmlEncodeEntities') d like(xmlCint) d out 65535 options(*varsize) unsigned char [] d outlen like(xmlCint) d in * value options(*string) const unsigned char* d inlen like(xmlCint) d quoteChar value like(xmlCint)

d htmlIsScriptAttribute… d pr extproc('htmlIsScriptAttribute') d like(xmlCint) d name * value options(*string) const xmlChar *

d htmlHandleOmittedElem… d pr extproc('htmlHandleOmittedElem') d like(xmlCint) d val value like(xmlCint)

/if defined(LIBXML_PUSH_ENABLED)

* Interfaces for the Push mode.

d htmlCreatePushParserCtxt… d pr extproc('htmlCreatePushParserCtxt') d like(htmlParserCtxtPtr) d sax value like(htmlSAXHandlerPtr) d user_data * value void * d chunk * value options(*string) const char * d size value like(xmlCint) d filename * value options(*string) const char * d enc value like(xmlCharEncoding)

d htmlParseChunk pr extproc('htmlParseChunk') d like(xmlCint) d ctxt value like(htmlParserCtxtPtr) d chunk * value options(*string) const char * d size value like(xmlCint) d terminate value like(xmlCint)

/endif                                                                    LIBXML_PUSH_ENABLED

d htmlFreeParserCtxt… d pr extproc('htmlFreeParserCtxt') d ctxt value like(htmlParserCtxtPtr)

* New set of simpler/more flexible APIs

* xmlParserOption:
*
* This is the set of XML parser options that can be passed down
* to the xmlReadDoc() and similar calls.

d htmlParserOption… d s based(######typedef######) d like(xmlCenum) d HTML_PARSE_RECOVER… Relaxed parsing d c X'00000001' d HTML_PARSE_NODEFDTD… No default doctype d c X'00000004' d HTML_PARSE_NOERROR… No error reports d c X'00000020' d HTML_PARSE_NOWARNING… No warning reports d c X'00000040' d HTML_PARSE_PEDANTIC… Pedantic err reports d c X'00000080' d HTML_PARSE_NOBLANKS… Remove blank nodes d c X'00000100' d HTML_PARSE_NONET… Forbid net access d c X'00000800' d HTML_PARSE_NOIMPLIED… No implied html/body d c X'00002000' d HTML_PARSE_COMPACT… compact small txtnod d c X'00010000' d HTML_PARSE_IGNORE_ENC… Ignore encoding hint d c X'00200000'

d htmlCtxtReset pr extproc('htmlCtxtReset') d ctxt value like(htmlParserCtxtPtr)

d htmlCtxtUseOptions… d pr extproc('htmlCtxtUseOptions') d like(xmlCint) d ctxt value like(htmlParserCtxtPtr) d options value like(xmlCint)

d htmlReadDoc pr extproc('htmlReadDoc') d like(htmlDocPtr) d cur * value options(*string) const xmlChar * d URL * value options(*string) const char * d encoding * value options(*string) const char * d options value like(xmlCint)

d htmlReadFile pr extproc('htmlReadFile') d like(htmlDocPtr) d URL * value options(*string) const char * d encoding * value options(*string) const char * d options value like(xmlCint)

d htmlReadMemory pr extproc('htmlReadMemory') d like(htmlDocPtr) d buffer * value options(*string) const char * d size value like(xmlCint) d URL * value options(*string) const char * d encoding * value options(*string) const char * d options value like(xmlCint)

d htmlReadFd pr extproc('htmlReadFd') d like(htmlDocPtr) d fd value like(xmlCint) d URL * value options(*string) const char * d encoding * value options(*string) const char * d options value like(xmlCint)

d htmlReadIO pr extproc('htmlReadIO') d like(htmlDocPtr) d ioread value like(xmlInputReadCallback) d ioclose value like(xmlInputCloseCallback) d ioctx * value void * d URL * value options(*string) const char * d encoding * value options(*string) const char * d options value like(xmlCint)

d htmlCtxtReadDoc… d pr extproc('htmlCtxtReadDoc') d like(htmlDocPtr) d ctxt value like(xmlParserCtxtPtr) d cur * value options(*string) const xmlChar * d URL * value options(*string) const char * d encoding * value options(*string) const char * d options value like(xmlCint)

d htmlCtxtReadFile… d pr extproc('htmlCtxtReadFile') d like(htmlDocPtr) d ctxt value like(xmlParserCtxtPtr) d filename * value options(*string) const char * d encoding * value options(*string) const char * d options value like(xmlCint)

d htmlCtxtReadMemory… d pr extproc('htmlCtxtReadMemory') d like(htmlDocPtr) d ctxt value like(xmlParserCtxtPtr) d buffer * value options(*string) const char * d size value like(xmlCint) d URL * value options(*string) const char * d encoding * value options(*string) const char * d options value like(xmlCint)

d htmlCtxtReadFd pr extproc('htmlCtxtReadFd') d like(htmlDocPtr) d ctxt value like(xmlParserCtxtPtr) d fd value like(xmlCint) d URL * value options(*string) const char * d encoding * value options(*string) const char * d options value like(xmlCint)

d htmlCtxtReadIO pr extproc('htmlCtxtReadIO') d like(htmlDocPtr) d ctxt value like(xmlParserCtxtPtr) d ioread value like(xmlInputReadCallback) d ioclose value like(xmlInputCloseCallback) d ioctx * value void * d URL * value options(*string) const char * d encoding * value options(*string) const char * d options value like(xmlCint)

* Further knowledge of HTML structure

d htmlStatus s based(######typedef######) d like(xmlCenum) d HTML_NA c X'0000' No check at all d HTML_INVALID c X'0001' d HTML_DEPRECATED… d c X'0002' d HTML_VALID c X'0004' d HTML_REQUIRED c X'000C' HTML_VALID ored-in

* Using htmlElemDesc rather than name here, to emphasise the fact
*  that otherwise there's a lookup overhead

d htmlAttrAllowed… d pr extproc('htmlAttrAllowed') d like(htmlStatus) d param1 value like(htmlElemDescPtr) const d param2 * value options(*string) const xmlChar * d param3 value like(xmlCint)

d htmlElementAllowedHere… d pr extproc('htmlElementAllowedHere') d like(xmlCint) d param1 value like(htmlElemDescPtr) const d param2 * value options(*string) const xmlChar *

d htmlElementStatusHere… d pr extproc('htmlElementStatusHere') d like(htmlStatus) d param1 value like(htmlElemDescPtr) const d param2 value like(htmlElemDescPtr) const

d htmlNodeStatus pr extproc('htmlNodeStatus') d like(htmlStatus) d param1 value like(htmlNodePtr) d param2 value like(xmlCint)

* C macros implemented as procedures for ILE/RPG support.

d htmlDefaultSubelement… d pr * extproc('__htmlDefaultSubelement') const char * d elt * value const htmlElemDesc *

d htmlElementAllowedHereDesc… d pr extproc( d '__htmlElementAllowedHereDesc') d like(xmlCint) d parent * value const htmlElemDesc * d elt * value const htmlElemDesc *

d htmlRequiredAttrs… d pr * extproc('__htmlRequiredAttrs') const char * * d elt * value const htmlElemDesc *

/endif                                                                    LIBXML_HTML_ENABLED
/endif                                                                    HTML_PARSER_H__