'use strict';
var Tokenizer = require('../tokenization/tokenizer'),
ForeignContent = require('../common/foreign_content'), UNICODE = require('../common/unicode'), HTML = require('../common/html');
//Aliases
//Short names for the tag name and namespace tables exposed by the HTML module.
var $ = HTML.TAG_NAMES;
var NS = HTML.NAMESPACES;
//Tokenizer proxy
//NOTE: this proxy simulates the adjustments of the Tokenizer that the standard parser
//performs during tree construction.
//`html` and `options` are forwarded unchanged to the Tokenizer constructor.
var TokenizerProxy = module.exports = function (html, options) {
    //Wrapped tokenizer; every token handed out by the proxy is pulled from it.
    this.tokenizer = new Tokenizer(html, options);

    //Namespace bookkeeping. The stack starts empty, so the top index is -1,
    //there is no current namespace and we are not in foreign content.
    this.namespaceStack = [];
    this.namespaceStackTop = -1;
    this.currentNamespace = null;
    this.inForeignContent = false;
};
//API
//Pulls the next token from the wrapped tokenizer, lets the proxy update its own state
//for start/end tags, and returns that same token object.
TokenizerProxy.prototype.getNextToken = function () {
    var nextToken = this.tokenizer.getNextToken();

    switch (nextToken.type) {
        case Tokenizer.START_TAG_TOKEN:
            this._handleStartTagToken(nextToken);
            break;

        case Tokenizer.END_TAG_TOKEN:
            this._handleEndTagToken(nextToken);
            break;

        case Tokenizer.NULL_CHARACTER_TOKEN:
            //In foreign content a NULL character token is rewritten in place into
            //a character token carrying the replacement character.
            if (this.inForeignContent) {
                nextToken.type = Tokenizer.CHARACTER_TOKEN;
                nextToken.chars = UNICODE.REPLACEMENT_CHARACTER;
            }
            break;
    }

    return nextToken;
};
//Namespace stack mutations
//Pushes `namespace` onto the namespace stack and makes it the current one.
//Any namespace other than NS.HTML counts as foreign content, and the tokenizer's
//allowCDATA flag is kept equal to the foreign-content flag.
TokenizerProxy.prototype._enterNamespace = function (namespace) {
    var isForeign = namespace !== NS.HTML;

    this.namespaceStack.push(namespace);
    this.namespaceStackTop++;

    this.currentNamespace = namespace;
    this.inForeignContent = isForeign;
    this.tokenizer.allowCDATA = isForeign;
};
//Pops the innermost namespace off the namespace stack and restores the state of the
//namespace underneath it (current namespace, foreign-content flag, tokenizer CDATA flag).
TokenizerProxy.prototype._leaveCurrentNamespace = function () {
    this.namespaceStack.pop();

    //Derive the top index from the stack itself so it can never drop below -1,
    //even if this method is called while the stack is already empty.
    this.namespaceStackTop = this.namespaceStack.length - 1;

    if (this.namespaceStackTop < 0) {
        //NOTE: an empty stack means we are back in plain HTML content. Reading the
        //top of an empty stack yields `undefined`, and `undefined !== NS.HTML` would
        //wrongly report foreign content, so restore the same values the constructor
        //starts with instead.
        this.currentNamespace = null;
        this.inForeignContent = false;
    }
    else {
        this.currentNamespace = this.namespaceStack[this.namespaceStackTop];
        this.inForeignContent = this.currentNamespace !== NS.HTML;
    }

    this.tokenizer.allowCDATA = this.inForeignContent;
};
//Token handlers
//Switches the tokenizer state according to the start tag name `tn`.
//Tag names not listed here leave the tokenizer state untouched.
TokenizerProxy.prototype._ensureTokenizerMode = function (tn) {
    switch (tn) {
        case $.TEXTAREA:
        case $.TITLE:
            this.tokenizer.state = Tokenizer.MODE.RCDATA;
            break;

        case $.PLAINTEXT:
            this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
            break;

        case $.SCRIPT:
            this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA;
            break;

        case $.STYLE:
        case $.IFRAME:
        case $.XMP:
        case $.NOEMBED:
        case $.NOFRAMES:
        case $.NOSCRIPT:
            this.tokenizer.state = Tokenizer.MODE.RAWTEXT;
            break;
    }
};
//Updates the proxy state for a start tag token:
// - <svg> / <math> enter the SVG / MathML namespace;
// - outside foreign content the tokenizer mode is adjusted for the tag name;
// - inside foreign content the tag either exits the current namespace or, when it is
//   an integration point of the current namespace, enters the HTML namespace.
TokenizerProxy.prototype._handleStartTagToken = function (token) {
    var tagName = token.tagName;

    if (tagName === $.SVG) {
        this._enterNamespace(NS.SVG);
        return;
    }

    if (tagName === $.MATH) {
        this._enterNamespace(NS.MATHML);
        return;
    }

    if (!this.inForeignContent) {
        this._ensureTokenizerMode(tagName);
        return;
    }

    if (ForeignContent.causesExit(token)) {
        this._leaveCurrentNamespace();
        return;
    }

    var ns = this.currentNamespace;
    var isIntegrationPoint = ForeignContent.isMathMLTextIntegrationPoint(tagName, ns) ||
                             ForeignContent.isHtmlIntegrationPoint(tagName, ns, token.attrs);

    if (isIntegrationPoint)
        this._enterNamespace(NS.HTML);
};
//Updates the proxy state for an end tag token:
// - inside foreign content, </svg> or </math> matching the current namespace leaves it;
// - outside foreign content, an end tag that is an integration point of the enclosing
//   namespace leaves the current namespace, and </script> resets the tokenizer to DATA.
TokenizerProxy.prototype._handleEndTagToken = function (token) {
    var tagName = token.tagName;

    if (this.inForeignContent) {
        var closesSvg = tagName === $.SVG && this.currentNamespace === NS.SVG;
        var closesMath = tagName === $.MATH && this.currentNamespace === NS.MATHML;

        if (closesSvg || closesMath)
            this._leaveCurrentNamespace();

        return;
    }

    //Namespace directly below the current one on the stack (undefined when there is none).
    var enclosingNs = this.namespaceStack[this.namespaceStackTop - 1];

    //NOTE: check for exit from integration point
    if (ForeignContent.isMathMLTextIntegrationPoint(tagName, enclosingNs) ||
        ForeignContent.isHtmlIntegrationPoint(tagName, enclosingNs, token.attrs)) {
        this._leaveCurrentNamespace();
    }
    else if (tagName === $.SCRIPT) {
        this.tokenizer.state = Tokenizer.MODE.DATA;
    }
};