'use strict';

var UNICODE = require('../common/unicode');

//Aliases
//NOTE: short alias for the code point table exposed by the unicode module (read below as `$.EOF`, `$.BOM`, etc.).
var $ = UNICODE.CODE_POINTS;

//Utils

//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
//these functions if they are situated in another module due to a context switch.
//Always perform an inlining check before modifying these functions ('node --trace-inlining').

//Returns true when `cp` falls in the UTF-16 surrogate range (0xD800..0xDFFF)
//or is greater than 0x10FFFF; returns false otherwise.
function isReservedCodePoint(cp) {
    return cp >= 0xD800 && cp <= 0xDFFF || cp > 0x10FFFF;
}

//Checks whether two consecutive UTF-16 code units form a surrogate pair:
//`cp1` has to be in 0xD800..0xDBFF (lead unit) and `cp2` in 0xDC00..0xDFFF (trail unit).
function isSurrogatePair(cp1, cp2) {
    var hasLeadUnit = cp1 >= 0xD800 && cp1 <= 0xDBFF;
    var hasTrailUnit = cp2 >= 0xDC00 && cp2 <= 0xDFFF;

    return hasLeadUnit && hasTrailUnit;
}

//Combines a lead unit `cp1` and a trail unit `cp2` into a single code point.
//NOTE: the 0x2400 constant equals 0x10000 - 0xDC00, i.e. the supplementary plane base
//with the trail unit's 0xDC00 bias already subtracted.
function getSurrogatePairCodePoint(cp1, cp2) {
    var leadContribution = (cp1 - 0xD800) * 0x400;

    return leadContribution + 0x2400 + cp2;
}

//Preprocessor
//NOTE: HTML input preprocessing
//(see: www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream)
//Constructor: stores `html` through `write()` and resets the cursor and gap bookkeeping.
var Preprocessor = module.exports = function (html) {
    this.write(html);

    //NOTE: one leading U+FEFF BYTE ORDER MARK character must be ignored if any are present in the input stream.
    //The cursor is pre-incremented on every advance, so starting at 0 skips the BOM at index 0,
    //while starting at -1 makes index 0 the first code unit that is read.
    this.pos = this.html.charCodeAt(0) === $.BOM ? 0 : -1;

    //NOTE: `lastGapPos` holds the most recently recorded gap position, `gapStack` holds the ones before it.
    this.gapStack = [];
    this.lastGapPos = -1;

    //NOTE: set after a CARRIAGE RETURN so that an immediately following LINE FEED is dropped.
    this.skipNextNewLine = false;
};

//Stores `html` as the buffer when no (non-empty) buffer exists yet; otherwise splices `html` into
//the existing buffer right after the current position, ahead of the part that follows it.
//`lastCharPos` is refreshed to the index of the final code unit in both cases.
Preprocessor.prototype.write = function (html) {
    if (!this.html)
        this.html = html;

    else {
        var splitIdx = this.pos + 1;
        var head = this.html.substring(0, splitIdx);
        var tail = this.html.substring(splitIdx, this.html.length);

        this.html = head + html + tail;
    }

    this.lastCharPos = this.html.length - 1;
};

//Moves the cursor one code unit forward and returns the code point found there,
//or `$.EOF` once the cursor has passed `lastCharPos`.
Preprocessor.prototype.advanceAndPeekCodePoint = function () {
    this.pos++;

    if (this.pos > this.lastCharPos)
        return $.EOF;

    var cp = this.html.charCodeAt(this.pos);
    var previousWasCarriageReturn = this.skipNextNewLine;

    //NOTE: a U+000A LINE FEED (LF) that immediately follows a U+000D CARRIAGE RETURN (CR) must be ignored.
    //Its position is recorded as a gap and the following code point is returned instead.
    if (previousWasCarriageReturn && cp === $.LINE_FEED) {
        this.skipNextNewLine = false;
        this._addGap();

        return this.advanceAndPeekCodePoint();
    }

    //NOTE: every U+000D CARRIAGE RETURN (CR) is reported as U+000A LINE FEED (LF); the flag remembers
    //that an LF coming right after it has to be dropped.
    this.skipNextNewLine = cp === $.CARRIAGE_RETURN;

    if (this.skipNextNewLine)
        return $.LINE_FEED;

    //OPTIMIZATION: code units below 0xD800 (this covers the most common HTML input, e.g. ASCII) are
    //returned as is; only the high range goes through the more expensive processing.
    if (cp >= 0xD800)
        return this._processHighRangeCodePoint(cp);

    return cp;
};

//Resolves a code unit `cp` read at the current position: a valid surrogate pair is merged into one
//code point (consuming the second unit), and any result that is still reserved is replaced
//with `$.REPLACEMENT_CHARACTER`.
Preprocessor.prototype._processHighRangeCodePoint = function (cp) {
    var resolved = cp;

    //NOTE: a pair can only be peeked when there is at least one more code unit in the buffer.
    if (this.pos !== this.lastCharPos) {
        var trailUnit = this.html.charCodeAt(this.pos + 1);

        if (isSurrogatePair(cp, trailUnit)) {
            //NOTE: surrogate pair found. Step onto its second unit and compute the combined code point.
            this.pos++;
            resolved = getSurrogatePairCodePoint(cp, trailUnit);

            //NOTE: record the second unit as a gap, so that retreat steps over the whole pair.
            this._addGap();
        }
    }

    return isReservedCodePoint(resolved) ? $.REPLACEMENT_CHARACTER : resolved;
};

//Records the current position as the latest gap; the previously recorded gap position
//is pushed onto `gapStack` first so that it can be restored later.
Preprocessor.prototype._addGap = function () {
    var previousGapPos = this.lastGapPos;

    this.gapStack.push(previousGapPos);
    this.lastGapPos = this.pos;
};

//Moves the cursor one position back. When the cursor sits on the most recently recorded gap,
//the previous gap is restored from `gapStack` and the cursor moves back two positions instead.
Preprocessor.prototype.retreat = function () {
    var stepBack = 1;

    if (this.pos === this.lastGapPos) {
        this.lastGapPos = this.gapStack.pop();
        stepBack = 2;
    }

    this.pos -= stepBack;
};