"use strict";
// Encoding helpers used below: getBOMEncoding() and labelToName().
const whatwgEncoding = require("whatwg-encoding");

// html.spec.whatwg.org/#encoding-sniffing-algorithm module.exports = function sniffHTMLEncoding(buffer, options) {

let encoding = whatwgEncoding.getBOMEncoding(buffer); // see https://github.com/whatwg/html/issues/1910

if (options === undefined) {
  options = {};
}

if (encoding === null && options.transportLayerEncodingLabel !== undefined) {
  encoding = whatwgEncoding.labelToName(options.transportLayerEncodingLabel);
}

if (encoding === null) {
  encoding = prescanMetaCharset(buffer);
}

if (encoding === null && options.defaultEncoding !== undefined) {
  encoding = options.defaultEncoding;
}

if (encoding === null) {
  encoding = "windows-1252";
}

return encoding;

};

// https://html.spec.whatwg.org/multipage/syntax.html#prescan-a-byte-stream-to-determine-its-encoding
/**
 * Scans the start of a byte buffer for a `<meta>` element that declares a
 * character encoding.
 *
 * Only indices below `min(buffer.length, 1024)` are used as scan positions.
 * Comments (`<!-- ... -->`), other tags and `<!` / `</` / `<?` constructs are
 * skipped over. For a `<meta` tag the attributes are read with
 * `getAttribute()` and combined as follows:
 *   - `charset=...`   -> encoding from `whatwgEncoding.labelToName`, no pragma needed
 *   - `content=...`   -> encoding from `extractCharacterEncodingFromMeta`, which
 *                        only counts when `http-equiv` is "content-type"
 *
 * @param {*} buffer - Indexable sequence of byte values with a `length`.
 * @returns {string|null} The declared encoding, with "UTF-16LE"/"UTF-16BE"
 *   mapped to "UTF-8" and "x-user-defined" mapped to "windows-1252"; `null`
 *   when no usable declaration was found in the scanned range.
 */
function prescanMetaCharset(buffer) {
  const l = Math.min(buffer.length, 1024);
  for (let i = 0; i < l; i++) {
    let c = buffer[i];
    if (c === 0x3C) {
      // "<"
      // Look-ahead bytes; these are undefined when they fall past the end of the buffer.
      let c1 = buffer[i + 1];
      let c2 = buffer[i + 2];
      const c3 = buffer[i + 3];
      const c4 = buffer[i + 4];
      const c5 = buffer[i + 5];
      // !-- (comment start)
      if (c1 === 0x21 && c2 === 0x2D && c3 === 0x2D) {
        i += 4;
        for (; i < l; i++) {
          c = buffer[i];
          c1 = buffer[i + 1];
          c2 = buffer[i + 2];
          // --> (comment end)
          if (c === 0x2D && c1 === 0x2D && c2 === 0x3E) {
            // Land on the ">"; the outer loop's i++ then steps past it.
            i += 2;
            break;
          }
        }
      } else if ((c1 === 0x4D || c1 === 0x6D) &&
         (c2 === 0x45 || c2 === 0x65) &&
         (c3 === 0x54 || c3 === 0x74) &&
         (c4 === 0x41 || c4 === 0x61) &&
         (isSpaceCharacter(c5) || c5 === 0x2F)) {
        // "meta" + space or /
        i += 6;
        let gotPragma = false; // saw http-equiv="content-type"
        let needPragma = null; // null: no charset source seen; true: came from content; false: came from charset
        let charset = null;

        let attrRes;
        do {
          attrRes = getAttribute(buffer, i, l);
          if (attrRes.attr) {
            if (attrRes.attr.name === "http-equiv") {
              gotPragma = attrRes.attr.value === "content-type";
            } else if (attrRes.attr.name === "content" && !charset) {
              charset = extractCharacterEncodingFromMeta(attrRes.attr.value);
              if (charset !== null) {
                needPragma = true;
              }
            } else if (attrRes.attr.name === "charset") {
              charset = whatwgEncoding.labelToName(attrRes.attr.value);
              needPragma = false;
            }
          }
          i = attrRes.i;
        } while (attrRes.attr);

        // This <meta> is not usable; keep scanning the rest of the range.
        if (needPragma === null) {
          continue;
        }
        if (needPragma === true && gotPragma === false) {
          continue;
        }
        if (charset === null) {
          continue;
        }

        if (charset === "UTF-16LE" || charset === "UTF-16BE") {
          charset = "UTF-8";
        }
        if (charset === "x-user-defined") {
          charset = "windows-1252";
        }

        return charset;
      } else if ((c1 >= 0x41 && c1 <= 0x5A) || (c1 >= 0x61 && c1 <= 0x7A)) {
        // a-z or A-Z
        // Skip the tag name ...
        for (i += 2; i < l; i++) {
          c = buffer[i];
          // space or >
          if (isSpaceCharacter(c) || c === 0x3E) {
            break;
          }
        }
        // ... then consume and discard its attributes.
        let attrRes;
        do {
          attrRes = getAttribute(buffer, i, l);
          i = attrRes.i;
        } while (attrRes.attr);
      } else if (c1 === 0x21 || c1 === 0x2F || c1 === 0x3F) {
        // ! or / or ?
        for (i += 2; i < l; i++) {
          c = buffer[i];
          // >
          if (c === 0x3E) {
            break;
          }
        }
      }
    }
  }
  return null;
}

// https://html.spec.whatwg.org/multipage/syntax.html#concept-get-attributes-when-sniffing
/**
 * Reads one attribute from `buffer`, starting at index `i` and not starting
 * any scan step at or beyond index `l`.
 *
 * Leading spaces and "/" are skipped. Bytes A-Z in the name and in the value
 * are lowercased; every other byte is appended unchanged via
 * `String.fromCharCode`.
 *
 * @param {*} buffer - Indexable sequence of byte values.
 * @param {number} i - Index at which to start scanning.
 * @param {number} l - Exclusive upper bound for the scan loops.
 * @returns {{attr: {name: string, value: string}, i: number}|{i: number}}
 *   With `attr` when an attribute was read; without it when ">" was reached
 *   (then `i` is just past the ">") or the range was exhausted. `i` is the
 *   index from which the caller should continue.
 */
function getAttribute(buffer, i, l) {
  for (; i < l; i++) {
    let c = buffer[i];
    // space or /
    if (isSpaceCharacter(c) || c === 0x2F) {
      continue;
    }
    // ">"
    if (c === 0x3E) {
      i++;
      break;
    }
    let name = "";
    let value = "";
    nameLoop: for (; i < l; i++) {
      c = buffer[i];
      // "="
      // A leading "=" (empty name so far) is treated as part of the name.
      if (c === 0x3D && name !== "") {
        i++;
        break;
      }
      // space
      if (isSpaceCharacter(c)) {
        // Name ended; look past the spaces for an "=".
        for (i++; i < l; i++) {
          c = buffer[i];
          // space
          if (isSpaceCharacter(c)) {
            continue;
          }
          // not "="
          if (c !== 0x3D) {
            // Attribute without a value.
            return { attr: { name, value }, i };
          }

          i++;
          break nameLoop;
        }
        break;
      }
      // / or >
      if (c === 0x2F || c === 0x3E) {
        return { attr: { name, value }, i };
      }
      // A-Z
      if (c >= 0x41 && c <= 0x5A) {
        name += String.fromCharCode(c + 0x20); // lowercase
      } else {
        name += String.fromCharCode(c);
      }
    }
    // Value parsing starts here. Note that this read is not bounded by l.
    c = buffer[i];
    // space
    if (isSpaceCharacter(c)) {
      for (i++; i < l; i++) {
        c = buffer[i];
        // space
        if (isSpaceCharacter(c)) {
          continue;
        } else {
          break;
        }
      }
    }
    // " or '
    if (c === 0x22 || c === 0x27) {
      const quote = c;
      for (i++; i < l; i++) {
        c = buffer[i];

        if (c === quote) {
          i++;
          return { attr: { name, value }, i };
        }

        // A-Z
        if (c >= 0x41 && c <= 0x5A) {
          value += String.fromCharCode(c + 0x20); // lowercase
        } else {
          value += String.fromCharCode(c);
        }
      }
      // No closing quote inside the range: execution falls through to the
      // unquoted handling below with the last byte read.
    }

    // >
    if (c === 0x3E) {
      return { attr: { name, value }, i };
    }

    // First byte of an unquoted value.
    // A-Z
    if (c >= 0x41 && c <= 0x5A) {
      value += String.fromCharCode(c + 0x20); // lowercase
    } else {
      value += String.fromCharCode(c);
    }

    for (i++; i < l; i++) {
      c = buffer[i];

      // space or >
      if (isSpaceCharacter(c) || c === 0x3E) {
        return { attr: { name, value }, i };
      }

      // A-Z
      if (c >= 0x41 && c <= 0x5A) {
        value += String.fromCharCode(c + 0x20); // lowercase
      } else {
        value += String.fromCharCode(c);
      }
    }
  }
  return { i };
}

/**
 * Extracts an encoding from a string such as "text/html; charset=utf-8".
 *
 * Finds the first case-insensitive "charset" that is followed, after optional
 * whitespace, by "=". The value after the "=" (and optional whitespace) is
 * either quoted with " or ', or runs up to the next whitespace or ";".
 * The value is resolved with `whatwgEncoding.labelToName`.
 *
 * @param {string} string - Text to search.
 * @returns {string|null} The result of `labelToName` for the extracted label,
 *   or `null` when there is no `charset=` pair, the value is missing, a quote
 *   is unmatched, or the unquoted value is the final single character of the
 *   string.
 */
function extractCharacterEncodingFromMeta(string) {
  const keywordLength = "charset".length;
  let position = 0;

  while (true) {
    const relativeIndex = string.substring(position).search(/charset/i);

    if (relativeIndex === -1) {
      return null;
    }

    // search() ran on a suffix of the string, so its result must be shifted by
    // `position` to become an index into `string` itself.
    let subPosition = position + relativeIndex + keywordLength;

    // charCodeAt() returns NaN past the end of the string, which is not a
    // space, so these loops stop safely when the input ends early.
    while (isSpaceCharacter(string.charCodeAt(subPosition))) {
      ++subPosition;
    }

    if (string[subPosition] !== "=") {
      // Not a charset= pair; resume searching from here. subPosition is at
      // least position + 7, so position always moves forward.
      position = subPosition - 1;
      continue;
    }

    ++subPosition;

    while (isSpaceCharacter(string.charCodeAt(subPosition))) {
      ++subPosition;
    }

    position = subPosition;
    break;
  }

  // "charset=" with nothing after it.
  if (position >= string.length) {
    return null;
  }

  if (string[position] === "\"" || string[position] === "'") {
    const nextIndex = string.indexOf(string[position], position + 1);

    if (nextIndex !== -1) {
      return whatwgEncoding.labelToName(string.substring(position + 1, nextIndex));
    }

    // It is an unmatched quotation mark
    return null;
  }

  if (string.length === position + 1) {
    return null;
  }

  // The search runs on the suffix starting at position + 1, so shift the match
  // back into an absolute index before slicing.
  const relativeEnd = string.substring(position + 1).search(/\x09|\x0A|\x0C|\x0D|\x20|;/);
  const end = relativeEnd === -1 ? string.length : position + 1 + relativeEnd;

  return whatwgEncoding.labelToName(string.substring(position, end));
}

/**
 * Tells whether a byte / char code is one of the five whitespace values the
 * sniffer recognises: tab (0x09), line feed (0x0A), form feed (0x0C),
 * carriage return (0x0D) or space (0x20).
 *
 * @param {number} c - Byte value or char code to test.
 * @returns {boolean} `true` for one of the five values above, else `false`
 *   (including for `undefined` and `NaN`).
 */
function isSpaceCharacter(c) {
  return c === 0x09 || c === 0x0A || c === 0x0C || c === 0x0D || c === 0x20;
}