Package org.jsoup.parser
Class Tokeniser
java.lang.Object
org.jsoup.parser.Tokeniser
Reads the input stream into tokens.
-
Field Summary
Fields
Modifier and Type | Field | Description
(package private) Token.Character
private StringBuilder
private String
private final int[]
(package private) Token.Comment
(package private) StringBuilder
(package private) Token.Doctype
private Token
(package private) Token.EndTag
private final ParseErrorList
private boolean
private String
private final int[]
private static final char[]
private final CharacterReader
(package private) static final char
(package private) Token.StartTag
private TokeniserState
(package private) Token.Tag
(package private) static final int[]
(package private) static final int
-
Constructor Summary
Constructors -
Method Summary
Modifier and Type | Method | Description
(package private) void
advanceTransition
(TokeniserState state) (package private) String
private void
characterReferenceError
(String message) (package private) int[]
consumeCharacterReference
(Character additionalAllowedCharacter, boolean inAttribute) (package private) void
(package private) void
(package private) void
(package private) Token.Tag
createTagPending
(boolean start) (package private) void
(package private) boolean
(package private) void
emit
(char c) (package private) void
emit
(char[] chars) (package private) void
emit
(int[] codepoints) (package private) void
(package private) void
(package private) void
(package private) void
(package private) void
(package private) void
eofError
(TokeniserState state) (package private) void
(package private) void
error
(TokeniserState state) (package private) TokeniserState
getState()
(package private) boolean
(package private) Token
read()
(package private) void
transition
(TokeniserState state) (package private) String
unescapeEntities
(boolean inAttribute) Utility method to consume reader and unescape entities found within.
-
Field Details
-
replacementChar
static final char replacementChar- See Also:
-
notCharRefCharsSorted
private static final char[] notCharRefCharsSorted -
win1252ExtensionsStart
static final int win1252ExtensionsStart- See Also:
-
win1252Extensions
static final int[] win1252Extensions -
reader
-
errors
-
state
-
emitPending
-
isEmitPending
private boolean isEmitPending -
charsString
-
charsBuilder
-
dataBuffer
StringBuilder dataBuffer -
tagPending
Token.Tag tagPending -
startPending
Token.StartTag startPending -
endPending
Token.EndTag endPending -
charPending
Token.Character charPending -
doctypePending
Token.Doctype doctypePending -
commentPending
Token.Comment commentPending -
lastStartTag
-
codepointHolder
private final int[] codepointHolder -
multipointHolder
private final int[] multipointHolder
-
-
Constructor Details
-
Tokeniser
Tokeniser(CharacterReader reader, ParseErrorList errors)
-
-
Method Details
-
read
Token read() -
emit
-
emit
-
emit
void emit(char[] chars) -
emit
void emit(int[] codepoints) -
emit
void emit(char c) -
getState
TokeniserState getState() -
transition
-
advanceTransition
-
consumeCharacterReference
-
createTagPending
-
emitTagPending
void emitTagPending() -
createCommentPending
void createCommentPending() -
emitCommentPending
void emitCommentPending() -
createBogusCommentPending
void createBogusCommentPending() -
createDoctypePending
void createDoctypePending() -
emitDoctypePending
void emitDoctypePending() -
createTempBuffer
void createTempBuffer() -
isAppropriateEndTagToken
boolean isAppropriateEndTagToken() -
appropriateEndTagName
String appropriateEndTagName() -
error
-
eofError
-
characterReferenceError
-
error
-
currentNodeInHtmlNS
boolean currentNodeInHtmlNS() -
unescapeEntities
Utility method to consume reader and unescape entities found within.
Parameters:
inAttribute - if the text to be unescaped is in an attribute
Returns:
unescaped string from reader
-