21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
22 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
26 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
28 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
29 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
31 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
33 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
36 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
52 uint32_t *ucs4,
size_t *clen)
73 uint8_t **s,
size_t *
len)
147 uint32_t off, uint32_t *nextoff)
167 uint32_t
len, uint32_t off, uint32_t *nextoff)
parserutils_error parserutils_charset_utf8_length(const uint8_t *s, size_t max, size_t *len)
Calculate the length (in characters) of a bounded UTF-8 string.
parserutils_error parserutils_charset_utf8_from_ucs4(uint32_t ucs4, uint8_t **s, size_t *len)
Convert a single UCS-4 character into a UTF-8 multibyte sequence.
parserutils_error parserutils_charset_utf8_next_paranoid(const uint8_t *s, uint32_t len, uint32_t off, uint32_t *nextoff)
Find next legal UTF-8 char in string.
parserutils_error parserutils_charset_utf8_to_ucs4(const uint8_t *s, size_t len, uint32_t *ucs4, size_t *clen)
Convert a UTF-8 multibyte sequence into a single UCS-4 character.
const uint8_t numContinuations[256]
Number of continuation bytes for a given start byte.
parserutils_error parserutils_charset_utf8_next(const uint8_t *s, uint32_t len, uint32_t off, uint32_t *nextoff)
Find next legal UTF-8 char in string.
parserutils_error parserutils_charset_utf8_prev(const uint8_t *s, uint32_t off, uint32_t *prevoff)
Find previous legal UTF-8 char in string.
parserutils_error parserutils_charset_utf8_char_byte_length(const uint8_t *s, size_t *len)
Calculate the length (in bytes) of a UTF-8 character.
UTF-8 manipulation functions (interface).
UTF-8 manipulation macros (implementation).
#define UTF8_TO_UCS4(s, len, ucs4, clen, error)
Convert a UTF-8 multibyte sequence into a single UCS-4 character.
#define UTF8_PREV(s, off, prevoff, error)
Find previous legal UTF-8 char in string.
#define UTF8_FROM_UCS4(ucs4, s, len, error)
Convert a single UCS-4 character into a UTF-8 multibyte sequence.
#define UTF8_NEXT(s, len, off, nextoff, error)
Find next legal UTF-8 char in string.
#define UTF8_LENGTH(s, max, len, error)
Calculate the length (in characters) of a bounded UTF-8 string.
#define UTF8_CHAR_BYTE_LENGTH(s, len, error)
Calculate the length (in bytes) of a UTF-8 character.
#define UTF8_NEXT_PARANOID(s, len, off, nextoff, error)
Skip to start of next sequence in UTF-8 input.