Libparserutils
aliases.c
Go to the documentation of this file.
1/*
2 * This file is part of LibParserUtils.
3 * Licensed under the MIT License,
4 * http://www.opensource.org/licenses/mit-license.php
5 * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
6 */
7
8#include <ctype.h>
9#include <stdbool.h>
10#include <stddef.h>
11#include <stdio.h>
12#include <stdlib.h>
13#include <string.h>
14
15#include <assert.h>
16
17#include "charset/aliases.h"
18#include "utils/utils.h"
19
20/* Bring in the aliases tables */
21#include "aliases.inc"
22
/*
 * Search key: a character pointer paired with an explicit length.
 * The code in this file that reads it is bounded by `slen` and never tests
 * for a terminating NUL.
 * NOTE(review): the closing line of this typedef (and hence the type's name)
 * is not visible in this listing.
 */
23typedef struct {
24 size_t slen; /* number of characters available at `s` */
25 const char *s; /* start of the key text */
27
28
/*
 * Classify a character as "punctuation or space" for alias matching:
 * anything outside the ranges 'A'-'Z', 'a'-'z' and '0'-'9'.
 *
 * The test uses plain range comparisons only (no <ctype.h> calls), so it
 * does not depend on the current locale.  It is implemented as a function
 * so that the argument is evaluated exactly once and is type-checked.
 *
 * c: character value to classify (a plain `char` argument is promoted to
 *    int; values outside the three ranges, including negative ones, are
 *    reported as punctuation/space).
 *
 * Returns true when c is not a letter or digit in the ranges above.
 */
static inline bool charset_alias_is_punct_or_space(int c)
{
	const bool alnum = (c >= 'A' && c <= 'Z') ||
			(c >= 'a' && c <= 'z') ||
			(c >= '0' && c <= '9');

	return !alnum;
}

/* Macro spelling kept for existing call sites; safe with side-effecting
 * arguments because it forwards to the function above. */
#define IS_PUNCT_OR_SPACE(x) charset_alias_is_punct_or_space((x))
33
34
/*
 * Compare a length-delimited key string against one alias table entry.
 *
 * The signature is that of a bsearch()/qsort() comparison callback.
 *
 * Key characters for which IS_PUNCT_OR_SPACE() is true are skipped, and the
 * remaining key characters are passed through tolower() before comparison.
 * The alias name is compared exactly as stored, so a match requires the
 * stored name to equal the lower-cased, punctuation-stripped key.
 *
 * a: not referenced in the lines visible here.
 *    NOTE(review): `s` is used below but its declaration is not visible in
 *    this listing; presumably it is the key obtained from `a` -- confirm.
 * b: alias table entry; its `name` and `name_len` members are read.
 *
 * Returns the (key - alias) character difference at the first mismatch.
 * Otherwise returns remaining key length minus remaining alias length,
 * which is zero only when both strings were fully consumed.
 */
35static int parserutils_charset_alias_match(const void *a, const void *b)
36{
38 parserutils_charset_aliases_alias *alias = (parserutils_charset_aliases_alias*)b;
39 size_t key_left = s->slen;
40 size_t alias_left = alias->name_len;
41 const char *s_alias = alias->name;
42 const char *s_key = s->s;
43 int cmpret;
44
45 while ((key_left > 0) && (alias_left > 0)) {
	/* Skip ignorable characters in the key only; the alias is not filtered */
46 while ((key_left > 0) && IS_PUNCT_OR_SPACE(*s_key)) {
47 key_left--; s_key++;
48 }
49
	/* Key ran out while skipping: let the length comparison below decide */
50 if (key_left == 0)
51 break;
52
	/* *s_key is a letter or digit here, since the skip loop above stopped on it */
53 cmpret = tolower(*s_key) - *s_alias;
54
55 if (cmpret != 0) {
56 return cmpret;
57 }
58
59 key_left--;
60 s_key++;
61 alias_left--;
62 s_alias++;
63 }
64
	/* Trailing ignorable characters in the key must not count as extra length */
65 while ((key_left > 0) && IS_PUNCT_OR_SPACE(*s_key)) {
66 key_left--; s_key++;
67 }
68
	/* size_t difference converted to the int return type */
69 return key_left - alias_left;
70}
71
80 const char *alias, size_t len)
81{
	/*
	 * Look up `alias` (`len` characters) in the charset_aliases table with
	 * bsearch() and return the matching entry's `canon` member, or NULL
	 * when bsearch() finds no entry.
	 *
	 * NOTE(review): the start of this definition, the declaration of the
	 * key `s`, and the final (comparator) argument of the bsearch() call
	 * are not visible in this listing.  bsearch() only gives meaningful
	 * results if charset_aliases is ordered consistently with that
	 * comparator -- confirm against the included tables.
	 */
82 parserutils_charset_aliases_alias *c;
84
	/* Package the caller's pointer and length as the search key */
85 s.slen = len;
86 s.s = alias;
87
88 c = (parserutils_charset_aliases_alias*)bsearch(&s,
89 &charset_aliases[0],
90 charset_aliases_count,
91 sizeof(parserutils_charset_aliases_alias),
93
94 if (c == NULL)
95 return NULL;
96
97 return c->canon;
98}
99
/*
 * Retrieve the MIB enum value assigned to an encoding name.
 *
 * alias: encoding name to look up; NULL is accepted and yields 0.
 * len:   length of `alias`.
 *
 * Returns the `mib_enum` member of the entry held in `c`, or 0 when `alias`
 * is NULL or `c` is NULL.
 *
 * NOTE(review): the declaration of `c` and the statement that assigns it are
 * not visible in this listing; presumably `c` is the result of
 * canonicalising `alias` -- confirm.
 */
107uint16_t parserutils_charset_mibenum_from_name(const char *alias, size_t len)
108{
110
111 if (alias == NULL)
112 return 0;
113
	/* 0 doubles as the "not found" result */
115 if (c == NULL)
116 return 0;
117
118 return c->mib_enum;
119}
120
/*
 * Retrieve the canonical name of an encoding from its MIB enum.
 *
 * mibenum: MIB enum value to search for.
 *
 * Scans canonical_charset_names from index 0 up to
 * charset_aliases_canon_count and returns the `name` member of the first
 * entry whose `mib_enum` equals `mibenum`, or NULL when no entry matches.
 *
 * NOTE(review): the declaration of `c` is not visible in this listing.
 */
127const char *parserutils_charset_mibenum_to_name(uint16_t mibenum)
128{
129 int i;
131
	/* Linear search; the first matching entry wins */
132 for (i = 0; i < charset_aliases_canon_count; ++i) {
133 c = &canonical_charset_names[i];
134 if (c->mib_enum == mibenum)
135 return c->name;
136 }
137
138 return NULL;
139}
140
148{
	/*
	 * Report whether `mibenum` identifies a Unicode charset by returning
	 * the result of MIBENUM_IS_UNICODE(), which is not defined in this
	 * listing.
	 * NOTE(review): the signature line of this function is not visible
	 * in this listing.
	 */
149 return MIBENUM_IS_UNICODE(mibenum);
150}
parserutils_charset_aliases_canon * parserutils__charset_alias_canonicalise(const char *alias, size_t len)
Retrieve the canonical form of an alias name.
Definition: aliases.c:79
bool parserutils_charset_mibenum_is_unicode(uint16_t mibenum)
Detect if a parserutils_charset is Unicode.
Definition: aliases.c:147
#define IS_PUNCT_OR_SPACE(x)
Definition: aliases.c:29
uint16_t parserutils_charset_mibenum_from_name(const char *alias, size_t len)
Retrieve the MIB enum value assigned to an encoding name.
Definition: aliases.c:107
const char * parserutils_charset_mibenum_to_name(uint16_t mibenum)
Retrieve the canonical name of an encoding from the MIB enum.
Definition: aliases.c:127
static int parserutils_charset_alias_match(const void *a, const void *b)
Definition: aliases.c:35
size_t len
Definition: codec_8859.c:23
size_t slen
Definition: aliases.c:24
const char * s
Definition: aliases.c:25