]> git.cameronkatri.com Git - mandoc.git/blob - chars.c
aa6de429dd3f72cdbd683648fa13846283397c01
[mandoc.git] / chars.c
1 /* $Id: chars.c,v 1.24 2010/07/26 13:59:00 kristaps Exp $ */
2 /*
3 * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <assert.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25
26 #include "mandoc.h"
27 #include "chars.h"
28
/*
 * Inclusive range of first bytes that the lookup table is indexed by:
 * a bucket index is (first byte - PRINT_LO).
 */
#define	PRINT_LO	 32
#define	PRINT_HI	 126
31
/*
 * Bit flags stored in `type' of struct ln; a lookup passes one of
 * these and an entry matches when the masks intersect.
 */
#define	CHARS_CHAR	 (1 << 0)
#define	CHARS_STRING	 (1 << 1)
#define	CHARS_BOTH	 (CHARS_CHAR | CHARS_STRING)

/*
 * One row of the character table.
 * Field order is significant: the CHAR/STRING/BOTH row macros fill
 * the structure with positional initialisers.
 */
struct	ln {
	struct ln	 *next;    /* next entry in the same hash chain */
	const char	 *code;    /* input name; its first byte picks the bucket */
	const char	 *ascii;   /* string handed back by the *2str lookups */
	int		  unicode; /* value handed back by the *2cp lookups */
	int		  type;    /* mask of CHARS_CHAR and/or CHARS_STRING */
};
42
/* Number of slots in lines[]; chars_init() walks exactly this many. */
#define	LINES_MAX	 370

/*
 * Row constructors for the table (presumably invoked from chars.in,
 * which is included right after these definitions).  Each expands to
 * one positional struct ln initialiser followed by a comma; the only
 * difference between them is the type mask.
 */
#define	LN_ROW(in, ch, code, kind) \
	{ NULL, (in), (ch), (code), (kind) },
#define	CHAR(in, ch, code)	LN_ROW(in, ch, code, CHARS_CHAR)
#define	STRING(in, ch, code)	LN_ROW(in, ch, code, CHARS_STRING)
#define	BOTH(in, ch, code)	LN_ROW(in, ch, code, CHARS_BOTH)

/* Open and close the definition of the static lines[] array. */
#define	CHAR_TBL_START	 static struct ln lines[LINES_MAX] = {
#define	CHAR_TBL_END	 };
54
55 #include "chars.in"
56
57 struct tbl {
58 enum chars type;
59 struct ln **htab;
60 };
61
/* File-local lookup helpers, defined at the end of this file. */
static	const struct ln	 *find(struct tbl *tab, const char *p,
				size_t sz, int type);
static	inline int	  match(const struct ln *ln, const char *p,
				size_t sz, int type);
65
66
67 void
68 chars_free(void *arg)
69 {
70 struct tbl *tab;
71
72 tab = (struct tbl *)arg;
73
74 free(tab->htab);
75 free(tab);
76 }
77
78
79 void *
80 chars_init(enum chars type)
81 {
82 struct tbl *tab;
83 struct ln **htab;
84 struct ln *pp;
85 int i, hash;
86
87 /*
88 * Constructs a very basic chaining hashtable. The hash routine
89 * is simply the integral value of the first character.
90 * Subsequent entries are chained in the order they're processed
91 * (they're in-line re-ordered during lookup).
92 */
93
94 tab = malloc(sizeof(struct tbl));
95 if (NULL == tab) {
96 perror(NULL);
97 exit(EXIT_FAILURE);
98 }
99
100 htab = calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **));
101 if (NULL == htab) {
102 perror(NULL);
103 exit(EXIT_FAILURE);
104 }
105
106 for (i = 0; i < LINES_MAX; i++) {
107 hash = (int)lines[i].code[0] - PRINT_LO;
108
109 if (NULL == (pp = htab[hash])) {
110 htab[hash] = &lines[i];
111 continue;
112 }
113
114 for ( ; pp->next; pp = pp->next)
115 /* Scan ahead. */ ;
116 pp->next = &lines[i];
117 }
118
119 tab->htab = htab;
120 tab->type = type;
121 return(tab);
122 }
123
124
125 /*
126 * Special character to Unicode codepoint.
127 */
128 int
129 chars_spec2cp(void *arg, const char *p, size_t sz)
130 {
131 const struct ln *ln;
132
133 ln = find((struct tbl *)arg, p, sz, CHARS_CHAR);
134 if (NULL == ln)
135 return(-1);
136 return(ln->unicode);
137 }
138
139
140 /*
141 * Reserved word to Unicode codepoint.
142 */
143 int
144 chars_res2cp(void *arg, const char *p, size_t sz)
145 {
146 const struct ln *ln;
147
148 ln = find((struct tbl *)arg, p, sz, CHARS_STRING);
149 if (NULL == ln)
150 return(-1);
151 return(ln->unicode);
152 }
153
154
155 /*
156 * Special character to string array.
157 */
158 const char *
159 chars_spec2str(void *arg, const char *p, size_t sz, size_t *rsz)
160 {
161 const struct ln *ln;
162
163 ln = find((struct tbl *)arg, p, sz, CHARS_CHAR);
164 if (NULL == ln)
165 return(NULL);
166
167 *rsz = strlen(ln->ascii);
168 return(ln->ascii);
169 }
170
171
172 /*
173 * Reserved word to string array.
174 */
175 const char *
176 chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz)
177 {
178 const struct ln *ln;
179
180 ln = find((struct tbl *)arg, p, sz, CHARS_STRING);
181 if (NULL == ln)
182 return(NULL);
183
184 *rsz = strlen(ln->ascii);
185 return(ln->ascii);
186 }
187
188
189 static const struct ln *
190 find(struct tbl *tab, const char *p, size_t sz, int type)
191 {
192 struct ln *pp, *prev;
193 struct ln **htab;
194 int hash;
195
196 assert(p);
197 if (0 == sz)
198 return(NULL);
199
200 if (p[0] < PRINT_LO || p[0] > PRINT_HI)
201 return(NULL);
202
203 /*
204 * Lookup the symbol in the symbol hash. See ascii2htab for the
205 * hashtable specs. This dynamically re-orders the hash chain
206 * to optimise for repeat hits.
207 */
208
209 hash = (int)p[0] - PRINT_LO;
210 htab = tab->htab;
211
212 if (NULL == (pp = htab[hash]))
213 return(NULL);
214
215 for (prev = NULL; pp; pp = pp->next) {
216 if ( ! match(pp, p, sz, type)) {
217 prev = pp;
218 continue;
219 }
220
221 if (prev) {
222 prev->next = pp->next;
223 pp->next = htab[hash];
224 htab[hash] = pp;
225 }
226
227 return(pp);
228 }
229
230 return(NULL);
231 }
232
233
234 static inline int
235 match(const struct ln *ln, const char *p, size_t sz, int type)
236 {
237
238 if ( ! (ln->type & type))
239 return(0);
240 if (strncmp(ln->code, p, sz))
241 return(0);
242 return('\0' == ln->code[(int)sz]);
243 }