]> git.cameronkatri.com Git - mandoc.git/blob - chars.c
Note discarding of \m, \M, and \s in COMPATIBILITY sections.
[mandoc.git] / chars.c
1 /* $Id: chars.c,v 1.23 2010/07/18 12:10:08 kristaps Exp $ */
2 /*
3 * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <assert.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25
26 #include "mandoc.h"
27 #include "chars.h"
28
29 #define PRINT_HI 126
30 #define PRINT_LO 32
31
/*
 * Flag bits stored in the `type' member of struct ln.  Lookups pass
 * one of these and only consider entries whose type has that bit set.
 */
#define	CHARS_CHAR	 (1 << 0)
#define	CHARS_STRING	 (1 << 1)
#define	CHARS_BOTH	 (CHARS_CHAR | CHARS_STRING)

/*
 * One entry of the static lookup table.  Entries whose `code' starts
 * with the same character are linked together through `next'.
 * Member order matters: the table rows are positional initialisers.
 */
struct ln {
	struct ln	 *next;		/* following entry in the same chain */
	const char	 *code;		/* name the entry is looked up by */
	const char	 *ascii;	/* string handed back by the *2str calls */
	size_t		  asciisz;	/* size reported together with `ascii' */
	int		  unicode;	/* value handed back by the *2cp calls */
	int		  type;		/* CHARS_CHAR, CHARS_STRING or CHARS_BOTH */
};
43
/* Number of slots in the static lines[] table. */
#define	LINES_MAX	  370

/*
 * Row constructors for the table body (presumably invoked from
 * chars.in, which is included right after these definitions).
 * Each one expands to a single struct ln initialiser followed by a
 * comma; they differ only in the type flag they record.
 */
#define	CHARS_LN_ROW(in, ch, chsz, code, type) \
	{ NULL, (in), (ch), (chsz), (code), (type) },

#define	CHAR(in, ch, chsz, code) \
	CHARS_LN_ROW(in, ch, chsz, code, CHARS_CHAR)
#define	STRING(in, ch, chsz, code) \
	CHARS_LN_ROW(in, ch, chsz, code, CHARS_STRING)
#define	BOTH(in, ch, chsz, code) \
	CHARS_LN_ROW(in, ch, chsz, code, CHARS_BOTH)

/* Opening and closing of the table definition itself. */
#define	CHAR_TBL_START	  static struct ln lines[LINES_MAX] = {
#define	CHAR_TBL_END	  };
55
56 #include "chars.in"
57
58 struct tbl {
59 enum chars type;
60 struct ln **htab;
61 };
62
63 static inline int match(const struct ln *,
64 const char *, size_t, int);
65 static const struct ln *find(struct tbl *, const char *, size_t, int);
66
67
68 void
69 chars_free(void *arg)
70 {
71 struct tbl *tab;
72
73 tab = (struct tbl *)arg;
74
75 free(tab->htab);
76 free(tab);
77 }
78
79
80 void *
81 chars_init(enum chars type)
82 {
83 struct tbl *tab;
84 struct ln **htab;
85 struct ln *pp;
86 int i, hash;
87
88 /*
89 * Constructs a very basic chaining hashtable. The hash routine
90 * is simply the integral value of the first character.
91 * Subsequent entries are chained in the order they're processed
92 * (they're in-line re-ordered during lookup).
93 */
94
95 tab = malloc(sizeof(struct tbl));
96 if (NULL == tab) {
97 perror(NULL);
98 exit(EXIT_FAILURE);
99 }
100
101 htab = calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **));
102 if (NULL == htab) {
103 perror(NULL);
104 exit(EXIT_FAILURE);
105 }
106
107 for (i = 0; i < LINES_MAX; i++) {
108 hash = (int)lines[i].code[0] - PRINT_LO;
109
110 if (NULL == (pp = htab[hash])) {
111 htab[hash] = &lines[i];
112 continue;
113 }
114
115 for ( ; pp->next; pp = pp->next)
116 /* Scan ahead. */ ;
117 pp->next = &lines[i];
118 }
119
120 tab->htab = htab;
121 tab->type = type;
122 return(tab);
123 }
124
125
126 /*
127 * Special character to Unicode codepoint.
128 */
129 int
130 chars_spec2cp(void *arg, const char *p, size_t sz)
131 {
132 const struct ln *ln;
133
134 ln = find((struct tbl *)arg, p, sz, CHARS_CHAR);
135 if (NULL == ln)
136 return(-1);
137 return(ln->unicode);
138 }
139
140
141 /*
142 * Reserved word to Unicode codepoint.
143 */
144 int
145 chars_res2cp(void *arg, const char *p, size_t sz)
146 {
147 const struct ln *ln;
148
149 ln = find((struct tbl *)arg, p, sz, CHARS_STRING);
150 if (NULL == ln)
151 return(-1);
152 return(ln->unicode);
153 }
154
155
156 /*
157 * Special character to string array.
158 */
159 const char *
160 chars_spec2str(void *arg, const char *p, size_t sz, size_t *rsz)
161 {
162 const struct ln *ln;
163
164 ln = find((struct tbl *)arg, p, sz, CHARS_CHAR);
165 if (NULL == ln)
166 return(NULL);
167
168 *rsz = ln->asciisz;
169 return(ln->ascii);
170 }
171
172
173 /*
174 * Reserved word to string array.
175 */
176 const char *
177 chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz)
178 {
179 const struct ln *ln;
180
181 ln = find((struct tbl *)arg, p, sz, CHARS_STRING);
182 if (NULL == ln)
183 return(NULL);
184
185 *rsz = ln->asciisz;
186 return(ln->ascii);
187 }
188
189
190 static const struct ln *
191 find(struct tbl *tab, const char *p, size_t sz, int type)
192 {
193 struct ln *pp, *prev;
194 struct ln **htab;
195 int hash;
196
197 assert(p);
198 if (0 == sz)
199 return(NULL);
200
201 if (p[0] < PRINT_LO || p[0] > PRINT_HI)
202 return(NULL);
203
204 /*
205 * Lookup the symbol in the symbol hash. See ascii2htab for the
206 * hashtable specs. This dynamically re-orders the hash chain
207 * to optimise for repeat hits.
208 */
209
210 hash = (int)p[0] - PRINT_LO;
211 htab = tab->htab;
212
213 if (NULL == (pp = htab[hash]))
214 return(NULL);
215
216 for (prev = NULL; pp; pp = pp->next) {
217 if ( ! match(pp, p, sz, type)) {
218 prev = pp;
219 continue;
220 }
221
222 if (prev) {
223 prev->next = pp->next;
224 pp->next = htab[hash];
225 htab[hash] = pp;
226 }
227
228 return(pp);
229 }
230
231 return(NULL);
232 }
233
234
235 static inline int
236 match(const struct ln *ln, const char *p, size_t sz, int type)
237 {
238
239 if ( ! (ln->type & type))
240 return(0);
241 if (strncmp(ln->code, p, sz))
242 return(0);
243 return('\0' == ln->code[(int)sz]);
244 }