]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.c
new: escape sequence handling
[mandoc.git] / mandoc.c
1 /* $Id: mandoc.c,v 1.35 2010/09/04 20:18:53 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <time.h>
29
30 #include "mandoc.h"
31 #include "libmandoc.h"
32
33 static int a2time(time_t *, const char *, const char *);
34
35
36 int
37 mandoc_special(char *p)
38 {
39 int len, i;
40 char term;
41 char *sv;
42
43 len = 0;
44 term = '\0';
45 sv = p;
46
47 assert('\\' == *p);
48 p++;
49
50 switch (*p++) {
51 #if 0
52 case ('Z'):
53 /* FALLTHROUGH */
54 case ('X'):
55 /* FALLTHROUGH */
56 case ('x'):
57 /* FALLTHROUGH */
58 case ('S'):
59 /* FALLTHROUGH */
60 case ('R'):
61 /* FALLTHROUGH */
62 case ('N'):
63 /* FALLTHROUGH */
64 case ('l'):
65 /* FALLTHROUGH */
66 case ('L'):
67 /* FALLTHROUGH */
68 case ('H'):
69 /* FALLTHROUGH */
70 case ('h'):
71 /* FALLTHROUGH */
72 case ('D'):
73 /* FALLTHROUGH */
74 case ('C'):
75 /* FALLTHROUGH */
76 case ('b'):
77 /* FALLTHROUGH */
78 case ('B'):
79 /* FALLTHROUGH */
80 case ('a'):
81 /* FALLTHROUGH */
82 case ('A'):
83 if (*p++ != '\'')
84 return(0);
85 term = '\'';
86 break;
87 #endif
88 case ('h'):
89 /* FALLTHROUGH */
90 case ('v'):
91 /* FALLTHROUGH */
92 case ('s'):
93 if (ASCII_HYPH == *p)
94 *p = '-';
95
96 i = 0;
97 if ('+' == *p || '-' == *p) {
98 p++;
99 i = 1;
100 }
101
102 switch (*p++) {
103 case ('('):
104 len = 2;
105 break;
106 case ('['):
107 term = ']';
108 break;
109 case ('\''):
110 term = '\'';
111 break;
112 case ('0'):
113 i = 1;
114 /* FALLTHROUGH */
115 default:
116 len = 1;
117 p--;
118 break;
119 }
120
121 if (ASCII_HYPH == *p)
122 *p = '-';
123 if ('+' == *p || '-' == *p) {
124 if (i)
125 return(0);
126 p++;
127 }
128
129 /* Handle embedded numerical subexp or escape. */
130
131 if ('(' == *p) {
132 while (*p && ')' != *p)
133 if ('\\' == *p++) {
134 i = mandoc_special(--p);
135 if (0 == i)
136 return(0);
137 p += i;
138 }
139
140 if (')' == *p++)
141 break;
142
143 return(0);
144 } else if ('\\' == *p) {
145 if (0 == (i = mandoc_special(p)))
146 return(0);
147 p += i;
148 }
149
150 break;
151 #if 0
152 case ('Y'):
153 /* FALLTHROUGH */
154 case ('V'):
155 /* FALLTHROUGH */
156 case ('$'):
157 /* FALLTHROUGH */
158 case ('n'):
159 /* FALLTHROUGH */
160 #endif
161 case ('k'):
162 /* FALLTHROUGH */
163 case ('M'):
164 /* FALLTHROUGH */
165 case ('m'):
166 /* FALLTHROUGH */
167 case ('f'):
168 /* FALLTHROUGH */
169 case ('F'):
170 /* FALLTHROUGH */
171 case ('*'):
172 switch (*p++) {
173 case ('('):
174 len = 2;
175 break;
176 case ('['):
177 term = ']';
178 break;
179 default:
180 len = 1;
181 p--;
182 break;
183 }
184 break;
185 case ('('):
186 len = 2;
187 break;
188 case ('['):
189 term = ']';
190 break;
191 case ('z'):
192 len = 1;
193 if ('\\' == *p) {
194 if (0 == (i = mandoc_special(p)))
195 return(0);
196 p += i;
197 return(*p ? (int)(p - sv) : 0);
198 }
199 break;
200 case ('o'):
201 /* FALLTHROUGH */
202 case ('w'):
203 if ('\'' == *p++) {
204 term = '\'';
205 break;
206 }
207 /* FALLTHROUGH */
208 default:
209 len = 1;
210 p--;
211 break;
212 }
213
214 if (term) {
215 for ( ; *p && term != *p; p++)
216 if (ASCII_HYPH == *p)
217 *p = '-';
218 return(*p ? (int)(p - sv) : 0);
219 }
220
221 for (i = 0; *p && i < len; i++, p++)
222 if (ASCII_HYPH == *p)
223 *p = '-';
224 return(i == len ? (int)(p - sv) : 0);
225 }
226
227
228 void *
229 mandoc_calloc(size_t num, size_t size)
230 {
231 void *ptr;
232
233 ptr = calloc(num, size);
234 if (NULL == ptr) {
235 perror(NULL);
236 exit((int)MANDOCLEVEL_SYSERR);
237 }
238
239 return(ptr);
240 }
241
242
243 void *
244 mandoc_malloc(size_t size)
245 {
246 void *ptr;
247
248 ptr = malloc(size);
249 if (NULL == ptr) {
250 perror(NULL);
251 exit((int)MANDOCLEVEL_SYSERR);
252 }
253
254 return(ptr);
255 }
256
257
258 void *
259 mandoc_realloc(void *ptr, size_t size)
260 {
261
262 ptr = realloc(ptr, size);
263 if (NULL == ptr) {
264 perror(NULL);
265 exit((int)MANDOCLEVEL_SYSERR);
266 }
267
268 return(ptr);
269 }
270
271
272 char *
273 mandoc_strdup(const char *ptr)
274 {
275 char *p;
276
277 p = strdup(ptr);
278 if (NULL == p) {
279 perror(NULL);
280 exit((int)MANDOCLEVEL_SYSERR);
281 }
282
283 return(p);
284 }
285
286
/*
 * Parse the string `p' against the strptime(3) format `fmt'.
 * The whole string must be consumed by the format.  On success the
 * result of mktime(3) is stored in `*t' and 1 is returned; otherwise
 * `*t' is left untouched and 0 is returned.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	const char	*rest;

	/* Fields the format does not set start out as zero. */
	memset(&parsed, 0, sizeof(parsed));

	rest = strptime(p, fmt, &parsed);
	if (rest == NULL || *rest != '\0')
		return(0);

	*t = mktime(&parsed);
	return(1);
}
303
304
305 /*
306 * Convert from a manual date string (see mdoc(7) and man(7)) into a
307 * date according to the stipulated date type.
308 */
309 time_t
310 mandoc_a2time(int flags, const char *p)
311 {
312 time_t t;
313
314 if (MTIME_MDOCDATE & flags) {
315 if (0 == strcmp(p, "$" "Mdocdate$"))
316 return(time(NULL));
317 if (a2time(&t, "$" "Mdocdate: %b %d %Y $", p))
318 return(t);
319 }
320
321 if (MTIME_CANONICAL & flags || MTIME_REDUCED & flags)
322 if (a2time(&t, "%b %d, %Y", p))
323 return(t);
324
325 if (MTIME_ISO_8601 & flags)
326 if (a2time(&t, "%Y-%m-%d", p))
327 return(t);
328
329 if (MTIME_REDUCED & flags) {
330 if (a2time(&t, "%d, %Y", p))
331 return(t);
332 if (a2time(&t, "%Y", p))
333 return(t);
334 }
335
336 return(0);
337 }
338
339
/*
 * Decide whether the `sz' bytes starting at `p' end a sentence.
 *
 * The buffer is examined from its last byte backwards.  Closing
 * quotes, brackets and parentheses are skipped; if one of them is met
 * before any end-of-sentence punctuation, the text is treated as
 * enclosed (the same as passing a non-zero `enclosed').  A run of
 * `.', `!' or `?' marks a candidate sentence end.  The first byte that
 * is none of these settles the question: a candidate counts unless the
 * text is enclosed and that byte is not alphanumeric.  If the buffer
 * is exhausted first, a candidate counts only when not enclosed.
 *
 * Returns 1 for end-of-sentence, 0 otherwise (always 0 when sz is 0).
 */
int
mandoc_eos(const char *p, size_t sz, int enclosed)
{
	size_t		 left;	/* bytes not yet examined */
	int		 found;
	char		 ch;

	if (0 == sz)
		return(0);

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	/*
	 * Walk backwards by index rather than by pointer: stepping a
	 * pointer to one element before the buffer is undefined, and
	 * the index keeps the full range of size_t.
	 */
	found = 0;
	for (left = sz; left > 0; left--) {
		ch = p[left - 1];
		switch (ch) {
		case ('\"'):
			/* FALLTHROUGH */
		case ('\''):
			/* FALLTHROUGH */
		case (']'):
			/* FALLTHROUGH */
		case (')'):
			if (0 == found)
				enclosed = 1;
			break;
		case ('.'):
			/* FALLTHROUGH */
		case ('!'):
			/* FALLTHROUGH */
		case ('?'):
			found = 1;
			break;
		default:
			return(found && (!enclosed ||
			    isalnum((unsigned char)ch)));
		}
	}

	return(found && !enclosed);
}
382
383
/*
 * Decide whether a line may be broken at the hyphen `c' points to,
 * inside the buffer beginning at `start'.  A break is allowed only if
 * the hyphen stands free within a word.
 *
 * Returns 1 if breaking is allowed, 0 otherwise.
 */
int
mandoc_hyph(const char *start, const char *c)
{
	char		 before, after;

	/* Never break at the first or the last byte of the buffer. */
	if (start == c)
		return(0);
	after = c[1];
	if (after == '\0')
		return(0);
	before = c[-1];

	/*
	 * A neighbouring tab or blank puts the hyphen at a word edge,
	 * and a neighbouring '-' makes it part of a double hyphen.
	 */
	switch (after) {
	case '\t':
	case ' ':
	case '-':
		return(0);
	default:
		break;
	}

	/* The same applies before it; a backslash means an escape. */
	switch (before) {
	case '\t':
	case ' ':
	case '-':
	case '\\':
		return(0);
	default:
		break;
	}

	return(1);
}