To better match groff parsing, reject digits and some mathematical
[mandoc.git] / mandocd.c
1 /* $Id: mandocd.c,v 1.13 2022/04/14 16:43:44 schwarze Exp $ */
2 /*
3 * Copyright (c) 2017 Michael Stapelberg <stapelberg@debian.org>
4 * Copyright (c) 2017, 2019, 2021 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #if NEED_XPG4_2
21 #define _XPG4_2
22 #endif
23
24 #include <sys/types.h>
25 #include <sys/socket.h>
26
27 #if HAVE_ERR
28 #include <err.h>
29 #endif
30 #include <limits.h>
31 #include <stdint.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36
37 #include "mandoc.h"
38 #if DEBUG_MEMORY
39 #define DEBUG_NODEF 1
40 #include "mandoc_dbg.h"
41 #endif
42 #include "roff.h"
43 #include "mdoc.h"
44 #include "man.h"
45 #include "mandoc_parse.h"
46 #include "main.h"
47 #include "manconf.h"
48
/* Output formats selectable with the -T command line option. */
enum outt {
	OUTT_ASCII = 0,	/* -T ascii; also the default set in main() */
	OUTT_UTF8 = 1,	/* -T utf8 */
	OUTT_HTML = 2	/* -T html */
};
54
/* Forward declarations of the file-local helpers defined below. */
static	void	  process(struct mparse *parser, enum outt outtype,
			void *formatter);
static	int	  read_fds(int clientfd, int *fds);
static	void	  usage(void) __attribute__((__noreturn__));
58
59
/* Number of file descriptors expected in each control message. */
#define NUM_FDS 3

/*
 * Receive one message on clientfd and extract the NUM_FDS file
 * descriptors carried in its SCM_RIGHTS control message.
 *
 * clientfd: socket to call recvmsg(2) on.
 * fds: output array with room for NUM_FDS descriptors;
 *      it is only written when 1 is returned.
 *
 * Returns 1 if the descriptors were stored in fds, 0 if recvmsg(2)
 * returned 0, and -1 (after printing a warning) if recvmsg(2) failed
 * or the control message was missing or did not have the expected
 * level, type, and length.
 */
static int
read_fds(int clientfd, int *fds)
{
	struct msghdr	 msg;
	struct iovec	 iov[1];
	unsigned char	 dummy[1];
	struct cmsghdr	*cmsg;

	/* Union used for alignment. */
	union {
		uint8_t		 controlbuf[CMSG_SPACE(NUM_FDS * sizeof(int))];
		struct cmsghdr	 align;
	} u;

	memset(&msg, '\0', sizeof(msg));
	msg.msg_control = u.controlbuf;
	msg.msg_controllen = sizeof(u.controlbuf);

	/*
	 * Read a dummy byte - sendmsg cannot send an empty message,
	 * even if we are only interested in the OOB data.
	 */

	iov[0].iov_base = dummy;
	iov[0].iov_len = sizeof(dummy);
	msg.msg_iov = iov;
	msg.msg_iovlen = 1;

	switch (recvmsg(clientfd, &msg, 0)) {
	case -1:
		warn("recvmsg");
		return -1;
	case 0:
		return 0;
	default:
		break;
	}

	if ((cmsg = CMSG_FIRSTHDR(&msg)) == NULL) {
		warnx("CMSG_FIRSTHDR: missing control message");
		return -1;
	}

	if (cmsg->cmsg_level != SOL_SOCKET ||
	    cmsg->cmsg_type != SCM_RIGHTS ||
	    cmsg->cmsg_len != CMSG_LEN(NUM_FDS * sizeof(int))) {
		warnx("CMSG_FIRSTHDR: invalid control message");
		return -1;
	}

	/*
	 * Copy the payload out with memcpy(3) instead of reading it
	 * through a cast pointer: CMSG_DATA() yields a byte pointer
	 * and portable code must not assume that it is suitably
	 * aligned for int access.  The length check above guarantees
	 * that exactly NUM_FDS descriptors are present.
	 */
	memcpy(fds, CMSG_DATA(cmsg), NUM_FDS * sizeof(int));

	return 1;
}
119
120 int
121 main(int argc, char *argv[])
122 {
123 struct manoutput options;
124 struct mparse *parser;
125 void *formatter;
126 const char *defos;
127 const char *errstr;
128 int clientfd;
129 int old_stdin;
130 int old_stdout;
131 int old_stderr;
132 int fds[3];
133 int state, opt;
134 enum outt outtype;
135
136 #if DEBUG_MEMORY
137 mandoc_dbg_init(argc, argv);
138 #endif
139
140 defos = NULL;
141 outtype = OUTT_ASCII;
142 while ((opt = getopt(argc, argv, "I:T:")) != -1) {
143 switch (opt) {
144 case 'I':
145 if (strncmp(optarg, "os=", 3) == 0)
146 defos = optarg + 3;
147 else {
148 warnx("-I %s: Bad argument", optarg);
149 usage();
150 }
151 break;
152 case 'T':
153 if (strcmp(optarg, "ascii") == 0)
154 outtype = OUTT_ASCII;
155 else if (strcmp(optarg, "utf8") == 0)
156 outtype = OUTT_UTF8;
157 else if (strcmp(optarg, "html") == 0)
158 outtype = OUTT_HTML;
159 else {
160 warnx("-T %s: Bad argument", optarg);
161 usage();
162 }
163 break;
164 default:
165 usage();
166 }
167 }
168
169 if (argc > 0) {
170 argc -= optind;
171 argv += optind;
172 }
173 if (argc != 1)
174 usage();
175
176 errstr = NULL;
177 clientfd = strtonum(argv[0], 3, INT_MAX, &errstr);
178 if (errstr)
179 errx(1, "file descriptor %s %s", argv[1], errstr);
180
181 mchars_alloc();
182 parser = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
183 MPARSE_VALIDATE, MANDOC_OS_OTHER, defos);
184
185 memset(&options, 0, sizeof(options));
186 switch (outtype) {
187 case OUTT_ASCII:
188 formatter = ascii_alloc(&options);
189 break;
190 case OUTT_UTF8:
191 formatter = utf8_alloc(&options);
192 break;
193 case OUTT_HTML:
194 options.fragment = 1;
195 formatter = html_alloc(&options);
196 break;
197 }
198
199 state = 1; /* work to do */
200 fflush(stdout);
201 fflush(stderr);
202 if ((old_stdin = dup(STDIN_FILENO)) == -1 ||
203 (old_stdout = dup(STDOUT_FILENO)) == -1 ||
204 (old_stderr = dup(STDERR_FILENO)) == -1) {
205 warn("dup");
206 state = -1; /* error */
207 }
208
209 while (state == 1 && (state = read_fds(clientfd, fds)) == 1) {
210 if (dup2(fds[0], STDIN_FILENO) == -1 ||
211 dup2(fds[1], STDOUT_FILENO) == -1 ||
212 dup2(fds[2], STDERR_FILENO) == -1) {
213 warn("dup2");
214 state = -1;
215 break;
216 }
217
218 close(fds[0]);
219 close(fds[1]);
220 close(fds[2]);
221
222 process(parser, outtype, formatter);
223 mparse_reset(parser);
224 if (outtype == OUTT_HTML)
225 html_reset(formatter);
226
227 fflush(stdout);
228 fflush(stderr);
229 /* Close file descriptors by restoring the old ones. */
230 if (dup2(old_stderr, STDERR_FILENO) == -1 ||
231 dup2(old_stdout, STDOUT_FILENO) == -1 ||
232 dup2(old_stdin, STDIN_FILENO) == -1) {
233 warn("dup2");
234 state = -1;
235 break;
236 }
237 }
238
239 close(clientfd);
240 switch (outtype) {
241 case OUTT_ASCII:
242 case OUTT_UTF8:
243 ascii_free(formatter);
244 break;
245 case OUTT_HTML:
246 html_free(formatter);
247 break;
248 }
249 mparse_free(parser);
250 mchars_free();
251 #if DEBUG_MEMORY
252 mandoc_dbg_finish();
253 #endif
254 return state == -1 ? 1 : 0;
255 }
256
257 static void
258 process(struct mparse *parser, enum outt outtype, void *formatter)
259 {
260 struct roff_meta *meta;
261
262 mparse_readfd(parser, STDIN_FILENO, "<unixfd>");
263 meta = mparse_result(parser);
264 if (meta->macroset == MACROSET_MDOC) {
265 switch (outtype) {
266 case OUTT_ASCII:
267 case OUTT_UTF8:
268 terminal_mdoc(formatter, meta);
269 break;
270 case OUTT_HTML:
271 html_mdoc(formatter, meta);
272 break;
273 }
274 }
275 if (meta->macroset == MACROSET_MAN) {
276 switch (outtype) {
277 case OUTT_ASCII:
278 case OUTT_UTF8:
279 terminal_man(formatter, meta);
280 break;
281 case OUTT_HTML:
282 html_man(formatter, meta);
283 break;
284 }
285 }
286 }
287
/*
 * Print the command line synopsis to standard error
 * and terminate the program with exit status 1.
 */
void
usage(void)
{
	fputs("usage: mandocd [-I os=name] [-T output] socket_fd\n", stderr);
	exit(1);
}