git.cameronkatri.com Git - mandoc.git/commitdiff
Improve mandocdb's ability to handle NAME sections by sucking the entire
author Kristaps Dzonsons <kristaps@bsd.lv>
Fri, 23 Mar 2012 06:52:17 +0000 (06:52 +0000)
committer Kristaps Dzonsons <kristaps@bsd.lv>
Fri, 23 Mar 2012 06:52:17 +0000 (06:52 +0000)
shebang into a buffer and parsing it that way.  This improves on many
cruddy -man manuals in the wild.

mandocdb.c

index d6b160d50867825f9c4983d2b094aa8a3bbea3fd..e621c1d2a21138fa8c8fbb6736030af14c4099b0 100644 (file)
@@ -1,4 +1,4 @@
-/*     $Id: mandocdb.c,v 1.45 2012/03/23 05:45:45 kristaps Exp $ */
+/*     $Id: mandocdb.c,v 1.46 2012/03/23 06:52:17 kristaps Exp $ */
 /*
  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -547,7 +547,7 @@ out:
 
 usage:
        fprintf(stderr,
-               "usage: %s [-avvv] [-C file] | dir ... | -t file ...\n"
+               "usage: %s [-av] [-C file] | dir ... | -t file ...\n"
                "                        -d dir [file ...] | "
                "-u dir [file ...]\n",
                progname);
@@ -822,9 +822,9 @@ index_merge(const struct of *of, struct mparse *mp,
                while (0 == (*files->seq)(files, &key, &val, seq)) {
                        seq = R_NEXT;
                        if (val.size)
-                               fprintf(stderr, "%s: probably "
-                                   "unreachable, title is %s\n",
-                                   (char *)val.data, (char *)key.data);
+                               WARNING((char *)val.data, basedir, 
+                                       "Probably unreachable, title "
+                                       "is %s", (char *)key.data);
                }
                (*files->close)(files);
        }
@@ -1323,8 +1323,8 @@ static int
 pman_node(MAN_ARGS)
 {
        const struct man_node *head, *body;
-       const char      *start, *sv;
-       size_t           sz;
+       char            *start, *sv, *title;
+       size_t           sz, titlesz;
 
        if (NULL == n)
                return(0);
@@ -1347,8 +1347,54 @@ pman_node(MAN_ARGS)
                                NULL != (body = body->child) &&
                                MAN_TEXT == body->type) {
 
-                       assert(body->string);
-                       start = sv = body->string;
+                       title = NULL;
+                       titlesz = 0;
+                       /*
+                        * Suck the entire NAME section into memory.
+                        * Yes, we might run away.
+                        * But too many manuals have big, spread-out
+                        * NAME sections over many lines.
+                        */
+                       for ( ; NULL != body; body = body->next) {
+                               if (MAN_TEXT != body->type)
+                                       break;
+                               if (0 == (sz = strlen(body->string)))
+                                       continue;
+                               title = mandoc_realloc
+                                       (title, titlesz + sz + 1);
+                               memcpy(title + titlesz, body->string, sz);
+                               titlesz += sz + 1;
+                               title[(int)titlesz - 1] = ' ';
+                       }
+                       if (NULL == title)
+                               return(0);
+
+                       title = mandoc_realloc(title, titlesz + 1);
+                       title[(int)titlesz] = '\0';
+
+                       /* Skip leading space.  */
+
+                       sv = title;
+                       while (isspace((unsigned char)*sv))
+                               sv++;
+
+                       if (0 == (sz = strlen(sv))) {
+                               free(title);
+                               return(0);
+                       }
+
+                       /* Erase trailing space. */
+
+                       start = &sv[sz - 1];
+                       while (start > sv && isspace((unsigned char)*start))
+                               *start-- = '\0';
+
+                       if (start == sv) {
+                               free(title);
+                               return(0);
+                       }
+
+                       start = sv;
 
                        /* 
                         * Go through a special heuristic dance here.
@@ -1386,10 +1432,11 @@ pman_node(MAN_ARGS)
 
                        if (sv == start) {
                                buf_append(buf, start);
+                               free(title);
                                return(1);
                        }
 
-                       while (' ' == *start)
+                       while (isspace((unsigned char)*start))
                                start++;
 
                        if (0 == strncmp(start, "-", 1))
@@ -1411,6 +1458,7 @@ pman_node(MAN_ARGS)
                        buf_appendb(buf, start, sz);
 
                        hash_put(hash, buf, TYPE_Nd);
+                       free(title);
                }
        }