From 3d871d9086bbc5f79cd485dfb26b9d02a19cf2e3 Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Sun, 5 Jan 2014 20:26:36 +0000 Subject: Add an option -Q (quick) to mandocdb(8) for accelerated generation of reduced-size databases. Implement this by allowing the parsers to optionally abort the parse sequence after the NAME section. While here, garbage collect the unused void *arg attribute of struct mparse and mparse_alloc() and fix some errors in mandoc(3). This reduces the processing time of mandocdb(8) on /usr/share/man by a factor of 2 and the database size by a factor of 4. However, it still takes 5 times the time and 6 times the space of makewhatis(8), so more work is clearly needed. --- libman.h | 3 ++- libmandoc.h | 6 +++--- libmdoc.h | 3 ++- main.c | 4 ++-- man.c | 12 ++++++++++-- mandoc.3 | 55 ++++++++++++++++++++++++++++++++++++++++++++++--------- mandoc.h | 4 ++-- mandocdb.c | 20 ++++++++++++-------- mdoc.c | 14 +++++++++++--- read.c | 20 +++++++++++--------- 10 files changed, 101 insertions(+), 40 deletions(-) diff --git a/libman.h b/libman.h index f2ba6a12..7d6de382 100644 --- a/libman.h +++ b/libman.h @@ -1,4 +1,4 @@ -/* $Id: libman.h,v 1.56 2012/11/17 00:26:33 schwarze Exp $ */ +/* $Id: libman.h,v 1.57 2014/01/05 20:26:36 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons * @@ -24,6 +24,7 @@ enum man_next { struct man { struct mparse *parse; /* parse pointer */ + int quick; /* abort parse early */ int flags; /* parse flags */ #define MAN_HALT (1 << 0) /* badness happened: die */ #define MAN_ELINE (1 << 1) /* Next-line element scope. */ diff --git a/libmandoc.h b/libmandoc.h index ab243ff9..7a1b4219 100644 --- a/libmandoc.h +++ b/libmandoc.h @@ -1,4 +1,4 @@ -/* $Id: libmandoc.h,v 1.37 2014/01/05 19:10:56 joerg Exp $ */ +/* $Id: libmandoc.h,v 1.38 2014/01/05 20:26:36 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011, 2012 Kristaps Dzonsons * Copyright (c) 2013 Ingo Schwarze @@ -50,7 +50,7 @@ int mandoc_strntoi(const char *, size_t, int); const char *mandoc_a2msec(const char*); void mdoc_free(struct mdoc *); -struct mdoc *mdoc_alloc(struct roff *, struct mparse *, char *); +struct mdoc *mdoc_alloc(struct roff *, struct mparse *, char *, int); void mdoc_reset(struct mdoc *); int mdoc_parseln(struct mdoc *, int, char *, int); int mdoc_endparse(struct mdoc *); @@ -58,7 +58,7 @@ int mdoc_addspan(struct mdoc *, const struct tbl_span *); int mdoc_addeqn(struct mdoc *, const struct eqn *); void man_free(struct man *); -struct man *man_alloc(struct roff *, struct mparse *); +struct man *man_alloc(struct roff *, struct mparse *, int); void man_reset(struct man *); int man_parseln(struct man *, int, char *, int); int man_endparse(struct man *); diff --git a/libmdoc.h b/libmdoc.h index 3f14519d..cd0ce9d3 100644 --- a/libmdoc.h +++ b/libmdoc.h @@ -1,4 +1,4 @@ -/* $Id: libmdoc.h,v 1.82 2013/10/21 23:47:58 schwarze Exp $ */ +/* $Id: libmdoc.h,v 1.83 2014/01/05 20:26:36 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2013 Ingo Schwarze @@ -26,6 +26,7 @@ enum mdoc_next { struct mdoc { struct mparse *parse; /* parse pointer */ char *defos; /* default argument for .Os */ + int quick; /* abort parse early */ int flags; /* parse flags */ #define MDOC_HALT (1 << 0) /* error in parse: halt */ #define MDOC_LITERAL (1 << 1) /* in a literal scope */ diff --git a/main.c b/main.c index 7e5c7a98..5038b639 100644 --- a/main.c +++ b/main.c @@ -1,4 +1,4 @@ -/* $Id: main.c,v 1.167 2012/11/19 17:22:26 schwarze Exp $ */ +/* $Id: main.c,v 1.168 2014/01/05 20:26:36 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010, 2011, 2012 Ingo Schwarze @@ -140,7 +140,7 @@ main(int argc, char *argv[]) /* NOTREACHED */ } - curp.mp = mparse_alloc(type, curp.wlevel, mmsg, &curp, defos); + curp.mp = mparse_alloc(type, curp.wlevel, mmsg, defos, 0); /* * Conditionally start up the lookaside buffer before parsing. diff --git a/man.c b/man.c index 72c6afab..b810ac7b 100644 --- a/man.c +++ b/man.c @@ -1,6 +1,7 @@ -/* $Id: man.c,v 1.122 2013/12/31 23:23:10 schwarze Exp $ */ +/* $Id: man.c,v 1.123 2014/01/05 20:26:36 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons + * Copyright (c) 2013, 2014 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -97,7 +98,7 @@ man_free(struct man *man) struct man * -man_alloc(struct roff *roff, struct mparse *parse) +man_alloc(struct roff *roff, struct mparse *parse, int quick) { struct man *p; @@ -105,6 +106,7 @@ man_alloc(struct roff *roff, struct mparse *parse) man_hash_init(); p->parse = parse; + p->quick = quick; p->roff = roff; man_alloc1(p); @@ -604,6 +606,12 @@ man_pmacro(struct man *man, int ln, char *buf, int offs) if ( ! (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf)) goto err; + /* In quick mode (for mandocdb), abort after the NAME section. */ + + if (man->quick && MAN_SH == tok && + strcmp(man->last->prev->child->string, "NAME")) + return(2); + /* * We weren't in a block-line scope when entering the * above-parsed macro, so return. diff --git a/mandoc.3 b/mandoc.3 index fe6503d5..db763749 100644 --- a/mandoc.3 +++ b/mandoc.3 @@ -1,4 +1,4 @@ -.\" $Id: mandoc.3,v 1.22 2013/10/06 17:01:52 schwarze Exp $ +.\" $Id: mandoc.3,v 1.23 2014/01/05 20:26:36 schwarze Exp $ .\" .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons .\" Copyright (c) 2010 Ingo Schwarze @@ -15,7 +15,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: October 6 2013 $ +.Dd $Mdocdate: January 5 2014 $ .Dt MANDOC 3 .Os .Sh NAME @@ -50,8 +50,8 @@ .In mandoc.h .Ft "enum mandoc_esc" .Fo mandoc_escape -.Fa "const char const **end" -.Fa "const char const **start" +.Fa "const char **end" +.Fa "const char **start" .Fa "int *sz" .Fc .Ft "const struct man_meta *" @@ -97,16 +97,17 @@ .Fc .Ft void .Fo mparse_alloc -.Fa "enum mparset type" +.Fa "enum mparset inttype" .Fa "enum mandoclevel wlevel" -.Fa "mandocmsg msg" -.Fa "void *msgarg" +.Fa "mandocmsg mmsg" +.Fa "char *defos" +.Fa "int quick" .Fc .Ft void .Fo mparse_free .Fa "struct mparse *parse" .Fc -.Ft void +.Ft const char * .Fo mparse_getkeep .Fa "const struct mparse *parse" .Fc @@ -203,7 +204,7 @@ An escape sequence classification. A fatal error, error, or warning message during parsing. .It Vt "enum mandoclevel" A classification of an -.Vt "enum mandoclevel" +.Vt "enum mandocerr" as regards system operation. .It Vt "struct mchars" An opaque pointer to an object allowing for translation between @@ -352,6 +353,42 @@ implemented in .Pa mdoc.c . .It Fn mparse_alloc Allocate a parser. +The arguments have the following effect: +.Bl -tag -offset 5n -width inttype +.It Ar inttype +When set to +.Dv MPARSE_MDOC +or +.Dv MPARSE_MAN , +only that parser will be used. +With +.Dv MPARSE_AUTO , +the document type will be automatically detected. +.It Ar wlevel +Can be set to +.Dv MANDOCLEVEL_FATAL , +.Dv MANDOCLEVEL_ERROR , +or +.Dv MANDOCLEVEL_WARNING . +Messages below the selected level will be suppressed. +.It Ar mmsg +A callback function to handle errors and warnings. +See +.Pa main.c +for an example. +.It Ar defos +A default string for the +.Xr mdoc 7 +.Sq \&Os +macro, overriding the +.Dv OSNAME +preprocessor definition and the results of +.Xr uname 3 . +.It Ar quick +When set, parsing is aborted after the NAME section. +This is for example useful to quickly build minimal databases. +.El +.Pp The same parser may be used for multiple files so long as .Fn mparse_reset is called between parses. diff --git a/mandoc.h b/mandoc.h index 3f1f118a..9c209ee3 100644 --- a/mandoc.h +++ b/mandoc.h @@ -1,4 +1,4 @@ -/* $Id: mandoc.h,v 1.113 2014/01/02 16:29:55 schwarze Exp $ */ +/* $Id: mandoc.h,v 1.114 2014/01/05 20:26:36 schwarze Exp $ */ /* * Copyright (c) 2010, 2011 Kristaps Dzonsons * Copyright (c) 2012, 2013, 2014 Ingo Schwarze @@ -422,7 +422,7 @@ int mchars_spec2cp(const struct mchars *, const char *mchars_spec2str(const struct mchars *, const char *, size_t, size_t *); struct mparse *mparse_alloc(enum mparset, enum mandoclevel, - mandocmsg, void *, char *); + mandocmsg, char *, int); void mparse_free(struct mparse *); void mparse_keep(struct mparse *); enum mandoclevel mparse_readfd(struct mparse *, int, const char *); diff --git a/mandocdb.c b/mandocdb.c index 0effafbc..7068ddbc 100644 --- a/mandocdb.c +++ b/mandocdb.c @@ -1,4 +1,4 @@ -/* $Id: mandocdb.c,v 1.101 2014/01/05 04:48:40 schwarze Exp $ */ +/* $Id: mandocdb.c,v 1.102 2014/01/05 20:26:36 schwarze Exp $ */ /* * Copyright (c) 2011, 2012 Kristaps Dzonsons * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze @@ -167,8 +167,9 @@ static int treescan(void); static size_t utf8(unsigned int, char [7]); static char *progname; -static int use_all; /* use all found files */ static int nodb; /* no database changes */ +static int quick; /* abort the parse early */ +static int use_all; /* use all found files */ static int verb; /* print what we're doing */ static int warnings; /* warn about crap */ static int write_utf8; /* write UTF-8 output; else ASCII */ @@ -347,7 +348,7 @@ main(int argc, char *argv[]) path_arg = NULL; op = OP_DEFAULT; - while (-1 != (ch = getopt(argc, argv, "aC:d:nT:tu:vW"))) + while (-1 != (ch = getopt(argc, argv, "aC:d:nQT:tu:vW"))) switch (ch) { case ('a'): use_all = 1; @@ -365,6 +366,9 @@ main(int argc, char *argv[]) case ('n'): nodb = 1; break; + case ('Q'): + quick = 1; + break; case ('T'): if (strcmp(optarg, "utf8")) { fprintf(stderr, "-T%s: Unsupported " @@ -404,7 +408,7 @@ main(int argc, char *argv[]) exitcode = (int)MANDOCLEVEL_OK; mp = mparse_alloc(MPARSE_AUTO, - MANDOCLEVEL_FATAL, NULL, NULL, NULL); + MANDOCLEVEL_FATAL, NULL, NULL, quick); mc = mchars_alloc(); ohash_init(&mpages, 6, &mpages_info); @@ -494,11 +498,11 @@ out: ohash_delete(&mlinks); return(exitcode); usage: - fprintf(stderr, "usage: %s [-anvW] [-C file] [-Tutf8]\n" - " %s [-anvW] [-Tutf8] dir ...\n" - " %s [-nvW] [-Tutf8] -d dir [file ...]\n" + fprintf(stderr, "usage: %s [-anQvW] [-C file] [-Tutf8]\n" + " %s [-anQvW] [-Tutf8] dir ...\n" + " %s [-nQvW] [-Tutf8] -d dir [file ...]\n" " %s [-nvW] -u dir [file ...]\n" - " %s -t file ...\n", + " %s [-Q] -t file ...\n", progname, progname, progname, progname, progname); diff --git a/mdoc.c b/mdoc.c index 58100277..a586e11c 100644 --- a/mdoc.c +++ b/mdoc.c @@ -1,7 +1,7 @@ -/* $Id: mdoc.c,v 1.207 2013/12/31 23:23:11 schwarze Exp $ */ +/* $Id: mdoc.c,v 1.208 2014/01/05 20:26:36 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2010, 2012, 2013 Ingo Schwarze + * Copyright (c) 2010, 2012, 2013, 2014 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -197,7 +197,8 @@ mdoc_free(struct mdoc *mdoc) * Allocate volatile and non-volatile parse resources. */ struct mdoc * -mdoc_alloc(struct roff *roff, struct mparse *parse, char *defos) +mdoc_alloc(struct roff *roff, struct mparse *parse, + char *defos, int quick) { struct mdoc *p; @@ -205,6 +206,7 @@ mdoc_alloc(struct roff *roff, struct mparse *parse, char *defos) p->parse = parse; p->defos = defos; + p->quick = quick; p->roff = roff; mdoc_hash_init(); @@ -961,6 +963,12 @@ mdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs) if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf)) goto err; + /* In quick mode (for mandocdb), abort after the NAME section. */ + + if (mdoc->quick && MDOC_Sh == tok && + SEC_NAME != mdoc->last->sec) + return(2); + return(1); err: /* Error out. */ diff --git a/read.c b/read.c index 7106c52d..417200e7 100644 --- a/read.c +++ b/read.c @@ -1,4 +1,4 @@ -/* $Id: read.c,v 1.40 2014/01/02 16:29:55 schwarze Exp $ */ +/* $Id: read.c,v 1.41 2014/01/05 20:26:36 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010-2014 Ingo Schwarze @@ -60,10 +60,10 @@ struct mparse { struct roff *roff; /* roff parser (!NULL) */ int reparse_count; /* finite interp. stack */ mandocmsg mmsg; /* warning/error message handler */ - void *arg; /* argument to mmsg */ const char *file; struct buf *secondary; char *defos; /* default operating system */ + int quick; /* abort the parse early */ }; static void resize_buf(struct buf *, size_t); @@ -258,13 +258,14 @@ pset(const char *buf, int pos, struct mparse *curp) case (MPARSE_MDOC): if (NULL == curp->pmdoc) curp->pmdoc = mdoc_alloc(curp->roff, curp, - curp->defos); + curp->defos, curp->quick); assert(curp->pmdoc); curp->mdoc = curp->pmdoc; return; case (MPARSE_MAN): if (NULL == curp->pman) - curp->pman = man_alloc(curp->roff, curp); + curp->pman = man_alloc(curp->roff, curp, + curp->quick); assert(curp->pman); curp->man = curp->pman; return; @@ -275,14 +276,14 @@ pset(const char *buf, int pos, struct mparse *curp) if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) { if (NULL == curp->pmdoc) curp->pmdoc = mdoc_alloc(curp->roff, curp, - curp->defos); + curp->defos, curp->quick); assert(curp->pmdoc); curp->mdoc = curp->pmdoc; return; } if (NULL == curp->pman) - curp->pman = man_alloc(curp->roff, curp); + curp->pman = man_alloc(curp->roff, curp, curp->quick); assert(curp->pman); curp->man = curp->pman; } @@ -560,7 +561,8 @@ rerun: if (0 == rc) { assert(MANDOCLEVEL_FATAL <= curp->file_status); break; - } + } else if (2 == rc) + break; /* Temporary buffers typically are not full. */ @@ -763,7 +765,7 @@ out: struct mparse * mparse_alloc(enum mparset inttype, enum mandoclevel wlevel, - mandocmsg mmsg, void *arg, char *defos) + mandocmsg mmsg, char *defos, int quick) { struct mparse *curp; @@ -773,9 +775,9 @@ mparse_alloc(enum mparset inttype, enum mandoclevel wlevel, curp->wlevel = wlevel; curp->mmsg = mmsg; - curp->arg = arg; curp->inttype = inttype; curp->defos = defos; + curp->quick = quick; curp->roff = roff_alloc(inttype, curp); return(curp); -- cgit v1.2.3-56-ge451