implement -Tman .Bk; OpenBSD rev. 1.10

[mandoc.git] / apropos.c
diff --git a/apropos.c b/apropos.c

index 6afd3ec814f2d6f5d1a37a0e76131ad01c9c8309..ac56a96109638400613d9c8f1b44dc7bb0c18f2d 100644 (file)
--- a/apropos.c
+++ b/apropos.c
@@ -1,6 +1,6 @@
-/*     $Id: apropos.c,v 1.2 2011/10/07 13:22:33 kristaps Exp $ */
+/*     $Id: apropos.c,v 1.33 2012/06/09 17:49:13 kristaps Exp $ */
  /*
-* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
   *
   * Permission to use, copy, modify, and distribute this software for any
   * purpose with or without fee is hereby granted, provided that the above
@@ -17,158 +17,31 @@
  #ifdef HAVE_CONFIG_H
  #include "config.h"
  #endif
-
-#include <sys/types.h>
+#include <sys/param.h>
  
  #include <assert.h>
-#include <errno.h>
-#include <fcntl.h>
  #include <getopt.h>
-#include <limits.h>
-#include <regex.h>
-#include <stdarg.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>
  
-#ifdef __linux__
-# include <db_185.h>
-#else
-# include <db.h>
-#endif
-
-#include "mandoc.h"
-
-#define        MAXRESULTS       256
-
-/* Bit-fields.  See mandocdb.8. */
-
-#define TYPE_NAME        0x01
-#define TYPE_FUNCTION    0x02
-#define TYPE_UTILITY     0x04
-#define TYPE_INCLUDES    0x08
-#define TYPE_VARIABLE    0x10
-#define TYPE_STANDARD    0x20
-#define TYPE_AUTHOR      0x40
-#define TYPE_CONFIG      0x80
-#define TYPE_DESC        0x100
-#define TYPE_XREF        0x200
-#define TYPE_PATH        0x400
-#define TYPE_ENV         0x800
-#define TYPE_ERR         0x1000
-
-enum   match {
-       MATCH_SUBSTR = 0,
-       MATCH_REGEX,
-       MATCH_EXACT
-};
-
-enum   sort {
-       SORT_TITLE = 0,
-       SORT_CAT,
-       SORT__MAX
-};
-
-struct opts {
-       enum sort        sort; /* output sorting */
-       const char      *arch; /* restrict to architecture */
-       const char      *cat; /* restrict to category */
-       int              types; /* only types in bitmask */
-       int              insens; /* case-insensitive match */
-       enum match       match; /* match type */
-};
-
-struct type {
-       int              mask;
-       const char      *name; /* command-line type name */
-};
-
-struct rec {
-       char            *file; /* file in file-system */
-       char            *cat; /* category (3p, 3, etc.) */
-       char            *title; /* title (FOO, etc.) */
-       char            *arch; /* arch (or empty string) */
-       char            *desc; /* description (from Nd) */
-       recno_t          rec; /* record in index */
-};
-
-struct res {
-       char            *arch; /* architecture */
-       char            *desc; /* free-form description */
-       char            *keyword; /* matched keyword */
-       int              types; /* bitmask of field selectors */
-       char            *cat; /* manual section */
-       char            *title; /* manual section */
-       char            *uri; /* formatted uri of file */
-       recno_t          rec; /* unique id of underlying manual */
-};
-
-struct state {
-       DB               *db; /* database */
-       DB               *idx; /* index */
-       const char       *dbf; /* database name */
-       const char       *idxf; /* index name */
-};
-
-static const char * const sorts[SORT__MAX] = {
-       "cat", /* SORT_CAT */
-       "title", /* SORT_TITLE */
-};
-
-static const struct type types[] = {
-       { TYPE_NAME, "name" },
-       { TYPE_FUNCTION, "func" },
-       { TYPE_UTILITY, "utility" },
-       { TYPE_INCLUDES, "incl" },
-       { TYPE_VARIABLE, "var" },
-       { TYPE_STANDARD, "stand" },
-       { TYPE_AUTHOR, "auth" },
-       { TYPE_CONFIG, "conf" },
-       { TYPE_DESC, "desc" },
-       { TYPE_XREF, "xref" },
-       { TYPE_PATH, "path" },
-       { TYPE_ENV, "env" },
-       { TYPE_ERR, "err" },
-       { INT_MAX, "all" },
-       { 0, NULL }
-};
-
-static void     buf_alloc(char **, size_t *, size_t);
-static void     buf_dup(struct mchars *, char **, const char *);
-static void     buf_redup(struct mchars *, char **, 
-                       size_t *, const char *);
-static int      sort_cat(const void *, const void *);
-static int      sort_title(const void *, const void *);
-static int      state_getrecord(struct state *, 
-                       recno_t, struct rec *);
-static void     state_output(const struct res *, int);
-static void     state_search(struct state *, 
-                       const struct opts *, char *);
-static void     usage(void);
-
-static char    *progname;
+#include "manpath.h"
+#include "mansearch.h"
  
  int
  main(int argc, char *argv[])
  {
-       BTREEINFO        info;
-       int              ch, i, rc;
-       const char      *dbf, *idxf;
-       struct state     state;
-       char            *q, *v;
-       struct opts      opts;
-       extern int       optind;
+       int              ch, whatis;
+       struct mansearch search;
+       size_t           i, sz;
+       struct manpage  *res;
+       char            *conf_file, *defpaths, *auxpaths;
+       struct manpaths  paths;
+       char            *progname;
         extern char     *optarg;
-
-       memset(&opts, 0, sizeof(struct opts));
-       memset(&state, 0, sizeof(struct state));
-
-       dbf = "mandoc.db";
-       idxf = "mandoc.index";
-       q = NULL;
-       rc = EXIT_FAILURE;
+       extern int       optind;
  
         progname = strrchr(argv[0], '/');
         if (progname == NULL)
@@ -176,494 +49,62 @@ main(int argc, char *argv[])
         else
                 ++progname;
  
-       opts.match = MATCH_SUBSTR;
+       auxpaths = defpaths = conf_file = NULL;
+       memset(&paths, 0, sizeof(struct manpaths));
+       memset(&search, 0, sizeof(struct mansearch));
+       whatis = (0 == strcmp(progname, "whatis"));
  
-       while (-1 != (ch = getopt(argc, argv, "a:c:eIrs:t:"))) 
+       while (-1 != (ch = getopt(argc, argv, "C:M:m:S:s:")))
                 switch (ch) {
-               case ('a'):
-                       opts.arch = optarg;
-                       break;
-               case ('c'):
-                       opts.cat = optarg;
+               case ('C'):
+                       conf_file = optarg;
                         break;
-               case ('e'):
-                       opts.match = MATCH_EXACT;
+               case ('M'):
+                       defpaths = optarg;
                         break;
-               case ('I'):
-                       opts.insens = 1;
+               case ('m'):
+                       auxpaths = optarg;
                         break;
-               case ('r'):
-                       opts.match = MATCH_REGEX;
+               case ('S'):
+                       search.arch = optarg;
                         break;
                 case ('s'):
-                       for (i = 0; i < SORT__MAX; i++) {
-                               if (strcmp(optarg, sorts[i])) 
-                                       continue;
-                               opts.sort = (enum sort)i;
-                               break;
-                       }
-
-                       if (i < SORT__MAX)
-                               break;
-
-                       fprintf(stderr, "%s: Bad sort\n", optarg);
-                       return(EXIT_FAILURE);
-               case ('t'):
-                       while (NULL != (v = strsep(&optarg, ","))) {
-                               if ('\0' == *v)
-                                       continue;
-                               for (i = 0; types[i].mask; i++) {
-                                       if (strcmp(types[i].name, v))
-                                               continue;
-                                       break;
-                               }
-                               if (0 == types[i].mask)
-                                       break;
-                               opts.types |= types[i].mask;
-                       }
-                       if (NULL == v)
-                               break;
-                       
-                       fprintf(stderr, "%s: Bad type\n", v);
-                       return(EXIT_FAILURE);
+                       search.sec = optarg;
+                       break;
                 default:
-                       usage();
-                       return(EXIT_FAILURE);
+                       goto usage;
                 }
  
         argc -= optind;
         argv += optind;
  
-       if (0 == argc || '\0' == **argv) {
-               usage();
-               goto out;
-       } else
-               q = *argv;
-
-       if (0 == opts.types)
-               opts.types = TYPE_NAME | TYPE_DESC;
+       if (0 == argc)
+               goto usage;
  
-       /*
-        * Configure databases.
-        * The keyword database is a btree that allows for duplicate
-        * entries.
-        * The index database is a recno.
-        */
+       search.deftype = whatis ? TYPE_Nm : TYPE_Nm | TYPE_Nd;
+       search.flags = whatis ? MANSEARCH_WHATIS : 0;
  
-       memset(&info, 0, sizeof(BTREEINFO));
-       info.flags = R_DUP;
+       manpath_parse(&paths, conf_file, defpaths, auxpaths);
+       ch = mansearch(&search, &paths, argc, argv, &res, &sz);
+       manpath_free(&paths);
  
-       state.db = dbopen(dbf, O_RDONLY, 0, DB_BTREE, &info);
-       if (NULL == state.db) {
-               perror(dbf);
-               goto out;
-       }
+       if (0 == ch)
+               goto usage;
  
-       state.idx = dbopen(idxf, O_RDONLY, 0, DB_RECNO, NULL);
-       if (NULL == state.idx) {
-               perror(idxf);
-               goto out;
+       for (i = 0; i < sz; i++) {
+               printf("%s - %s\n", res[i].file, res[i].desc);
+               free(res[i].desc);
         }
  
-       /* Main search function. */
-
-       state_search(&state, &opts, q);
-
-       rc = EXIT_SUCCESS;
-out:
-       if (state.db)
-               (*state.db->close)(state.db);
-       if (state.idx)
-               (*state.idx->close)(state.idx);
-
-       return(rc);
-}
-
-static void
-state_search(struct state *p, const struct opts *opts, char *q)
-{
-       int              i, len, ch, rflags, dflag;
-       struct mchars   *mc;
-       char            *buf;
-       size_t           bufsz;
-       recno_t          rec;
-       uint32_t         fl;
-       DBT              key, val;
-       struct res       res[MAXRESULTS];
-       regex_t          reg;
-       regex_t         *regp;
-       char             filebuf[10];
-       struct rec       record;
-
-       len = 0;
-       buf = NULL;
-       bufsz = 0;
-       ch = 0;
-       regp = NULL;
-
-       /*
-        * Configure how we scan through results to see if we match:
-        * whether by regexp or exact matches.
-        */
-
-       switch (opts->match) {
-       case (MATCH_REGEX):
-               rflags = REG_EXTENDED | REG_NOSUB | 
-                       (opts->insens ? REG_ICASE : 0);
-
-               if (0 != regcomp(&reg, q, rflags)) {
-                       fprintf(stderr, "%s: Bad pattern\n", q);
-                       return;
-               }
-
-               regp = &reg;
-               dflag = R_FIRST;
-               break;
-       case (MATCH_EXACT):
-               key.data = q;
-               key.size = strlen(q) + 1;
-               dflag = R_CURSOR;
-               break;
-       default:
-               dflag = R_FIRST;
-               break;
-       }
-
-       if (NULL == (mc = mchars_alloc())) {
-               perror(NULL);
-               exit(EXIT_FAILURE);
-       }
-
-       /*
-        * Iterate over the entire keyword database.
-        * For each record, we must first translate the key into UTF-8.
-        * Following that, make sure it's acceptable.
-        * Lastly, add it to the available records.
-        */
-
-       while (len < MAXRESULTS) {
-               if ((ch = (*p->db->seq)(p->db, &key, &val, dflag)))
-                       break;
-
-               dflag = R_NEXT;
-
-               /* 
-                * Keys must be sized as such: the keyword must be
-                * non-empty (nil terminator plus one character) and the
-                * value must be 8 (recno_t---uint32_t---index reference
-                * and a uint32_t flag field).
-                */
-
-               if (key.size < 2 || 8 != val.size) {
-                       fprintf(stderr, "%s: Corrupt database\n", p->dbf);
-                       exit(EXIT_FAILURE);
-               }
-
-               buf_redup(mc, &buf, &bufsz, (char *)key.data);
-
-               fl = *(uint32_t *)val.data;
-
-               if ( ! (fl & opts->types))
-                       continue;
-
-               switch (opts->match) {
-               case (MATCH_REGEX):
-                       if (regexec(regp, buf, 0, NULL, 0))
-                               continue;
-                       break;
-               case (MATCH_EXACT):
-                       if (opts->insens && strcasecmp(buf, q))
-                               goto send;
-                       if ( ! opts->insens && strcmp(buf, q))
-                               goto send;
-                       break;
-               default:
-                       if (opts->insens && NULL == strcasestr(buf, q))
-                               continue;
-                       if ( ! opts->insens && NULL == strstr(buf, q))
-                               continue;
-                       break;
-               }
-
-               /*
-                * Now look up the file itself in our index.  The file's
-                * indexed by its recno for fast lookups.
-                */
-
-               memcpy(&rec, val.data + 4, sizeof(recno_t));
-
-               if ( ! state_getrecord(p, rec, &record))
-                       exit(EXIT_FAILURE);
-
-               /* If we're in a different section, skip... */
-
-               if (opts->cat && strcasecmp(opts->cat, record.cat))
-                       continue;
-               if (opts->arch && strcasecmp(opts->arch, record.arch))
-                       continue;
-
-               /* FIXME: this needs to be changed.  Ugh.  Linear. */
-
-               for (i = 0; i < len; i++)
-                       if (res[i].rec == record.rec)
-                               break;
-
-               if (i < len)
-                       continue;
-
-               /*
-                * Now we have our filename, keywords, types, and all
-                * other necessary information.  
-                * Process it and add it to our list of results.
-                */
-
-               filebuf[9] = '\0';
-               snprintf(filebuf, 10, "%u", record.rec);
-               assert('\0' == filebuf[9]);
-
-               res[len].rec = record.rec;
-               res[len].types = fl;
-
-               buf_dup(mc, &res[len].keyword, buf);
-               buf_dup(mc, &res[len].uri, filebuf);
-               buf_dup(mc, &res[len].cat, record.cat);
-               buf_dup(mc, &res[len].arch, record.arch);
-               buf_dup(mc, &res[len].title, record.title);
-               buf_dup(mc, &res[len].desc, record.desc);
-               len++;
-       }
-
-send:
-       if (ch < 0) {
-               perror(p->dbf);
-               exit(EXIT_FAILURE);
-       } 
-
-       /*
-        * Sort our results.
-        * We do this post-scan (instead of an in-line sort) because
-        * it's more or less the same in terms of run-time.  Assuming we
-        * sort in-line with a tree versus post:
-        * 
-        *  In-place: n * O(lg n)
-        *  After: n + O(n lg n)
-        *
-        * Whatever.  This also buys us simplicity.
-        */
-
-       switch (opts->sort) {
-       case (SORT_CAT):
-               qsort(res, len, sizeof(struct res), sort_cat);
-               break;
-       default:
-               qsort(res, len, sizeof(struct res), sort_title);
-               break;
-       }
-
-       state_output(res, len);
-
-       for (len-- ; len >= 0; len--) {
-               free(res[len].keyword);
-               free(res[len].title);
-               free(res[len].cat);
-               free(res[len].arch);
-               free(res[len].desc);
-               free(res[len].uri);
-       }
-
-       free(buf);
-       mchars_free(mc);
-
-       if (regp)
-               regfree(regp);
-}
-
-/*
- * Track allocated buffer size for buf_redup().
- */
-static inline void
-buf_alloc(char **buf, size_t *bufsz, size_t sz)
-{
-
-       if (sz < *bufsz) 
-               return;
-
-       *bufsz = sz + 1024;
-       if (NULL == (*buf = realloc(*buf, *bufsz))) {
-               perror(NULL);
-               exit(EXIT_FAILURE);
-       }
-}
-
-/*
- * Like buf_redup() but throwing away the buffer size.
- */
-static void
-buf_dup(struct mchars *mc, char **buf, const char *val)
-{
-       size_t           bufsz;
-
-       bufsz = 0;
-       *buf = NULL;
-       buf_redup(mc, buf, &bufsz, val);
-}
-
-/*
- * Normalise strings from the index and database.
- * These strings are escaped as defined by mandoc_char(7) along with
- * other goop in mandoc.h (e.g., soft hyphens).
- */
-static void
-buf_redup(struct mchars *mc, char **buf, 
-               size_t *bufsz, const char *val)
-{
-       size_t           sz;
-       const char      *seq, *cpp;
-       int              len, pos;
-       enum mandoc_esc  esc;
-       const char       rsv[] = { '\\', ASCII_NBRSP, ASCII_HYPH, '\0' };
-
-       /* Pre-allocate by the length of the input */
-
-       buf_alloc(buf, bufsz, strlen(val) + 1);
-
-       pos = 0;
-
-       while ('\0' != *val) {
-               /*
-                * Halt on the first escape sequence.
-                * This also halts on the end of string, in which case
-                * we just copy, fallthrough, and exit the loop.
-                */
-               if ((sz = strcspn(val, rsv)) > 0) {
-                       memcpy(&(*buf)[pos], val, sz);
-                       pos += (int)sz;
-                       val += (int)sz;
-               }
-
-               if (ASCII_HYPH == *val) {
-                       (*buf)[pos++] = '-';
-                       val++;
-                       continue;
-               } else if (ASCII_NBRSP == *val) {
-                       (*buf)[pos++] = ' ';
-                       val++;
-                       continue;
-               } else if ('\\' != *val)
-                       break;
-
-               /* Read past the slash. */
-
-               val++;
-
-               /*
-                * Parse the escape sequence and see if it's a
-                * predefined character or special character.
-                */
-
-               esc = mandoc_escape(&val, &seq, &len);
-               if (ESCAPE_ERROR == esc)
-                       break;
-
-               cpp = ESCAPE_SPECIAL == esc ? 
-                       mchars_spec2str(mc, seq, len, &sz) : NULL;
-
-               if (NULL == cpp)
-                       continue;
-
-               /* Copy the rendered glyph into the stream. */
-
-               buf_alloc(buf, bufsz, sz);
-
-               memcpy(&(*buf)[pos], cpp, sz);
-               pos += (int)sz;
-       }
-
-       (*buf)[pos] = '\0';
-}
-
-static void
-state_output(const struct res *res, int sz)
-{
-       int              i;
-
-       for (i = 0; i < sz; i++)
-               printf("%s(%s%s%s) - %s\n", res[i].title, 
-                               res[i].cat, 
-                               *res[i].arch ? "/" : "",
-                               *res[i].arch ? res[i].arch : "",
-                               res[i].desc);
-}
-
-static void
-usage(void)
-{
-
-       fprintf(stderr, "usage: %s "
-                       "[-eIr] "
-                       "[-a arch] "
-                       "[-c cat] "
-                       "[-s sort] "
-                       "[-t type[,...]] "
-                       "key\n", progname);
-}
-
-static int
-state_getrecord(struct state *p, recno_t rec, struct rec *rp)
-{
-       DBT             key, val;
-       size_t          sz;
-       int             rc;
-
-       key.data = &rec;
-       key.size = sizeof(recno_t);
-
-       rc = (*p->idx->get)(p->idx, &key, &val, 0);
-       if (rc < 0) {
-               perror(p->idxf);
-               return(0);
-       } else if (rc > 0)
-               goto err;
-
-       rp->file = (char *)val.data;
-       if ((sz = strlen(rp->file) + 1) >= val.size)
-               goto err;
-
-       rp->cat = (char *)val.data + (int)sz;
-       if ((sz += strlen(rp->cat) + 1) >= val.size)
-               goto err;
-
-       rp->title = (char *)val.data + (int)sz;
-       if ((sz += strlen(rp->title) + 1) >= val.size)
-               goto err;
-
-       rp->arch = (char *)val.data + (int)sz;
-       if ((sz += strlen(rp->arch) + 1) >= val.size)
-               goto err;
-
-       rp->desc = (char *)val.data + (int)sz;
-       rp->rec = rec;
-       return(1);
-err:
-       fprintf(stderr, "%s: Corrupt index\n", p->idxf);
-       return(0);
-}
-
-static int
-sort_title(const void *p1, const void *p2)
-{
-
-       return(strcmp(((const struct res *)p1)->title,
-                     ((const struct res *)p2)->title));
-}
-
-static int
-sort_cat(const void *p1, const void *p2)
-{
-       int              rc;
-
-       rc = strcmp(((const struct res *)p1)->cat,
-                       ((const struct res *)p2)->cat);
-
-       return(0 == rc ? sort_title(p1, p2) : rc);
+       free(res);
+       return(sz ? EXIT_SUCCESS : EXIT_FAILURE);
+usage:
+       fprintf(stderr, "usage: %s [-C conf] "
+                                 "[-M paths] "
+                                 "[-m paths] "
+                                 "[-S arch] "
+                                 "[-s section] "
+                                 "expr ...\n", 
+                                 progname);
+       return(EXIT_FAILURE);
  }