apropos_db.c

   1 /*      $Id: apropos_db.c,v 1.18 2011/12/01 23:55:58 kristaps Exp $ */
   2 /*
   3  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #include <assert.h>
  19 #include <fcntl.h>
  20 #include <regex.h>
  21 #include <stdarg.h>
  22 #include <stdint.h>
  23 #include <stdlib.h>
  24 #include <string.h>
  25 #include <unistd.h>
  26
  27 #ifdef __linux__
  28 # include <db_185.h>
  29 #else
  30 # include <db.h>
  31 #endif
  32
  33 #include "mandocdb.h"
  34 #include "apropos_db.h"
  35 #include "mandoc.h"
  36
  37 struct  rec {
  38         struct res       res; /* resulting record info */
  39         /*
  40          * Maintain a binary tree for checking the uniqueness of `rec'
  41          * when adding elements to the results array.
  42          * Since the results array is dynamic, use offset in the array
  43          * instead of a pointer to the structure.
  44          */
  45         int              lhs;
  46         int              rhs;
  47         int              matched; /* expression is true */
  48         int             *matches; /* partial truth evaluations */
  49 };
  50
  51 struct  expr {
  52         int              regex; /* is regex? */
  53         int              index; /* index in match array */
  54         uint64_t         mask; /* type-mask */
  55         int              and; /* is rhs of logical AND? */
  56         char            *v; /* search value */
  57         regex_t          re; /* compiled re, if regex */
  58         struct expr     *next; /* next in sequence */
  59         struct expr     *subexpr;
  60 };
  61
  62 struct  type {
  63         uint64_t         mask;
  64         const char      *name;
  65 };
  66
  67 struct  rectree {
  68         struct rec      *node; /* record array for dir tree */
  69         int              len; /* length of record array */
  70 };
  71
  72 static  const struct type types[] = {
  73         { TYPE_An, "An" },
  74         { TYPE_Ar, "Ar" },
  75         { TYPE_At, "At" },
  76         { TYPE_Bsx, "Bsx" },
  77         { TYPE_Bx, "Bx" },
  78         { TYPE_Cd, "Cd" },
  79         { TYPE_Cm, "Cm" },
  80         { TYPE_Dv, "Dv" },
  81         { TYPE_Dx, "Dx" },
  82         { TYPE_Em, "Em" },
  83         { TYPE_Er, "Er" },
  84         { TYPE_Ev, "Ev" },
  85         { TYPE_Fa, "Fa" },
  86         { TYPE_Fl, "Fl" },
  87         { TYPE_Fn, "Fn" },
  88         { TYPE_Fn, "Fo" },
  89         { TYPE_Ft, "Ft" },
  90         { TYPE_Fx, "Fx" },
  91         { TYPE_Ic, "Ic" },
  92         { TYPE_In, "In" },
  93         { TYPE_Lb, "Lb" },
  94         { TYPE_Li, "Li" },
  95         { TYPE_Lk, "Lk" },
  96         { TYPE_Ms, "Ms" },
  97         { TYPE_Mt, "Mt" },
  98         { TYPE_Nd, "Nd" },
  99         { TYPE_Nm, "Nm" },
 100         { TYPE_Nx, "Nx" },
 101         { TYPE_Ox, "Ox" },
 102         { TYPE_Pa, "Pa" },
 103         { TYPE_Rs, "Rs" },
 104         { TYPE_Sh, "Sh" },
 105         { TYPE_Ss, "Ss" },
 106         { TYPE_St, "St" },
 107         { TYPE_Sy, "Sy" },
 108         { TYPE_Tn, "Tn" },
 109         { TYPE_Va, "Va" },
 110         { TYPE_Va, "Vt" },
 111         { TYPE_Xr, "Xr" },
 112         { INT_MAX, "any" },
 113         { 0, NULL }
 114 };
 115
 116 static  DB      *btree_open(void);
 117 static  int      btree_read(const DBT *, const DBT *,
 118                         const struct mchars *,
 119                         struct db_val *, char **);
 120 static  int      expreval(const struct expr *, int *);
 121 static  void     exprexec(const struct expr *,
 122                         const char *, uint64_t, struct rec *);
 123 static  int      exprmark(const struct expr *,
 124                         const char *, uint64_t, int *);
 125 static  struct expr *exprexpr(int, char *[], int *, int *, size_t *);
 126 static  struct expr *exprterm(char *, int);
 127 static  DB      *index_open(void);
 128 static  int      index_read(const DBT *, const DBT *, int,
 129                         const struct mchars *, struct rec *);
 130 static  void     norm_string(const char *,
 131                         const struct mchars *, char **);
 132 static  size_t   norm_utf8(unsigned int, char[7]);
 133 static  void     recfree(struct rec *);
 134 static  int      single_search(struct rectree *, const struct opts *,
 135                         const struct expr *, size_t terms,
 136                         struct mchars *, int);
 137
 138 /*
 139  * Open the keyword mandoc-db database.
 140  */
 141 static DB *
 142 btree_open(void)
 143 {
 144         BTREEINFO        info;
 145         DB              *db;
 146
 147         memset(&info, 0, sizeof(BTREEINFO));
 148         info.flags = R_DUP;
 149
 150         db = dbopen(MANDOC_DB, O_RDONLY, 0, DB_BTREE, &info);
 151         if (NULL != db)
 152                 return(db);
 153
 154         return(NULL);
 155 }
 156
 157 /*
 158  * Read a keyword from the database and normalise it.
 159  * Return 0 if the database is insane, else 1.
 160  */
 161 static int
 162 btree_read(const DBT *k, const DBT *v,
 163                 const struct mchars *mc,
 164                 struct db_val *dbv, char **buf)
 165 {
 166         const struct db_val *vp;
 167
 168         /* Are our sizes sane? */
 169         if (k->size < 2 || sizeof(struct db_val) != v->size)
 170                 return(0);
 171
 172         /* Is our string nil-terminated? */
 173         if ('\0' != ((const char *)k->data)[(int)k->size - 1])
 174                 return(0);
 175
 176         vp = v->data;
 177         norm_string((const char *)k->data, mc, buf);
 178         dbv->rec = betoh32(vp->rec);
 179         dbv->mask = betoh64(vp->mask);
 180         return(1);
 181 }
 182
 183 /*
 184  * Take a Unicode codepoint and produce its UTF-8 encoding.
 185  * This isn't the best way to do this, but it works.
 186  * The magic numbers are from the UTF-8 packaging.
 187  * They're not as scary as they seem: read the UTF-8 spec for details.
 188  */
 189 static size_t
 190 norm_utf8(unsigned int cp, char out[7])
 191 {
 192         size_t           rc;
 193
 194         rc = 0;
 195
 196         if (cp <= 0x0000007F) {
 197                 rc = 1;
 198                 out[0] = (char)cp;
 199         } else if (cp <= 0x000007FF) {
 200                 rc = 2;
 201                 out[0] = (cp >> 6  & 31) | 192;
 202                 out[1] = (cp       & 63) | 128;
 203         } else if (cp <= 0x0000FFFF) {
 204                 rc = 3;
 205                 out[0] = (cp >> 12 & 15) | 224;
 206                 out[1] = (cp >> 6  & 63) | 128;
 207                 out[2] = (cp       & 63) | 128;
 208         } else if (cp <= 0x001FFFFF) {
 209                 rc = 4;
 210                 out[0] = (cp >> 18 & 7) | 240;
 211                 out[1] = (cp >> 12 & 63) | 128;
 212                 out[2] = (cp >> 6  & 63) | 128;
 213                 out[3] = (cp       & 63) | 128;
 214         } else if (cp <= 0x03FFFFFF) {
 215                 rc = 5;
 216                 out[0] = (cp >> 24 & 3) | 248;
 217                 out[1] = (cp >> 18 & 63) | 128;
 218                 out[2] = (cp >> 12 & 63) | 128;
 219                 out[3] = (cp >> 6  & 63) | 128;
 220                 out[4] = (cp       & 63) | 128;
 221         } else if (cp <= 0x7FFFFFFF) {
 222                 rc = 6;
 223                 out[0] = (cp >> 30 & 1) | 252;
 224                 out[1] = (cp >> 24 & 63) | 128;
 225                 out[2] = (cp >> 18 & 63) | 128;
 226                 out[3] = (cp >> 12 & 63) | 128;
 227                 out[4] = (cp >> 6  & 63) | 128;
 228                 out[5] = (cp       & 63) | 128;
 229         } else
 230                 return(0);
 231
 232         out[rc] = '\0';
 233         return(rc);
 234 }
 235
 236 /*
 237  * Normalise strings from the index and database.
 238  * These strings are escaped as defined by mandoc_char(7) along with
 239  * other goop in mandoc.h (e.g., soft hyphens).
 240  * This function normalises these into a nice UTF-8 string.
 241  * Returns 0 if the database is fucked.
 242  */
 243 static void
 244 norm_string(const char *val, const struct mchars *mc, char **buf)
 245 {
 246         size_t            sz, bsz;
 247         char              utfbuf[7];
 248         const char       *seq, *cpp;
 249         int               len, u, pos;
 250         enum mandoc_esc   esc;
 251         static const char res[] = { '\\', '\t',
 252                                 ASCII_NBRSP, ASCII_HYPH, '\0' };
 253
 254         /* Pre-allocate by the length of the input */
 255
 256         bsz = strlen(val) + 1;
 257         *buf = mandoc_realloc(*buf, bsz);
 258         pos = 0;
 259
 260         while ('\0' != *val) {
 261                 /*
 262                  * Halt on the first escape sequence.
 263                  * This also halts on the end of string, in which case
 264                  * we just copy, fallthrough, and exit the loop.
 265                  */
 266                 if ((sz = strcspn(val, res)) > 0) {
 267                         memcpy(&(*buf)[pos], val, sz);
 268                         pos += (int)sz;
 269                         val += (int)sz;
 270                 }
 271
 272                 if (ASCII_HYPH == *val) {
 273                         (*buf)[pos++] = '-';
 274                         val++;
 275                         continue;
 276                 } else if ('\t' == *val || ASCII_NBRSP == *val) {
 277                         (*buf)[pos++] = ' ';
 278                         val++;
 279                         continue;
 280                 } else if ('\\' != *val)
 281                         break;
 282
 283                 /* Read past the slash. */
 284
 285                 val++;
 286                 u = 0;
 287
 288                 /*
 289                  * Parse the escape sequence and see if it's a
 290                  * predefined character or special character.
 291                  */
 292
 293                 esc = mandoc_escape(&val, &seq, &len);
 294                 if (ESCAPE_ERROR == esc)
 295                         break;
 296
 297                 /*
 298                  * XXX - this just does UTF-8, but we need to know
 299                  * beforehand whether we should do text substitution.
 300                  */
 301
 302                 switch (esc) {
 303                 case (ESCAPE_SPECIAL):
 304                         if (0 != (u = mchars_spec2cp(mc, seq, len)))
 305                                 break;
 306                         /* FALLTHROUGH */
 307                 default:
 308                         continue;
 309                 }
 310
 311                 /*
 312                  * If we have a Unicode codepoint, try to convert that
 313                  * to a UTF-8 byte string.
 314                  */
 315
 316                 cpp = utfbuf;
 317                 if (0 == (sz = norm_utf8(u, utfbuf)))
 318                         continue;
 319
 320                 /* Copy the rendered glyph into the stream. */
 321
 322                 sz = strlen(cpp);
 323                 bsz += sz;
 324
 325                 *buf = mandoc_realloc(*buf, bsz);
 326
 327                 memcpy(&(*buf)[pos], cpp, sz);
 328                 pos += (int)sz;
 329         }
 330
 331         (*buf)[pos] = '\0';
 332 }
 333
 334 /*
 335  * Open the filename-index mandoc-db database.
 336  * Returns NULL if opening failed.
 337  */
 338 static DB *
 339 index_open(void)
 340 {
 341         DB              *db;
 342
 343         db = dbopen(MANDOC_IDX, O_RDONLY, 0, DB_RECNO, NULL);
 344         if (NULL != db)
 345                 return(db);
 346
 347         return(NULL);
 348 }
 349
 350 /*
 351  * Safely unpack from an index file record into the structure.
 352  * Returns 1 if an entry was unpacked, 0 if the database is insane.
 353  */
 354 static int
 355 index_read(const DBT *key, const DBT *val, int index,
 356                 const struct mchars *mc, struct rec *rec)
 357 {
 358         size_t           left;
 359         char            *np, *cp;
 360
 361 #define INDEX_BREAD(_dst) \
 362         do { \
 363                 if (NULL == (np = memchr(cp, '\0', left))) \
 364                         return(0); \
 365                 norm_string(cp, mc, &(_dst)); \
 366                 left -= (np - cp) + 1; \
 367                 cp = np + 1; \
 368         } while (/* CONSTCOND */ 0)
 369
 370         left = val->size;
 371         cp = (char *)val->data;
 372
 373         rec->res.rec = *(recno_t *)key->data;
 374         rec->res.volume = index;
 375
 376         INDEX_BREAD(rec->res.type);
 377         INDEX_BREAD(rec->res.file);
 378         INDEX_BREAD(rec->res.cat);
 379         INDEX_BREAD(rec->res.title);
 380         INDEX_BREAD(rec->res.arch);
 381         INDEX_BREAD(rec->res.desc);
 382         return(1);
 383 }
 384
 385 /*
 386  * Search mandocdb databases in paths for expression "expr".
 387  * Filter out by "opts".
 388  * Call "res" with the results, which may be zero.
 389  * Return 0 if there was a database error, else return 1.
 390  */
 391 int
 392 apropos_search(int pathsz, char **paths, const struct opts *opts,
 393                 const struct expr *expr, size_t terms, void *arg,
 394                 void (*res)(struct res *, size_t, void *))
 395 {
 396         struct rectree   tree;
 397         struct mchars   *mc;
 398         struct res      *ress;
 399         int              i, mlen, rc;
 400
 401         memset(&tree, 0, sizeof(struct rectree));
 402
 403         rc = 0;
 404         mc = mchars_alloc();
 405
 406         /*
 407          * Main loop.  Change into the directory containing manpage
 408          * databases.  Run our expession over each database in the set.
 409          */
 410
 411         for (i = 0; i < pathsz; i++) {
 412                 if (chdir(paths[i]))
 413                         continue;
 414                 if ( ! single_search(&tree, opts, expr, terms, mc, i))
 415                         goto out;
 416         }
 417
 418         /*
 419          * Count matching files, transfer to a "clean" array, then feed
 420          * them to the output handler.
 421          */
 422
 423         for (mlen = i = 0; i < tree.len; i++)
 424                 if (tree.node[i].matched)
 425                         mlen++;
 426
 427         ress = mandoc_malloc(mlen * sizeof(struct res));
 428
 429         for (mlen = i = 0; i < tree.len; i++)
 430                 if (tree.node[i].matched)
 431                         memcpy(&ress[mlen++], &tree.node[i].res,
 432                                         sizeof(struct res));
 433
 434         (*res)(ress, mlen, arg);
 435         free(ress);
 436
 437         rc = 1;
 438 out:
 439         for (i = 0; i < tree.len; i++)
 440                 recfree(&tree.node[i]);
 441
 442         free(tree.node);
 443         mchars_free(mc);
 444         return(rc);
 445 }
 446
 447 static int
 448 single_search(struct rectree *tree, const struct opts *opts,
 449                 const struct expr *expr, size_t terms,
 450                 struct mchars *mc, int vol)
 451 {
 452         int              root, leaf, ch;
 453         DBT              key, val;
 454         DB              *btree, *idx;
 455         char            *buf;
 456         struct rec      *rs;
 457         struct rec       r;
 458         struct db_val    vb;
 459
 460         root    = -1;
 461         leaf    = -1;
 462         btree   = NULL;
 463         idx     = NULL;
 464         buf     = NULL;
 465         rs      = tree->node;
 466
 467         memset(&r, 0, sizeof(struct rec));
 468
 469         if (NULL == (btree = btree_open()))
 470                 return(1);
 471
 472         if (NULL == (idx = index_open())) {
 473                 (*btree->close)(btree);
 474                 return(1);
 475         }
 476
 477         while (0 == (ch = (*btree->seq)(btree, &key, &val, R_NEXT))) {
 478                 if ( ! btree_read(&key, &val, mc, &vb, &buf))
 479                         break;
 480
 481                 /*
 482                  * See if this keyword record matches any of the
 483                  * expressions we have stored.
 484                  */
 485                 if ( ! exprmark(expr, buf, vb.mask, NULL))
 486                         continue;
 487
 488                 /*
 489                  * O(log n) scan for prior records.  Since a record
 490                  * number is unbounded, this has decent performance over
 491                  * a complex hash function.
 492                  */
 493
 494                 for (leaf = root; leaf >= 0; )
 495                         if (vb.rec > rs[leaf].res.rec &&
 496                                         rs[leaf].rhs >= 0)
 497                                 leaf = rs[leaf].rhs;
 498                         else if (vb.rec < rs[leaf].res.rec &&
 499                                         rs[leaf].lhs >= 0)
 500                                 leaf = rs[leaf].lhs;
 501                         else
 502                                 break;
 503
 504                 /*
 505                  * If we find a record, see if it has already evaluated
 506                  * to true.  If it has, great, just keep going.  If not,
 507                  * try to evaluate it now and continue anyway.
 508                  */
 509
 510                 if (leaf >= 0 && rs[leaf].res.rec == vb.rec) {
 511                         if (0 == rs[leaf].matched)
 512                                 exprexec(expr, buf, vb.mask, &rs[leaf]);
 513                         continue;
 514                 }
 515
 516                 /*
 517                  * We have a new file to examine.
 518                  * Extract the manpage's metadata from the index
 519                  * database, then begin partial evaluation.
 520                  */
 521
 522                 key.data = &vb.rec;
 523                 key.size = sizeof(recno_t);
 524
 525                 if (0 != (*idx->get)(idx, &key, &val, 0))
 526                         break;
 527
 528                 r.lhs = r.rhs = -1;
 529                 if ( ! index_read(&key, &val, vol, mc, &r))
 530                         break;
 531
 532                 /* XXX: this should be elsewhere, I guess? */
 533
 534                 if (opts->cat && strcasecmp(opts->cat, r.res.cat))
 535                         continue;
 536                 if (opts->arch && strcasecmp(opts->arch, r.res.arch))
 537                         continue;
 538
 539                 tree->node = rs = mandoc_realloc
 540                         (rs, (tree->len + 1) * sizeof(struct rec));
 541
 542                 memcpy(&rs[tree->len], &r, sizeof(struct rec));
 543                 rs[tree->len].matches =
 544                         mandoc_calloc(terms, sizeof(int));
 545
 546                 exprexec(expr, buf, vb.mask, &rs[tree->len]);
 547
 548                 /* Append to our tree. */
 549
 550                 if (leaf >= 0) {
 551                         if (vb.rec > rs[leaf].res.rec)
 552                                 rs[leaf].rhs = tree->len;
 553                         else
 554                                 rs[leaf].lhs = tree->len;
 555                 } else
 556                         root = tree->len;
 557
 558                 memset(&r, 0, sizeof(struct rec));
 559                 tree->len++;
 560         }
 561
 562         (*btree->close)(btree);
 563         (*idx->close)(idx);
 564
 565         free(buf);
 566         return(1 == ch);
 567 }
 568
 569 static void
 570 recfree(struct rec *rec)
 571 {
 572
 573         free(rec->res.type);
 574         free(rec->res.file);
 575         free(rec->res.cat);
 576         free(rec->res.title);
 577         free(rec->res.arch);
 578         free(rec->res.desc);
 579
 580         free(rec->matches);
 581 }
 582
 583 /*
 584  * Compile a list of straight-up terms.
 585  * The arguments are re-written into ~[[:<:]]term[[:>:]], or "term"
 586  * surrounded by word boundaries, then pumped through exprterm().
 587  * Terms are case-insensitive.
 588  * This emulates whatis(1) behaviour.
 589  */
 590 struct expr *
 591 termcomp(int argc, char *argv[], size_t *tt)
 592 {
 593         char            *buf;
 594         int              pos;
 595         struct expr     *e, *next;
 596         size_t           sz;
 597
 598         buf = NULL;
 599         e = NULL;
 600         *tt = 0;
 601
 602         for (pos = argc - 1; pos >= 0; pos--) {
 603                 sz = strlen(argv[pos]) + 18;
 604                 buf = mandoc_realloc(buf, sz);
 605                 strlcpy(buf, "Nm~[[:<:]]", sz);
 606                 strlcat(buf, argv[pos], sz);
 607                 strlcat(buf, "[[:>:]]", sz);
 608                 if (NULL == (next = exprterm(buf, 0))) {
 609                         free(buf);
 610                         exprfree(e);
 611                         return(NULL);
 612                 }
 613                 next->next = e;
 614                 e = next;
 615                 (*tt)++;
 616         }
 617
 618         free(buf);
 619         return(e);
 620 }
 621
 622 /*
 623  * Compile a sequence of logical expressions.
 624  * See apropos.1 for a grammar of this sequence.
 625  */
 626 struct expr *
 627 exprcomp(int argc, char *argv[], size_t *tt)
 628 {
 629         int              pos, lvl;
 630         struct expr     *e;
 631
 632         pos = lvl = 0;
 633         *tt = 0;
 634
 635         e = exprexpr(argc, argv, &pos, &lvl, tt);
 636
 637         if (0 == lvl && pos >= argc)
 638                 return(e);
 639
 640         exprfree(e);
 641         return(NULL);
 642 }
 643
 644 /*
 645  * Compile an array of tokens into an expression.
 646  * An informal expression grammar is defined in apropos(1).
 647  * Return NULL if we fail doing so.  All memory will be cleaned up.
 648  * Return the root of the expression sequence if alright.
 649  */
 650 static struct expr *
 651 exprexpr(int argc, char *argv[], int *pos, int *lvl, size_t *tt)
 652 {
 653         struct expr     *e, *first, *next;
 654         int              log;
 655
 656         first = next = NULL;
 657
 658         for ( ; *pos < argc; (*pos)++) {
 659                 e = next;
 660
 661                 /*
 662                  * Close out a subexpression.
 663                  */
 664
 665                 if (NULL != e && 0 == strcmp(")", argv[*pos])) {
 666                         if (--(*lvl) < 0)
 667                                 goto err;
 668                         break;
 669                 }
 670
 671                 /*
 672                  * Small note: if we're just starting, don't let "-a"
 673                  * and "-o" be considered logical operators: they're
 674                  * just tokens unless pairwise joining, in which case we
 675                  * record their existence (or assume "OR").
 676                  */
 677                 log = 0;
 678
 679                 if (NULL != e && 0 == strcmp("-a", argv[*pos]))
 680                         log = 1;
 681                 else if (NULL != e && 0 == strcmp("-o", argv[*pos]))
 682                         log = 2;
 683
 684                 if (log > 0 && ++(*pos) >= argc)
 685                         goto err;
 686
 687                 /*
 688                  * Now we parse the term part.  This can begin with
 689                  * "-i", in which case the expression is case
 690                  * insensitive.
 691                  */
 692
 693                 if (0 == strcmp("(", argv[*pos])) {
 694                         ++(*pos);
 695                         ++(*lvl);
 696                         next = mandoc_calloc(1, sizeof(struct expr));
 697                         next->subexpr = exprexpr(argc, argv, pos, lvl, tt);
 698                         if (NULL == next->subexpr) {
 699                                 free(next);
 700                                 next = NULL;
 701                         }
 702                 } else if (0 == strcmp("-i", argv[*pos])) {
 703                         if (++(*pos) >= argc)
 704                                 goto err;
 705                         next = exprterm(argv[*pos], 0);
 706                 } else
 707                         next = exprterm(argv[*pos], 1);
 708
 709                 if (NULL == next)
 710                         goto err;
 711
 712                 next->and = log == 1;
 713                 next->index = (int)(*tt)++;
 714
 715                 /* Append to our chain of expressions. */
 716
 717                 if (NULL == first) {
 718                         assert(NULL == e);
 719                         first = next;
 720                 } else {
 721                         assert(NULL != e);
 722                         e->next = next;
 723                 }
 724         }
 725
 726         return(first);
 727 err:
 728         exprfree(first);
 729         return(NULL);
 730 }
 731
 732 /*
 733  * Parse a terminal expression with the grammar as defined in
 734  * apropos(1).
 735  * Return NULL if we fail the parse.
 736  */
 737 static struct expr *
 738 exprterm(char *buf, int cs)
 739 {
 740         struct expr      e;
 741         struct expr     *p;
 742         char            *key;
 743         int              i;
 744
 745         memset(&e, 0, sizeof(struct expr));
 746
 747         /* Choose regex or substring match. */
 748
 749         if (NULL == (e.v = strpbrk(buf, "=~"))) {
 750                 e.regex = 0;
 751                 e.v = buf;
 752         } else {
 753                 e.regex = '~' == *e.v;
 754                 *e.v++ = '\0';
 755         }
 756
 757         /* Determine the record types to search for. */
 758
 759         e.mask = 0;
 760         if (buf < e.v) {
 761                 while (NULL != (key = strsep(&buf, ","))) {
 762                         i = 0;
 763                         while (types[i].mask &&
 764                                         strcmp(types[i].name, key))
 765                                 i++;
 766                         e.mask |= types[i].mask;
 767                 }
 768         }
 769         if (0 == e.mask)
 770                 e.mask = TYPE_Nm | TYPE_Nd;
 771
 772         if (e.regex) {
 773                 i = REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE);
 774                 if (regcomp(&e.re, e.v, i))
 775                         return(NULL);
 776         }
 777
 778         e.v = mandoc_strdup(e.v);
 779
 780         p = mandoc_calloc(1, sizeof(struct expr));
 781         memcpy(p, &e, sizeof(struct expr));
 782         return(p);
 783 }
 784
 785 void
 786 exprfree(struct expr *p)
 787 {
 788         struct expr     *pp;
 789
 790         while (NULL != p) {
 791                 if (p->subexpr)
 792                         exprfree(p->subexpr);
 793                 if (p->regex)
 794                         regfree(&p->re);
 795                 free(p->v);
 796                 pp = p->next;
 797                 free(p);
 798                 p = pp;
 799         }
 800 }
 801
 802 static int
 803 exprmark(const struct expr *p, const char *cp,
 804                 uint64_t mask, int *ms)
 805 {
 806
 807         for ( ; p; p = p->next) {
 808                 if (p->subexpr) {
 809                         if (exprmark(p->subexpr, cp, mask, ms))
 810                                 return(1);
 811                         continue;
 812                 } else if ( ! (mask & p->mask))
 813                         continue;
 814
 815                 if (p->regex) {
 816                         if (regexec(&p->re, cp, 0, NULL, 0))
 817                                 continue;
 818                 } else if (NULL == strcasestr(cp, p->v))
 819                         continue;
 820
 821                 if (NULL == ms)
 822                         return(1);
 823                 else
 824                         ms[p->index] = 1;
 825         }
 826
 827         return(0);
 828 }
 829
 830 static int
 831 expreval(const struct expr *p, int *ms)
 832 {
 833         int              match;
 834
 835         /*
 836          * AND has precedence over OR.  Analysis is left-right, though
 837          * it doesn't matter because there are no side-effects.
 838          * Thus, step through pairwise ANDs and accumulate their Boolean
 839          * evaluation.  If we encounter a single true AND collection or
 840          * standalone term, the whole expression is true (by definition
 841          * of OR).
 842          */
 843
 844         for (match = 0; p && ! match; p = p->next) {
 845                 /* Evaluate a subexpression, if applicable. */
 846                 if (p->subexpr && ! ms[p->index])
 847                         ms[p->index] = expreval(p->subexpr, ms);
 848
 849                 match = ms[p->index];
 850                 for ( ; p->next && p->next->and; p = p->next) {
 851                         /* Evaluate a subexpression, if applicable. */
 852                         if (p->next->subexpr && ! ms[p->next->index])
 853                                 ms[p->next->index] =
 854                                         expreval(p->next->subexpr, ms);
 855                         match = match && ms[p->next->index];
 856                 }
 857         }
 858
 859         return(match);
 860 }
 861
 862 /*
 863  * First, update the array of terms for which this expression evaluates
 864  * to true.
 865  * Second, logically evaluate all terms over the updated array of truth
 866  * values.
 867  * If this evaluates to true, mark the expression as satisfied.
 868  */
 869 static void
 870 exprexec(const struct expr *e, const char *cp,
 871                 uint64_t mask, struct rec *r)
 872 {
 873
 874         assert(0 == r->matched);
 875         exprmark(e, cp, mask, r->matches);
 876         r->matched = expreval(e, r->matches);
 877 }