apropos_db.c

   1 /*      $Id: apropos_db.c,v 1.9 2011/11/20 15:45:37 kristaps Exp $ */
   2 /*
   3  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #include <assert.h>
  19 #include <fcntl.h>
  20 #include <regex.h>
  21 #include <stdarg.h>
  22 #include <stdint.h>
  23 #include <stdlib.h>
  24 #include <string.h>
  25 #include <unistd.h>
  26
  27 #ifdef __linux__
  28 # include <db_185.h>
  29 #else
  30 # include <db.h>
  31 #endif
  32
  33 #include "mandocdb.h"
  34 #include "apropos_db.h"
  35 #include "mandoc.h"
  36
  37 struct  rec {
  38         struct res       res; /* resulting record info */
  39         /*
  40          * Maintain a binary tree for checking the uniqueness of `rec'
  41          * when adding elements to the results array.
  42          * Since the results array is dynamic, use offset in the array
  43          * instead of a pointer to the structure.
  44          */
  45         int              lhs;
  46         int              rhs;
  47         int              matched; /* expression is true */
  48         int             *matches; /* partial truth evaluations */
  49 };
  50
  51 struct  expr {
  52         int              regex; /* is regex? */
  53         int              index; /* index in match array */
  54         uint64_t         mask; /* type-mask */
  55         int              cs; /* is case-sensitive? */
  56         int              and; /* is rhs of logical AND? */
  57         char            *v; /* search value */
  58         regex_t          re; /* compiled re, if regex */
  59         struct expr     *next; /* next in sequence */
  60         struct expr     *subexpr;
  61 };
  62
  63 struct  type {
  64         uint64_t         mask;
  65         const char      *name;
  66 };
  67
  68 struct  rectree {
  69         struct rec      *node; /* record array for dir tree */
  70         int              len; /* length of record array */
  71 };
  72
  73 static  const struct type types[] = {
  74         { TYPE_An, "An" },
  75         { TYPE_Ar, "Ar" },
  76         { TYPE_At, "At" },
  77         { TYPE_Bsx, "Bsx" },
  78         { TYPE_Bx, "Bx" },
  79         { TYPE_Cd, "Cd" },
  80         { TYPE_Cm, "Cm" },
  81         { TYPE_Dv, "Dv" },
  82         { TYPE_Dx, "Dx" },
  83         { TYPE_Em, "Em" },
  84         { TYPE_Er, "Er" },
  85         { TYPE_Ev, "Ev" },
  86         { TYPE_Fa, "Fa" },
  87         { TYPE_Fl, "Fl" },
  88         { TYPE_Fn, "Fn" },
  89         { TYPE_Fn, "Fo" },
  90         { TYPE_Ft, "Ft" },
  91         { TYPE_Fx, "Fx" },
  92         { TYPE_Ic, "Ic" },
  93         { TYPE_In, "In" },
  94         { TYPE_Lb, "Lb" },
  95         { TYPE_Li, "Li" },
  96         { TYPE_Lk, "Lk" },
  97         { TYPE_Ms, "Ms" },
  98         { TYPE_Mt, "Mt" },
  99         { TYPE_Nd, "Nd" },
 100         { TYPE_Nm, "Nm" },
 101         { TYPE_Nx, "Nx" },
 102         { TYPE_Ox, "Ox" },
 103         { TYPE_Pa, "Pa" },
 104         { TYPE_Rs, "Rs" },
 105         { TYPE_Sh, "Sh" },
 106         { TYPE_Ss, "Ss" },
 107         { TYPE_St, "St" },
 108         { TYPE_Sy, "Sy" },
 109         { TYPE_Tn, "Tn" },
 110         { TYPE_Va, "Va" },
 111         { TYPE_Va, "Vt" },
 112         { TYPE_Xr, "Xr" },
 113         { INT_MAX, "any" },
 114         { 0, NULL }
 115 };
 116
 117 static  DB      *btree_open(void);
 118 static  int      btree_read(const DBT *,
 119                         const struct mchars *, char **);
 120 static  int      expreval(const struct expr *, int *);
 121 static  void     exprexec(const struct expr *,
 122                         const char *, uint64_t, struct rec *);
 123 static  int      exprmark(const struct expr *,
 124                         const char *, uint64_t, int *);
 125 static  struct expr *exprexpr(int, char *[], int *, int *, size_t *);
 126 static  struct expr *exprterm(char *, int);
 127 static  DB      *index_open(void);
 128 static  int      index_read(const DBT *, const DBT *,
 129                         const struct mchars *, struct rec *);
 130 static  void     norm_string(const char *,
 131                         const struct mchars *, char **);
 132 static  size_t   norm_utf8(unsigned int, char[7]);
 133 static  void     recfree(struct rec *);
 134 static  int      single_search(struct rectree *, const struct opts *,
 135                         const struct expr *, size_t terms,
 136                         struct mchars *);
 137
 138 /*
 139  * Open the keyword mandoc-db database.
 140  */
 141 static DB *
 142 btree_open(void)
 143 {
 144         BTREEINFO        info;
 145         DB              *db;
 146
 147         memset(&info, 0, sizeof(BTREEINFO));
 148         info.flags = R_DUP;
 149
 150         db = dbopen(MANDOC_DB, O_RDONLY, 0, DB_BTREE, &info);
 151         if (NULL != db)
 152                 return(db);
 153
 154         return(NULL);
 155 }
 156
 157 /*
 158  * Read a keyword from the database and normalise it.
 159  * Return 0 if the database is insane, else 1.
 160  */
 161 static int
 162 btree_read(const DBT *v, const struct mchars *mc, char **buf)
 163 {
 164
 165         /* Sanity: are we nil-terminated? */
 166
 167         assert(v->size > 0);
 168
 169         if ('\0' != ((char *)v->data)[(int)v->size - 1])
 170                 return(0);
 171
 172         norm_string((char *)v->data, mc, buf);
 173         return(1);
 174 }
 175
 176 /*
 177  * Take a Unicode codepoint and produce its UTF-8 encoding.
 178  * This isn't the best way to do this, but it works.
 179  * The magic numbers are from the UTF-8 packaging.
 180  * They're not as scary as they seem: read the UTF-8 spec for details.
 181  */
 182 static size_t
 183 norm_utf8(unsigned int cp, char out[7])
 184 {
 185         size_t           rc;
 186
 187         rc = 0;
 188
 189         if (cp <= 0x0000007F) {
 190                 rc = 1;
 191                 out[0] = (char)cp;
 192         } else if (cp <= 0x000007FF) {
 193                 rc = 2;
 194                 out[0] = (cp >> 6  & 31) | 192;
 195                 out[1] = (cp       & 63) | 128;
 196         } else if (cp <= 0x0000FFFF) {
 197                 rc = 3;
 198                 out[0] = (cp >> 12 & 15) | 224;
 199                 out[1] = (cp >> 6  & 63) | 128;
 200                 out[2] = (cp       & 63) | 128;
 201         } else if (cp <= 0x001FFFFF) {
 202                 rc = 4;
 203                 out[0] = (cp >> 18 & 7) | 240;
 204                 out[1] = (cp >> 12 & 63) | 128;
 205                 out[2] = (cp >> 6  & 63) | 128;
 206                 out[3] = (cp       & 63) | 128;
 207         } else if (cp <= 0x03FFFFFF) {
 208                 rc = 5;
 209                 out[0] = (cp >> 24 & 3) | 248;
 210                 out[1] = (cp >> 18 & 63) | 128;
 211                 out[2] = (cp >> 12 & 63) | 128;
 212                 out[3] = (cp >> 6  & 63) | 128;
 213                 out[4] = (cp       & 63) | 128;
 214         } else if (cp <= 0x7FFFFFFF) {
 215                 rc = 6;
 216                 out[0] = (cp >> 30 & 1) | 252;
 217                 out[1] = (cp >> 24 & 63) | 128;
 218                 out[2] = (cp >> 18 & 63) | 128;
 219                 out[3] = (cp >> 12 & 63) | 128;
 220                 out[4] = (cp >> 6  & 63) | 128;
 221                 out[5] = (cp       & 63) | 128;
 222         } else
 223                 return(0);
 224
 225         out[rc] = '\0';
 226         return(rc);
 227 }
 228
 229 /*
 230  * Normalise strings from the index and database.
 231  * These strings are escaped as defined by mandoc_char(7) along with
 232  * other goop in mandoc.h (e.g., soft hyphens).
 233  * This function normalises these into a nice UTF-8 string.
 234  * Returns 0 if the database is fucked.
 235  */
 236 static void
 237 norm_string(const char *val, const struct mchars *mc, char **buf)
 238 {
 239         size_t            sz, bsz;
 240         char              utfbuf[7];
 241         const char       *seq, *cpp;
 242         int               len, u, pos;
 243         enum mandoc_esc   esc;
 244         static const char res[] = { '\\', '\t',
 245                                 ASCII_NBRSP, ASCII_HYPH, '\0' };
 246
 247         /* Pre-allocate by the length of the input */
 248
 249         bsz = strlen(val) + 1;
 250         *buf = mandoc_realloc(*buf, bsz);
 251         pos = 0;
 252
 253         while ('\0' != *val) {
 254                 /*
 255                  * Halt on the first escape sequence.
 256                  * This also halts on the end of string, in which case
 257                  * we just copy, fallthrough, and exit the loop.
 258                  */
 259                 if ((sz = strcspn(val, res)) > 0) {
 260                         memcpy(&(*buf)[pos], val, sz);
 261                         pos += (int)sz;
 262                         val += (int)sz;
 263                 }
 264
 265                 if (ASCII_HYPH == *val) {
 266                         (*buf)[pos++] = '-';
 267                         val++;
 268                         continue;
 269                 } else if ('\t' == *val || ASCII_NBRSP == *val) {
 270                         (*buf)[pos++] = ' ';
 271                         val++;
 272                         continue;
 273                 } else if ('\\' != *val)
 274                         break;
 275
 276                 /* Read past the slash. */
 277
 278                 val++;
 279                 u = 0;
 280
 281                 /*
 282                  * Parse the escape sequence and see if it's a
 283                  * predefined character or special character.
 284                  */
 285
 286                 esc = mandoc_escape(&val, &seq, &len);
 287                 if (ESCAPE_ERROR == esc)
 288                         break;
 289
 290                 /*
 291                  * XXX - this just does UTF-8, but we need to know
 292                  * beforehand whether we should do text substitution.
 293                  */
 294
 295                 switch (esc) {
 296                 case (ESCAPE_SPECIAL):
 297                         if (0 != (u = mchars_spec2cp(mc, seq, len)))
 298                                 break;
 299                         /* FALLTHROUGH */
 300                 default:
 301                         continue;
 302                 }
 303
 304                 /*
 305                  * If we have a Unicode codepoint, try to convert that
 306                  * to a UTF-8 byte string.
 307                  */
 308
 309                 cpp = utfbuf;
 310                 if (0 == (sz = norm_utf8(u, utfbuf)))
 311                         continue;
 312
 313                 /* Copy the rendered glyph into the stream. */
 314
 315                 sz = strlen(cpp);
 316                 bsz += sz;
 317
 318                 *buf = mandoc_realloc(*buf, bsz);
 319
 320                 memcpy(&(*buf)[pos], cpp, sz);
 321                 pos += (int)sz;
 322         }
 323
 324         (*buf)[pos] = '\0';
 325 }
 326
 327 /*
 328  * Open the filename-index mandoc-db database.
 329  * Returns NULL if opening failed.
 330  */
 331 static DB *
 332 index_open(void)
 333 {
 334         DB              *db;
 335
 336         db = dbopen(MANDOC_IDX, O_RDONLY, 0, DB_RECNO, NULL);
 337         if (NULL != db)
 338                 return(db);
 339
 340         return(NULL);
 341 }
 342
 343 /*
 344  * Safely unpack from an index file record into the structure.
 345  * Returns 1 if an entry was unpacked, 0 if the database is insane.
 346  */
 347 static int
 348 index_read(const DBT *key, const DBT *val,
 349                 const struct mchars *mc, struct rec *rec)
 350 {
 351         size_t           left;
 352         char            *np, *cp;
 353
 354 #define INDEX_BREAD(_dst) \
 355         do { \
 356                 if (NULL == (np = memchr(cp, '\0', left))) \
 357                         return(0); \
 358                 norm_string(cp, mc, &(_dst)); \
 359                 left -= (np - cp) + 1; \
 360                 cp = np + 1; \
 361         } while (/* CONSTCOND */ 0)
 362
 363         left = val->size;
 364         cp = (char *)val->data;
 365
 366         rec->res.rec = *(recno_t *)key->data;
 367
 368         INDEX_BREAD(rec->res.file);
 369         INDEX_BREAD(rec->res.cat);
 370         INDEX_BREAD(rec->res.title);
 371         INDEX_BREAD(rec->res.arch);
 372         INDEX_BREAD(rec->res.desc);
 373         return(1);
 374 }
 375
 376 /*
 377  * Search mandocdb databases in argv (size argc) for the expression
 378  * "expr".
 379  * Filter out by "opts".
 380  * Call "res" with the results, which may be zero.
 381  * Return 0 if there was a database error, else return 1.
 382  */
 383 int
 384 apropos_search(int argc, char *argv[], const struct opts *opts,
 385                 const struct expr *expr, size_t terms, void *arg,
 386                 void (*res)(struct res *, size_t, void *))
 387 {
 388         struct rectree   tree;
 389         struct mchars   *mc;
 390         struct res      *ress;
 391         int              i, mlen, rc;
 392
 393         memset(&tree, 0, sizeof(struct rectree));
 394
 395         mc = mchars_alloc();
 396
 397         for (rc = 1, i = 0; rc && i < argc; i++) {
 398                 /* FIXME: ugly warning: we shouldn't get here! */
 399                 if (chdir(argv[i]))
 400                         continue;
 401                 rc = single_search(&tree, opts, expr, terms, mc);
 402                 /* FIXME: warn and continue... ? */
 403         }
 404
 405         /*
 406          * Count the matching files
 407          * and feed them to the output handler.
 408          */
 409
 410         for (mlen = i = 0; i < tree.len; i++)
 411                 if (tree.node[i].matched)
 412                         mlen++;
 413
 414         ress = mandoc_malloc(mlen * sizeof(struct res));
 415
 416         for (mlen = i = 0; i < tree.len; i++)
 417                 if (tree.node[i].matched)
 418                         memcpy(&ress[mlen++], &tree.node[i].res,
 419                                         sizeof(struct res));
 420
 421         (*res)(ress, mlen, arg);
 422         free(ress);
 423
 424         for (i = 0; i < tree.len; i++)
 425                 recfree(&tree.node[i]);
 426
 427         free(tree.node);
 428         mchars_free(mc);
 429         return(rc);
 430 }
 431
 432 static int
 433 single_search(struct rectree *tree, const struct opts *opts,
 434                 const struct expr *expr, size_t terms,
 435                 struct mchars *mc)
 436 {
 437         int              root, leaf, ch;
 438         uint64_t         mask;
 439         DBT              key, val;
 440         DB              *btree, *idx;
 441         char            *buf;
 442         recno_t          rec;
 443         struct rec      *rs;
 444         struct rec       r;
 445         struct db_val   *vbuf;
 446
 447         root    = -1;
 448         leaf    = -1;
 449         btree   = NULL;
 450         idx     = NULL;
 451         buf     = NULL;
 452         rs      = tree->node;
 453
 454         memset(&r, 0, sizeof(struct rec));
 455
 456         if (NULL == (btree = btree_open()))
 457                 return(0);
 458
 459         if (NULL == (idx = index_open())) {
 460                 (*btree->close)(btree);
 461                 return(0);
 462         }
 463
 464         while (0 == (ch = (*btree->seq)(btree, &key, &val, R_NEXT))) {
 465                 if (key.size < 2 || sizeof(struct db_val) != val.size)
 466                         break;
 467                 if ( ! btree_read(&key, mc, &buf))
 468                         break;
 469
 470                 vbuf = val.data;
 471                 rec = vbuf->rec;
 472                 mask = vbuf->mask;
 473
 474                 /*
 475                  * See if this keyword record matches any of the
 476                  * expressions we have stored.
 477                  */
 478                 if ( ! exprmark(expr, buf, mask, NULL))
 479                         continue;
 480
 481                 /*
 482                  * O(log n) scan for prior records.  Since a record
 483                  * number is unbounded, this has decent performance over
 484                  * a complex hash function.
 485                  */
 486
 487                 for (leaf = root; leaf >= 0; )
 488                         if (rec > rs[leaf].res.rec &&
 489                                         rs[leaf].rhs >= 0)
 490                                 leaf = rs[leaf].rhs;
 491                         else if (rec < rs[leaf].res.rec &&
 492                                         rs[leaf].lhs >= 0)
 493                                 leaf = rs[leaf].lhs;
 494                         else
 495                                 break;
 496
 497                 /*
 498                  * If we find a record, see if it has already evaluated
 499                  * to true.  If it has, great, just keep going.  If not,
 500                  * try to evaluate it now and continue anyway.
 501                  */
 502
 503                 if (leaf >= 0 && rs[leaf].res.rec == rec) {
 504                         if (0 == rs[leaf].matched)
 505                                 exprexec(expr, buf, mask, &rs[leaf]);
 506                         continue;
 507                 }
 508
 509                 /*
 510                  * We have a new file to examine.
 511                  * Extract the manpage's metadata from the index
 512                  * database, then begin partial evaluation.
 513                  */
 514
 515                 key.data = &rec;
 516                 key.size = sizeof(recno_t);
 517
 518                 if (0 != (*idx->get)(idx, &key, &val, 0))
 519                         break;
 520
 521                 r.lhs = r.rhs = -1;
 522                 if ( ! index_read(&key, &val, mc, &r))
 523                         break;
 524
 525                 /* XXX: this should be elsewhere, I guess? */
 526
 527                 if (opts->cat && strcasecmp(opts->cat, r.res.cat))
 528                         continue;
 529                 if (opts->arch && strcasecmp(opts->arch, r.res.arch))
 530                         continue;
 531
 532                 tree->node = rs = mandoc_realloc
 533                         (rs, (tree->len + 1) * sizeof(struct rec));
 534
 535                 memcpy(&rs[tree->len], &r, sizeof(struct rec));
 536                 rs[tree->len].matches =
 537                         mandoc_calloc(terms, sizeof(int));
 538
 539                 exprexec(expr, buf, mask, &rs[tree->len]);
 540                 /* Append to our tree. */
 541
 542                 if (leaf >= 0) {
 543                         if (rec > rs[leaf].res.rec)
 544                                 rs[leaf].rhs = tree->len;
 545                         else
 546                                 rs[leaf].lhs = tree->len;
 547                 } else
 548                         root = tree->len;
 549
 550                 memset(&r, 0, sizeof(struct rec));
 551                 tree->len++;
 552         }
 553
 554         (*btree->close)(btree);
 555         (*idx->close)(idx);
 556
 557         free(buf);
 558         return(1 == ch);
 559 }
 560
 561 static void
 562 recfree(struct rec *rec)
 563 {
 564
 565         free(rec->res.file);
 566         free(rec->res.cat);
 567         free(rec->res.title);
 568         free(rec->res.arch);
 569         free(rec->res.desc);
 570
 571         free(rec->matches);
 572 }
 573
 574 struct expr *
 575 exprcomp(int argc, char *argv[], size_t *tt)
 576 {
 577         int              pos, lvl;
 578         struct expr     *e;
 579
 580         pos = lvl = 0;
 581         *tt = 0;
 582
 583         e = exprexpr(argc, argv, &pos, &lvl, tt);
 584
 585         if (0 == lvl && pos >= argc)
 586                 return(e);
 587
 588         exprfree(e);
 589         return(NULL);
 590 }
 591
 592 /*
 593  * Compile an array of tokens into an expression.
 594  * An informal expression grammar is defined in apropos(1).
 595  * Return NULL if we fail doing so.  All memory will be cleaned up.
 596  * Return the root of the expression sequence if alright.
 597  */
 598 static struct expr *
 599 exprexpr(int argc, char *argv[], int *pos, int *lvl, size_t *tt)
 600 {
 601         struct expr     *e, *first, *next;
 602         int              log;
 603
 604         first = next = NULL;
 605
 606         for ( ; *pos < argc; (*pos)++) {
 607                 e = next;
 608
 609                 /*
 610                  * Close out a subexpression.
 611                  */
 612
 613                 if (NULL != e && 0 == strcmp(")", argv[*pos])) {
 614                         if (--(*lvl) < 0)
 615                                 goto err;
 616                         break;
 617                 }
 618
 619                 /*
 620                  * Small note: if we're just starting, don't let "-a"
 621                  * and "-o" be considered logical operators: they're
 622                  * just tokens unless pairwise joining, in which case we
 623                  * record their existence (or assume "OR").
 624                  */
 625                 log = 0;
 626
 627                 if (NULL != e && 0 == strcmp("-a", argv[*pos]))
 628                         log = 1;
 629                 else if (NULL != e && 0 == strcmp("-o", argv[*pos]))
 630                         log = 2;
 631
 632                 if (log > 0 && ++(*pos) >= argc)
 633                         goto err;
 634
 635                 /*
 636                  * Now we parse the term part.  This can begin with
 637                  * "-i", in which case the expression is case
 638                  * insensitive.
 639                  */
 640
 641                 if (0 == strcmp("(", argv[*pos])) {
 642                         ++(*pos);
 643                         ++(*lvl);
 644                         next = mandoc_calloc(1, sizeof(struct expr));
 645                         next->cs = 1;
 646                         next->subexpr = exprexpr(argc, argv, pos, lvl, tt);
 647                         if (NULL == next->subexpr) {
 648                                 free(next);
 649                                 next = NULL;
 650                         }
 651                 } else if (0 == strcmp("-i", argv[*pos])) {
 652                         if (++(*pos) >= argc)
 653                                 goto err;
 654                         next = exprterm(argv[*pos], 0);
 655                 } else
 656                         next = exprterm(argv[*pos], 1);
 657
 658                 if (NULL == next)
 659                         goto err;
 660
 661                 next->and = log == 1;
 662                 next->index = (int)(*tt)++;
 663
 664                 /* Append to our chain of expressions. */
 665
 666                 if (NULL == first) {
 667                         assert(NULL == e);
 668                         first = next;
 669                 } else {
 670                         assert(NULL != e);
 671                         e->next = next;
 672                 }
 673         }
 674
 675         return(first);
 676 err:
 677         exprfree(first);
 678         return(NULL);
 679 }
 680
 681 /*
 682  * Parse a terminal expression with the grammar as defined in
 683  * apropos(1).
 684  * Return NULL if we fail the parse.
 685  */
 686 static struct expr *
 687 exprterm(char *buf, int cs)
 688 {
 689         struct expr      e;
 690         struct expr     *p;
 691         char            *key;
 692         int              i;
 693
 694         memset(&e, 0, sizeof(struct expr));
 695
 696         e.cs = cs;
 697
 698         /* Choose regex or substring match. */
 699
 700         if (NULL == (e.v = strpbrk(buf, "=~"))) {
 701                 e.regex = 0;
 702                 e.v = buf;
 703         } else {
 704                 e.regex = '~' == *e.v;
 705                 *e.v++ = '\0';
 706         }
 707
 708         /* Determine the record types to search for. */
 709
 710         e.mask = 0;
 711         if (buf < e.v) {
 712                 while (NULL != (key = strsep(&buf, ","))) {
 713                         i = 0;
 714                         while (types[i].mask &&
 715                                         strcmp(types[i].name, key))
 716                                 i++;
 717                         e.mask |= types[i].mask;
 718                 }
 719         }
 720         if (0 == e.mask)
 721                 e.mask = TYPE_Nm | TYPE_Nd;
 722
 723         if (e.regex) {
 724                 i = REG_EXTENDED | REG_NOSUB | cs ? 0 : REG_ICASE;
 725                 if (regcomp(&e.re, e.v, i))
 726                         return(NULL);
 727         }
 728
 729         e.v = mandoc_strdup(e.v);
 730
 731         p = mandoc_calloc(1, sizeof(struct expr));
 732         memcpy(p, &e, sizeof(struct expr));
 733         return(p);
 734 }
 735
 736 void
 737 exprfree(struct expr *p)
 738 {
 739         struct expr     *pp;
 740
 741         while (NULL != p) {
 742                 if (p->subexpr)
 743                         exprfree(p->subexpr);
 744                 if (p->regex)
 745                         regfree(&p->re);
 746                 free(p->v);
 747                 pp = p->next;
 748                 free(p);
 749                 p = pp;
 750         }
 751 }
 752
 753 static int
 754 exprmark(const struct expr *p, const char *cp,
 755                 uint64_t mask, int *ms)
 756 {
 757
 758         for ( ; p; p = p->next) {
 759                 if (p->subexpr) {
 760                         if (exprmark(p->subexpr, cp, mask, ms))
 761                                 return(1);
 762                         continue;
 763                 } else if ( ! (mask & p->mask))
 764                         continue;
 765
 766                 if (p->regex) {
 767                         if (regexec(&p->re, cp, 0, NULL, 0))
 768                                 continue;
 769                 } else if (p->cs) {
 770                         if (NULL == strstr(cp, p->v))
 771                                 continue;
 772                 } else {
 773                         if (NULL == strcasestr(cp, p->v))
 774                                 continue;
 775                 }
 776
 777                 if (NULL == ms)
 778                         return(1);
 779                 else
 780                         ms[p->index] = 1;
 781         }
 782
 783         return(0);
 784 }
 785
 786 static int
 787 expreval(const struct expr *p, int *ms)
 788 {
 789         int              match;
 790
 791         /*
 792          * AND has precedence over OR.  Analysis is left-right, though
 793          * it doesn't matter because there are no side-effects.
 794          * Thus, step through pairwise ANDs and accumulate their Boolean
 795          * evaluation.  If we encounter a single true AND collection or
 796          * standalone term, the whole expression is true (by definition
 797          * of OR).
 798          */
 799
 800         for (match = 0; p && ! match; p = p->next) {
 801                 /* Evaluate a subexpression, if applicable. */
 802                 if (p->subexpr && ! ms[p->index])
 803                         ms[p->index] = expreval(p->subexpr, ms);
 804
 805                 match = ms[p->index];
 806                 for ( ; p->next && p->next->and; p = p->next) {
 807                         /* Evaluate a subexpression, if applicable. */
 808                         if (p->next->subexpr && ! ms[p->next->index])
 809                                 ms[p->next->index] =
 810                                         expreval(p->next->subexpr, ms);
 811                         match = match && ms[p->next->index];
 812                 }
 813         }
 814
 815         return(match);
 816 }
 817
 818 /*
 819  * First, update the array of terms for which this expression evaluates
 820  * to true.
 821  * Second, logically evaluate all terms over the updated array of truth
 822  * values.
 823  * If this evaluates to true, mark the expression as satisfied.
 824  */
 825 static void
 826 exprexec(const struct expr *p, const char *cp,
 827                 uint64_t mask, struct rec *r)
 828 {
 829
 830         assert(0 == r->matched);
 831         exprmark(p, cp, mask, r->matches);
 832         r->matched = expreval(p, r->matches);
 833 }