apropos_db.c

   1 /*      $Id: apropos_db.c,v 1.11 2011/11/23 09:55:28 kristaps Exp $ */
   2 /*
   3  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #include <assert.h>
  19 #include <fcntl.h>
  20 #include <regex.h>
  21 #include <stdarg.h>
  22 #include <stdint.h>
  23 #include <stdlib.h>
  24 #include <string.h>
  25 #include <unistd.h>
  26
  27 #ifdef __linux__
  28 # include <db_185.h>
  29 #else
  30 # include <db.h>
  31 #endif
  32
  33 #include "mandocdb.h"
  34 #include "apropos_db.h"
  35 #include "mandoc.h"
  36
/*
 * A candidate manual page collected during a search: its index-file
 * metadata plus the bookkeeping used to evaluate the search expression
 * against it keyword by keyword.
 */
struct  rec {
        struct res       res; /* resulting record info */
        /*
         * Maintain a binary tree for checking the uniqueness of `rec'
         * when adding elements to the results array.
         * Since the results array is dynamic, use offset in the array
         * instead of a pointer to the structure.
         * An offset of -1 means "no child".
         */
        int              lhs; /* child with a smaller record number */
        int              rhs; /* child with a larger record number */
        int              matched; /* expression is true */
        int             *matches; /* partial truth evaluations, by term index */
};
  50
/*
 * One node of a compiled search expression.
 * Nodes at the same nesting level are chained through "next"; a
 * parenthesised group is a node whose "subexpr" points at its own chain.
 */
struct  expr {
        int              regex; /* is regex? */
        int              index; /* index in match array */
        uint64_t         mask; /* type-mask */
        int              cs; /* is case-sensitive? */
        int              and; /* is rhs of logical AND? */
        char            *v; /* search value */
        regex_t          re; /* compiled re, if regex */
        struct expr     *next; /* next in sequence */
        struct expr     *subexpr; /* nested expression, or NULL for a term */
};
  62
/*
 * Associates a keyword-type name, as typed in a search term, with the
 * bit mask it selects.
 */
struct  type {
        uint64_t         mask; /* bits to OR into the term's type-mask */
        const char      *name; /* name to compare against; NULL ends the table */
};
  67
/*
 * Growable array of records shared by all searched directories.
 * The per-directory binary tree is threaded through this array by offset.
 */
struct  rectree {
        struct rec      *node; /* record array for dir tree */
        int              len; /* length of record array */
};
  72
  73 static  const struct type types[] = {
  74         { TYPE_An, "An" },
  75         { TYPE_Ar, "Ar" },
  76         { TYPE_At, "At" },
  77         { TYPE_Bsx, "Bsx" },
  78         { TYPE_Bx, "Bx" },
  79         { TYPE_Cd, "Cd" },
  80         { TYPE_Cm, "Cm" },
  81         { TYPE_Dv, "Dv" },
  82         { TYPE_Dx, "Dx" },
  83         { TYPE_Em, "Em" },
  84         { TYPE_Er, "Er" },
  85         { TYPE_Ev, "Ev" },
  86         { TYPE_Fa, "Fa" },
  87         { TYPE_Fl, "Fl" },
  88         { TYPE_Fn, "Fn" },
  89         { TYPE_Fn, "Fo" },
  90         { TYPE_Ft, "Ft" },
  91         { TYPE_Fx, "Fx" },
  92         { TYPE_Ic, "Ic" },
  93         { TYPE_In, "In" },
  94         { TYPE_Lb, "Lb" },
  95         { TYPE_Li, "Li" },
  96         { TYPE_Lk, "Lk" },
  97         { TYPE_Ms, "Ms" },
  98         { TYPE_Mt, "Mt" },
  99         { TYPE_Nd, "Nd" },
 100         { TYPE_Nm, "Nm" },
 101         { TYPE_Nx, "Nx" },
 102         { TYPE_Ox, "Ox" },
 103         { TYPE_Pa, "Pa" },
 104         { TYPE_Rs, "Rs" },
 105         { TYPE_Sh, "Sh" },
 106         { TYPE_Ss, "Ss" },
 107         { TYPE_St, "St" },
 108         { TYPE_Sy, "Sy" },
 109         { TYPE_Tn, "Tn" },
 110         { TYPE_Va, "Va" },
 111         { TYPE_Va, "Vt" },
 112         { TYPE_Xr, "Xr" },
 113         { INT_MAX, "any" },
 114         { 0, NULL }
 115 };
 116
/*
 * Forward declarations of the file-local helpers, in alphabetical order.
 */
static  DB      *btree_open(void);
static  int      btree_read(const DBT *,
                        const struct mchars *, char **);
static  int      expreval(const struct expr *, int *);
static  void     exprexec(const struct expr *,
                        const char *, uint64_t, struct rec *);
static  int      exprmark(const struct expr *,
                        const char *, uint64_t, int *);
static  struct expr *exprexpr(int, char *[], int *, int *, size_t *);
static  struct expr *exprterm(char *, int);
static  DB      *index_open(void);
static  int      index_read(const DBT *, const DBT *, int,
                        const struct mchars *, struct rec *);
static  void     norm_string(const char *,
                        const struct mchars *, char **);
static  size_t   norm_utf8(unsigned int, char[7]);
static  void     recfree(struct rec *);
static  int      single_search(struct rectree *, const struct opts *,
                        const struct expr *, size_t terms,
                        struct mchars *, int);
 137
 138 /*
 139  * Open the keyword mandoc-db database.
 140  */
 141 static DB *
 142 btree_open(void)
 143 {
 144         BTREEINFO        info;
 145         DB              *db;
 146
 147         memset(&info, 0, sizeof(BTREEINFO));
 148         info.flags = R_DUP;
 149
 150         db = dbopen(MANDOC_DB, O_RDONLY, 0, DB_BTREE, &info);
 151         if (NULL != db)
 152                 return(db);
 153
 154         return(NULL);
 155 }
 156
 157 /*
 158  * Read a keyword from the database and normalise it.
 159  * Return 0 if the database is insane, else 1.
 160  */
 161 static int
 162 btree_read(const DBT *v, const struct mchars *mc, char **buf)
 163 {
 164
 165         /* Sanity: are we nil-terminated? */
 166
 167         assert(v->size > 0);
 168
 169         if ('\0' != ((char *)v->data)[(int)v->size - 1])
 170                 return(0);
 171
 172         norm_string((char *)v->data, mc, buf);
 173         return(1);
 174 }
 175
 176 /*
 177  * Take a Unicode codepoint and produce its UTF-8 encoding.
 178  * This isn't the best way to do this, but it works.
 179  * The magic numbers are from the UTF-8 packaging.
 180  * They're not as scary as they seem: read the UTF-8 spec for details.
 181  */
 182 static size_t
 183 norm_utf8(unsigned int cp, char out[7])
 184 {
 185         size_t           rc;
 186
 187         rc = 0;
 188
 189         if (cp <= 0x0000007F) {
 190                 rc = 1;
 191                 out[0] = (char)cp;
 192         } else if (cp <= 0x000007FF) {
 193                 rc = 2;
 194                 out[0] = (cp >> 6  & 31) | 192;
 195                 out[1] = (cp       & 63) | 128;
 196         } else if (cp <= 0x0000FFFF) {
 197                 rc = 3;
 198                 out[0] = (cp >> 12 & 15) | 224;
 199                 out[1] = (cp >> 6  & 63) | 128;
 200                 out[2] = (cp       & 63) | 128;
 201         } else if (cp <= 0x001FFFFF) {
 202                 rc = 4;
 203                 out[0] = (cp >> 18 & 7) | 240;
 204                 out[1] = (cp >> 12 & 63) | 128;
 205                 out[2] = (cp >> 6  & 63) | 128;
 206                 out[3] = (cp       & 63) | 128;
 207         } else if (cp <= 0x03FFFFFF) {
 208                 rc = 5;
 209                 out[0] = (cp >> 24 & 3) | 248;
 210                 out[1] = (cp >> 18 & 63) | 128;
 211                 out[2] = (cp >> 12 & 63) | 128;
 212                 out[3] = (cp >> 6  & 63) | 128;
 213                 out[4] = (cp       & 63) | 128;
 214         } else if (cp <= 0x7FFFFFFF) {
 215                 rc = 6;
 216                 out[0] = (cp >> 30 & 1) | 252;
 217                 out[1] = (cp >> 24 & 63) | 128;
 218                 out[2] = (cp >> 18 & 63) | 128;
 219                 out[3] = (cp >> 12 & 63) | 128;
 220                 out[4] = (cp >> 6  & 63) | 128;
 221                 out[5] = (cp       & 63) | 128;
 222         } else
 223                 return(0);
 224
 225         out[rc] = '\0';
 226         return(rc);
 227 }
 228
 229 /*
 230  * Normalise strings from the index and database.
 231  * These strings are escaped as defined by mandoc_char(7) along with
 232  * other goop in mandoc.h (e.g., soft hyphens).
 233  * This function normalises these into a nice UTF-8 string.
 234  * Returns 0 if the database is fucked.
 235  */
 236 static void
 237 norm_string(const char *val, const struct mchars *mc, char **buf)
 238 {
 239         size_t            sz, bsz;
 240         char              utfbuf[7];
 241         const char       *seq, *cpp;
 242         int               len, u, pos;
 243         enum mandoc_esc   esc;
 244         static const char res[] = { '\\', '\t',
 245                                 ASCII_NBRSP, ASCII_HYPH, '\0' };
 246
 247         /* Pre-allocate by the length of the input */
 248
 249         bsz = strlen(val) + 1;
 250         *buf = mandoc_realloc(*buf, bsz);
 251         pos = 0;
 252
 253         while ('\0' != *val) {
 254                 /*
 255                  * Halt on the first escape sequence.
 256                  * This also halts on the end of string, in which case
 257                  * we just copy, fallthrough, and exit the loop.
 258                  */
 259                 if ((sz = strcspn(val, res)) > 0) {
 260                         memcpy(&(*buf)[pos], val, sz);
 261                         pos += (int)sz;
 262                         val += (int)sz;
 263                 }
 264
 265                 if (ASCII_HYPH == *val) {
 266                         (*buf)[pos++] = '-';
 267                         val++;
 268                         continue;
 269                 } else if ('\t' == *val || ASCII_NBRSP == *val) {
 270                         (*buf)[pos++] = ' ';
 271                         val++;
 272                         continue;
 273                 } else if ('\\' != *val)
 274                         break;
 275
 276                 /* Read past the slash. */
 277
 278                 val++;
 279                 u = 0;
 280
 281                 /*
 282                  * Parse the escape sequence and see if it's a
 283                  * predefined character or special character.
 284                  */
 285
 286                 esc = mandoc_escape(&val, &seq, &len);
 287                 if (ESCAPE_ERROR == esc)
 288                         break;
 289
 290                 /*
 291                  * XXX - this just does UTF-8, but we need to know
 292                  * beforehand whether we should do text substitution.
 293                  */
 294
 295                 switch (esc) {
 296                 case (ESCAPE_SPECIAL):
 297                         if (0 != (u = mchars_spec2cp(mc, seq, len)))
 298                                 break;
 299                         /* FALLTHROUGH */
 300                 default:
 301                         continue;
 302                 }
 303
 304                 /*
 305                  * If we have a Unicode codepoint, try to convert that
 306                  * to a UTF-8 byte string.
 307                  */
 308
 309                 cpp = utfbuf;
 310                 if (0 == (sz = norm_utf8(u, utfbuf)))
 311                         continue;
 312
 313                 /* Copy the rendered glyph into the stream. */
 314
 315                 sz = strlen(cpp);
 316                 bsz += sz;
 317
 318                 *buf = mandoc_realloc(*buf, bsz);
 319
 320                 memcpy(&(*buf)[pos], cpp, sz);
 321                 pos += (int)sz;
 322         }
 323
 324         (*buf)[pos] = '\0';
 325 }
 326
 327 /*
 328  * Open the filename-index mandoc-db database.
 329  * Returns NULL if opening failed.
 330  */
 331 static DB *
 332 index_open(void)
 333 {
 334         DB              *db;
 335
 336         db = dbopen(MANDOC_IDX, O_RDONLY, 0, DB_RECNO, NULL);
 337         if (NULL != db)
 338                 return(db);
 339
 340         return(NULL);
 341 }
 342
 343 /*
 344  * Safely unpack from an index file record into the structure.
 345  * Returns 1 if an entry was unpacked, 0 if the database is insane.
 346  */
 347 static int
 348 index_read(const DBT *key, const DBT *val, int index,
 349                 const struct mchars *mc, struct rec *rec)
 350 {
 351         size_t           left;
 352         char            *np, *cp;
 353
 354 #define INDEX_BREAD(_dst) \
 355         do { \
 356                 if (NULL == (np = memchr(cp, '\0', left))) \
 357                         return(0); \
 358                 norm_string(cp, mc, &(_dst)); \
 359                 left -= (np - cp) + 1; \
 360                 cp = np + 1; \
 361         } while (/* CONSTCOND */ 0)
 362
 363         left = val->size;
 364         cp = (char *)val->data;
 365
 366         rec->res.rec = *(recno_t *)key->data;
 367         rec->res.volume = index;
 368
 369         INDEX_BREAD(rec->res.file);
 370         INDEX_BREAD(rec->res.cat);
 371         INDEX_BREAD(rec->res.title);
 372         INDEX_BREAD(rec->res.arch);
 373         INDEX_BREAD(rec->res.desc);
 374         return(1);
 375 }
 376
 377 /*
 378  * Search mandocdb databases in paths for expression "expr".
 379  * Filter out by "opts".
 380  * Call "res" with the results, which may be zero.
 381  * Return 0 if there was a database error, else return 1.
 382  */
 383 int
 384 apropos_search(int pathsz, char **paths, const struct opts *opts,
 385                 const struct expr *expr, size_t terms, void *arg,
 386                 void (*res)(struct res *, size_t, void *))
 387 {
 388         struct rectree   tree;
 389         struct mchars   *mc;
 390         struct res      *ress;
 391         int              i, mlen, rc;
 392
 393         memset(&tree, 0, sizeof(struct rectree));
 394
 395         rc = 0;
 396         mc = mchars_alloc();
 397
 398         /*
 399          * Main loop.  Change into the directory containing manpage
 400          * databases.  Run our expession over each database in the set.
 401          */
 402
 403         for (i = 0; i < pathsz; i++) {
 404                 if (chdir(paths[i]))
 405                         continue;
 406                 if ( ! single_search(&tree, opts, expr, terms, mc, i))
 407                         goto out;
 408         }
 409
 410         /*
 411          * Count matching files, transfer to a "clean" array, then feed
 412          * them to the output handler.
 413          */
 414
 415         for (mlen = i = 0; i < tree.len; i++)
 416                 if (tree.node[i].matched)
 417                         mlen++;
 418
 419         ress = mandoc_malloc(mlen * sizeof(struct res));
 420
 421         for (mlen = i = 0; i < tree.len; i++)
 422                 if (tree.node[i].matched)
 423                         memcpy(&ress[mlen++], &tree.node[i].res,
 424                                         sizeof(struct res));
 425
 426         (*res)(ress, mlen, arg);
 427         free(ress);
 428
 429         rc = 1;
 430 out:
 431         for (i = 0; i < tree.len; i++)
 432                 recfree(&tree.node[i]);
 433
 434         free(tree.node);
 435         mchars_free(mc);
 436         return(rc);
 437 }
 438
 439 static int
 440 single_search(struct rectree *tree, const struct opts *opts,
 441                 const struct expr *expr, size_t terms,
 442                 struct mchars *mc, int vol)
 443 {
 444         int              root, leaf, ch;
 445         uint64_t         mask;
 446         DBT              key, val;
 447         DB              *btree, *idx;
 448         char            *buf;
 449         recno_t          rec;
 450         struct rec      *rs;
 451         struct rec       r;
 452         struct db_val   *vbuf;
 453
 454         root    = -1;
 455         leaf    = -1;
 456         btree   = NULL;
 457         idx     = NULL;
 458         buf     = NULL;
 459         rs      = tree->node;
 460
 461         memset(&r, 0, sizeof(struct rec));
 462
 463         if (NULL == (btree = btree_open()))
 464                 return(1);
 465
 466         if (NULL == (idx = index_open())) {
 467                 (*btree->close)(btree);
 468                 return(1);
 469         }
 470
 471         while (0 == (ch = (*btree->seq)(btree, &key, &val, R_NEXT))) {
 472                 if (key.size < 2 || sizeof(struct db_val) != val.size)
 473                         break;
 474                 if ( ! btree_read(&key, mc, &buf))
 475                         break;
 476
 477                 vbuf = val.data;
 478                 rec = vbuf->rec;
 479                 mask = vbuf->mask;
 480
 481                 /*
 482                  * See if this keyword record matches any of the
 483                  * expressions we have stored.
 484                  */
 485                 if ( ! exprmark(expr, buf, mask, NULL))
 486                         continue;
 487
 488                 /*
 489                  * O(log n) scan for prior records.  Since a record
 490                  * number is unbounded, this has decent performance over
 491                  * a complex hash function.
 492                  */
 493
 494                 for (leaf = root; leaf >= 0; )
 495                         if (rec > rs[leaf].res.rec &&
 496                                         rs[leaf].rhs >= 0)
 497                                 leaf = rs[leaf].rhs;
 498                         else if (rec < rs[leaf].res.rec &&
 499                                         rs[leaf].lhs >= 0)
 500                                 leaf = rs[leaf].lhs;
 501                         else
 502                                 break;
 503
 504                 /*
 505                  * If we find a record, see if it has already evaluated
 506                  * to true.  If it has, great, just keep going.  If not,
 507                  * try to evaluate it now and continue anyway.
 508                  */
 509
 510                 if (leaf >= 0 && rs[leaf].res.rec == rec) {
 511                         if (0 == rs[leaf].matched)
 512                                 exprexec(expr, buf, mask, &rs[leaf]);
 513                         continue;
 514                 }
 515
 516                 /*
 517                  * We have a new file to examine.
 518                  * Extract the manpage's metadata from the index
 519                  * database, then begin partial evaluation.
 520                  */
 521
 522                 key.data = &rec;
 523                 key.size = sizeof(recno_t);
 524
 525                 if (0 != (*idx->get)(idx, &key, &val, 0))
 526                         break;
 527
 528                 r.lhs = r.rhs = -1;
 529                 if ( ! index_read(&key, &val, vol, mc, &r))
 530                         break;
 531
 532                 /* XXX: this should be elsewhere, I guess? */
 533
 534                 if (opts->cat && strcasecmp(opts->cat, r.res.cat))
 535                         continue;
 536                 if (opts->arch && strcasecmp(opts->arch, r.res.arch))
 537                         continue;
 538
 539                 tree->node = rs = mandoc_realloc
 540                         (rs, (tree->len + 1) * sizeof(struct rec));
 541
 542                 memcpy(&rs[tree->len], &r, sizeof(struct rec));
 543                 rs[tree->len].matches =
 544                         mandoc_calloc(terms, sizeof(int));
 545
 546                 exprexec(expr, buf, mask, &rs[tree->len]);
 547                 /* Append to our tree. */
 548
 549                 if (leaf >= 0) {
 550                         if (rec > rs[leaf].res.rec)
 551                                 rs[leaf].rhs = tree->len;
 552                         else
 553                                 rs[leaf].lhs = tree->len;
 554                 } else
 555                         root = tree->len;
 556
 557                 memset(&r, 0, sizeof(struct rec));
 558                 tree->len++;
 559         }
 560
 561         (*btree->close)(btree);
 562         (*idx->close)(idx);
 563
 564         free(buf);
 565         return(1 == ch);
 566 }
 567
 568 static void
 569 recfree(struct rec *rec)
 570 {
 571
 572         free(rec->res.file);
 573         free(rec->res.cat);
 574         free(rec->res.title);
 575         free(rec->res.arch);
 576         free(rec->res.desc);
 577
 578         free(rec->matches);
 579 }
 580
 581 struct expr *
 582 exprcomp(int argc, char *argv[], size_t *tt)
 583 {
 584         int              pos, lvl;
 585         struct expr     *e;
 586
 587         pos = lvl = 0;
 588         *tt = 0;
 589
 590         e = exprexpr(argc, argv, &pos, &lvl, tt);
 591
 592         if (0 == lvl && pos >= argc)
 593                 return(e);
 594
 595         exprfree(e);
 596         return(NULL);
 597 }
 598
 599 /*
 600  * Compile an array of tokens into an expression.
 601  * An informal expression grammar is defined in apropos(1).
 602  * Return NULL if we fail doing so.  All memory will be cleaned up.
 603  * Return the root of the expression sequence if alright.
 604  */
 605 static struct expr *
 606 exprexpr(int argc, char *argv[], int *pos, int *lvl, size_t *tt)
 607 {
 608         struct expr     *e, *first, *next;
 609         int              log;
 610
 611         first = next = NULL;
 612
 613         for ( ; *pos < argc; (*pos)++) {
 614                 e = next;
 615
 616                 /*
 617                  * Close out a subexpression.
 618                  */
 619
 620                 if (NULL != e && 0 == strcmp(")", argv[*pos])) {
 621                         if (--(*lvl) < 0)
 622                                 goto err;
 623                         break;
 624                 }
 625
 626                 /*
 627                  * Small note: if we're just starting, don't let "-a"
 628                  * and "-o" be considered logical operators: they're
 629                  * just tokens unless pairwise joining, in which case we
 630                  * record their existence (or assume "OR").
 631                  */
 632                 log = 0;
 633
 634                 if (NULL != e && 0 == strcmp("-a", argv[*pos]))
 635                         log = 1;
 636                 else if (NULL != e && 0 == strcmp("-o", argv[*pos]))
 637                         log = 2;
 638
 639                 if (log > 0 && ++(*pos) >= argc)
 640                         goto err;
 641
 642                 /*
 643                  * Now we parse the term part.  This can begin with
 644                  * "-i", in which case the expression is case
 645                  * insensitive.
 646                  */
 647
 648                 if (0 == strcmp("(", argv[*pos])) {
 649                         ++(*pos);
 650                         ++(*lvl);
 651                         next = mandoc_calloc(1, sizeof(struct expr));
 652                         next->cs = 1;
 653                         next->subexpr = exprexpr(argc, argv, pos, lvl, tt);
 654                         if (NULL == next->subexpr) {
 655                                 free(next);
 656                                 next = NULL;
 657                         }
 658                 } else if (0 == strcmp("-i", argv[*pos])) {
 659                         if (++(*pos) >= argc)
 660                                 goto err;
 661                         next = exprterm(argv[*pos], 0);
 662                 } else
 663                         next = exprterm(argv[*pos], 1);
 664
 665                 if (NULL == next)
 666                         goto err;
 667
 668                 next->and = log == 1;
 669                 next->index = (int)(*tt)++;
 670
 671                 /* Append to our chain of expressions. */
 672
 673                 if (NULL == first) {
 674                         assert(NULL == e);
 675                         first = next;
 676                 } else {
 677                         assert(NULL != e);
 678                         e->next = next;
 679                 }
 680         }
 681
 682         return(first);
 683 err:
 684         exprfree(first);
 685         return(NULL);
 686 }
 687
 688 /*
 689  * Parse a terminal expression with the grammar as defined in
 690  * apropos(1).
 691  * Return NULL if we fail the parse.
 692  */
 693 static struct expr *
 694 exprterm(char *buf, int cs)
 695 {
 696         struct expr      e;
 697         struct expr     *p;
 698         char            *key;
 699         int              i;
 700
 701         memset(&e, 0, sizeof(struct expr));
 702
 703         e.cs = cs;
 704
 705         /* Choose regex or substring match. */
 706
 707         if (NULL == (e.v = strpbrk(buf, "=~"))) {
 708                 e.regex = 0;
 709                 e.v = buf;
 710         } else {
 711                 e.regex = '~' == *e.v;
 712                 *e.v++ = '\0';
 713         }
 714
 715         /* Determine the record types to search for. */
 716
 717         e.mask = 0;
 718         if (buf < e.v) {
 719                 while (NULL != (key = strsep(&buf, ","))) {
 720                         i = 0;
 721                         while (types[i].mask &&
 722                                         strcmp(types[i].name, key))
 723                                 i++;
 724                         e.mask |= types[i].mask;
 725                 }
 726         }
 727         if (0 == e.mask)
 728                 e.mask = TYPE_Nm | TYPE_Nd;
 729
 730         if (e.regex) {
 731                 i = REG_EXTENDED | REG_NOSUB | cs ? 0 : REG_ICASE;
 732                 if (regcomp(&e.re, e.v, i))
 733                         return(NULL);
 734         }
 735
 736         e.v = mandoc_strdup(e.v);
 737
 738         p = mandoc_calloc(1, sizeof(struct expr));
 739         memcpy(p, &e, sizeof(struct expr));
 740         return(p);
 741 }
 742
 743 void
 744 exprfree(struct expr *p)
 745 {
 746         struct expr     *pp;
 747
 748         while (NULL != p) {
 749                 if (p->subexpr)
 750                         exprfree(p->subexpr);
 751                 if (p->regex)
 752                         regfree(&p->re);
 753                 free(p->v);
 754                 pp = p->next;
 755                 free(p);
 756                 p = pp;
 757         }
 758 }
 759
 760 static int
 761 exprmark(const struct expr *p, const char *cp,
 762                 uint64_t mask, int *ms)
 763 {
 764
 765         for ( ; p; p = p->next) {
 766                 if (p->subexpr) {
 767                         if (exprmark(p->subexpr, cp, mask, ms))
 768                                 return(1);
 769                         continue;
 770                 } else if ( ! (mask & p->mask))
 771                         continue;
 772
 773                 if (p->regex) {
 774                         if (regexec(&p->re, cp, 0, NULL, 0))
 775                                 continue;
 776                 } else if (p->cs) {
 777                         if (NULL == strstr(cp, p->v))
 778                                 continue;
 779                 } else {
 780                         if (NULL == strcasestr(cp, p->v))
 781                                 continue;
 782                 }
 783
 784                 if (NULL == ms)
 785                         return(1);
 786                 else
 787                         ms[p->index] = 1;
 788         }
 789
 790         return(0);
 791 }
 792
 793 static int
 794 expreval(const struct expr *p, int *ms)
 795 {
 796         int              match;
 797
 798         /*
 799          * AND has precedence over OR.  Analysis is left-right, though
 800          * it doesn't matter because there are no side-effects.
 801          * Thus, step through pairwise ANDs and accumulate their Boolean
 802          * evaluation.  If we encounter a single true AND collection or
 803          * standalone term, the whole expression is true (by definition
 804          * of OR).
 805          */
 806
 807         for (match = 0; p && ! match; p = p->next) {
 808                 /* Evaluate a subexpression, if applicable. */
 809                 if (p->subexpr && ! ms[p->index])
 810                         ms[p->index] = expreval(p->subexpr, ms);
 811
 812                 match = ms[p->index];
 813                 for ( ; p->next && p->next->and; p = p->next) {
 814                         /* Evaluate a subexpression, if applicable. */
 815                         if (p->next->subexpr && ! ms[p->next->index])
 816                                 ms[p->next->index] =
 817                                         expreval(p->next->subexpr, ms);
 818                         match = match && ms[p->next->index];
 819                 }
 820         }
 821
 822         return(match);
 823 }
 824
 825 /*
 826  * First, update the array of terms for which this expression evaluates
 827  * to true.
 828  * Second, logically evaluate all terms over the updated array of truth
 829  * values.
 830  * If this evaluates to true, mark the expression as satisfied.
 831  */
 832 static void
 833 exprexec(const struct expr *p, const char *cp,
 834                 uint64_t mask, struct rec *r)
 835 {
 836
 837         assert(0 == r->matched);
 838         exprmark(p, cp, mask, r->matches);
 839         r->matched = expreval(p, r->matches);
 840 }