From f23f76a0eac229ba8c7c6bdbd9e0f474b32251d2 Mon Sep 17 00:00:00 2001
From: Kristaps Dzonsons <kristaps@bsd.lv>
Date: Tue, 12 Jul 2011 15:26:35 +0000
Subject: [PATCH] Re-ordered logic in makewhatis to iterate over the index file
 only once. This is much more efficient.

---
 makewhatis.c | 180 +++++++++++++++++++++++----------------------------
 1 file changed, 82 insertions(+), 98 deletions(-)

diff --git a/makewhatis.c b/makewhatis.c
index a8219407..4dc500ef 100644
--- a/makewhatis.c
+++ b/makewhatis.c
@@ -1,4 +1,4 @@
-/*	$Id: makewhatis.c,v 1.20 2011/07/12 10:03:02 kristaps Exp $ */
+/*	$Id: makewhatis.c,v 1.21 2011/07/12 15:26:35 kristaps Exp $ */
 /*
  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -42,6 +42,7 @@
 #define	MANDOC_IDX	 "mandoc.index"
 #define	MANDOC_BUFSZ	  BUFSIZ
 #define	MANDOC_FLAGS	  O_CREAT|O_TRUNC|O_RDWR
+#define	MANDOC_SLOP	  1024
 
 /* Bit-fields.  See makewhatis.1. */
 
@@ -93,8 +94,6 @@ static	void		  buf_appendb(struct buf *,
 static	void		  dbt_put(DB *, const char *, DBT *, DBT *);
 static	void		  hash_put(DB *, const struct buf *, int);
 static	void		  hash_reset(DB **);
-static	void		  op_delete(const char *, int, DB *, 
-				const char *, DB *, const char *);
 static	int		  pman_node(MAN_ARGS);
 static	void		  pmdoc_node(MDOC_ARGS);
 static	void		  pmdoc_An(MDOC_ARGS);
@@ -248,7 +247,7 @@ main(int argc, char *argv[])
 	struct mparse	*mp; /* parse sequence */
 	struct mdoc	*mdoc; /* resulting mdoc */
 	struct man	*man; /* resulting man */
-	enum op		 op;
+	enum op		 op; /* current operation */
 	char		*fn; /* current file being parsed */
 	const char	*msec, /* manual section */
 	      	 	*mtitle, /* manual title */
@@ -257,18 +256,19 @@ main(int argc, char *argv[])
 	char		 ibuf[MAXPATHLEN], /* index fname */
 			 fbuf[MAXPATHLEN],  /* btree fname */
 			 vbuf[8]; /* stringified record number */
-	int		 ch, seq, verb, i;
+	int		 ch, seq, sseq, verb, i;
 	DB		*idx, /* index database */
 			*db, /* keyword database */
 			*hash; /* temporary keyword hashtable */
 	DBT		 key, val;
-	enum mandoclevel ec;
+	enum mandoclevel ec; /* exit status */
 	size_t		 sv;
 	BTREEINFO	 info; /* btree configuration */
-	recno_t		 rec, /* current record number */
-			 maxrec;
-	recno_t		*recs;
-	size_t		 recsz;
+	recno_t		 rec,
+			 maxrec; /* supremum of all records */
+	recno_t		*recs; /* buffer of empty records */
+	size_t		 recsz, /* buffer size of recs */
+			 reccur; /* valid number of recs */
 	struct buf	 buf, /* keyword buffer */
 			 dbuf; /* description buffer */
 	extern int	 optind;
@@ -286,7 +286,7 @@ main(int argc, char *argv[])
 	mp = NULL;
 	hash = NULL;
 	recs = NULL;
-	recsz = 0;
+	recsz = reccur = 0;
 	maxrec = 0;
 	op = OP_NEW;
 	ec = MANDOCLEVEL_SYSERR;
@@ -359,47 +359,82 @@ main(int argc, char *argv[])
 
 	/*
 	 * If we're going to delete or update a database, remove the
-	 * entries now.  This doesn't actually remove them; it only sets
-	 * their record value lengths to zero.
+	 * entries now (both the index and all keywords pointing to it).
+	 * This doesn't actually remove them: it only sets their record
+	 * value lengths to zero.
+	 * While doing so, add the empty records to a list we'll access
+	 * later in re-adding entries to the database.
 	 */
 
-	if (OP_DELETE == op || OP_UPDATE == op)
-		for (i = 0; i < argc; i++)
-			op_delete(argv[i], verb, idx, ibuf, db, fbuf);
-
-	if (OP_DELETE == op) {
-		ec = MANDOCLEVEL_OK;
-		goto out;
-	}
-
-	/*
-	 * Compile a list of all available "empty" records to use.  This
-	 * keeps the size of the database small.
-	 */
-
-	if (OP_UPDATE == op) {
-		i = 0;
+	if (OP_DELETE == op || OP_UPDATE == op) {
 		seq = R_FIRST;
 		while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) {
 			seq = R_NEXT;
 			maxrec = *(recno_t *)key.data;
-			if (val.size > 0)
+			if (0 == val.size && OP_UPDATE == op) {
+				if (reccur >= recsz) {
+					recsz += MANDOC_SLOP;
+					recs = mandoc_realloc
+						(recs, recsz * sizeof(recno_t));
+				}
+				recs[(int)reccur] = maxrec;
+				reccur++;
 				continue;
-			if ((size_t)i >= recsz) {
-				recsz += 1024;
-				recs = mandoc_realloc
-					(recs, recsz * sizeof(recno_t));
 			}
-			recs[i++] = maxrec;
-		}
-		if (ch < 0) {
-			perror(ibuf);
-			exit((int)MANDOCLEVEL_SYSERR);
+
+			fn = (char *)val.data;
+			for (i = 0; i < argc; i++)
+				if (0 == strcmp(fn, argv[i]))
+					break;
+
+			if (i == argc)
+				continue;
+
+			sseq = R_FIRST;
+			while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) {
+				sseq = R_NEXT;
+				assert(8 == val.size);
+				if (maxrec != *(recno_t *)(val.data + 4))
+					continue;
+				if (verb > 1)
+					printf("%s: Deleted keyword: %s\n", 
+						fn, (char *)key.data);
+				ch = (*db->del)(db, &key, R_CURSOR);
+				if (ch < 0)
+					break;
+			}
+			if (ch < 0) {
+				perror(fbuf);
+				exit((int)MANDOCLEVEL_SYSERR);
+			}
+
+			if (verb)
+				printf("%s: Deleted index\n", fn);
+
+			val.size = 0;
+			ch = (*idx->put)(idx, &key, &val, R_CURSOR);
+			if (ch < 0) {
+				perror(ibuf);
+				exit((int)MANDOCLEVEL_SYSERR);
+			}
+
+			if (OP_UPDATE == op) {
+				if (reccur >= recsz) {
+					recsz += MANDOC_SLOP;
+					recs = mandoc_realloc
+						(recs, recsz * sizeof(recno_t));
+				}
+				recs[(int)reccur] = maxrec;
+				reccur++;
+			}
 		}
-		recsz = (size_t)i;
 		maxrec++;
-		assert(recsz < maxrec);
-	} 
+	}
+
+	if (OP_DELETE == op) {
+		ec = MANDOCLEVEL_OK;
+		goto out;
+	}
 
 	/*
 	 * Add records to the database.
@@ -418,9 +453,9 @@ main(int argc, char *argv[])
 	for (rec = 0, i = 0; i < argc; i++) {
 		fn = argv[i];
 		if (OP_UPDATE == op) {
-			if (recsz > 0) {
-				--recsz;
-				rec = recs[(int)recsz];
+			if (reccur > 0) {
+				--reccur;
+				rec = recs[(int)reccur];
 			} else if (maxrec > 0) {
 				rec = maxrec;
 				maxrec = 0;
@@ -491,7 +526,7 @@ main(int argc, char *argv[])
 			val.data = vbuf;
 
 			if (verb > 1)
-				printf("Indexed: %s, %s, 0x%x\n", 
+				printf("%s: Added keyword: %s, 0x%x\n", 
 					fn, (char *)key.data, 
 					*(int *)val.data);
 			dbt_put(db, fbuf, &key, &val);
@@ -516,7 +551,7 @@ main(int argc, char *argv[])
 		val.size = dbuf.len;
 
 		if (verb > 0)
-			printf("Indexed: %s\n", fn);
+			printf("%s: Added index\n", fn);
 
 		dbt_put(idx, ibuf, &key, &val);
 	}
@@ -539,57 +574,6 @@ out:
 	return((int)ec);
 }
 
-static void
-op_delete(const char *fn, int verb, DB *idx, 
-		const char *ibuf, DB *db, const char *fbuf)
-{
-	int		 ch;
-	DBT		 key, val;
-	recno_t		 rec;
-	unsigned int	 seq, sseq;
-
-	seq = R_FIRST;
-	while (0 == (ch = (*idx->seq)(idx, &key, &val, seq))) {
-		seq = R_NEXT;
-		if (0 == val.size)
-			continue;
-		if (strcmp((char *)val.data, fn))
-			continue;
-
-		rec = *(recno_t *)key.data;
-
-		sseq = R_FIRST;
-		while (0 == (ch = (*db->seq)(db, &key, &val, sseq))) {
-			sseq = R_NEXT;
-			assert(8 == val.size);
-			if (rec != *(recno_t *)(val.data + 4))
-				continue;
-			if (verb > 1)
-				printf("Deleted: %s, %s\n", 
-					fn, (char *)key.data);
-			ch = (*db->del)(db, &key, R_CURSOR);
-			if (ch < 0)
-				break;
-		}
-		if (ch < 0) {
-			perror(fbuf);
-			exit((int)MANDOCLEVEL_SYSERR);
-		}
-
-		val.size = 0;
-		if (verb)
-			printf("Deleted: %s\n", fn);
-		ch = (*idx->put)
-			(idx, &key, &val, R_CURSOR);
-		if (ch < 0)
-			break;
-	}
-	if (ch < 0) {
-		perror(ibuf);
-		exit((int)MANDOCLEVEL_SYSERR);
-	}
-}
-
 /*
  * Grow the buffer (if necessary) and copy in a binary string.
  */
-- 
2.47.1