X-Git-Url: https://git.cameronkatri.com/mandoc.git/blobdiff_plain/31fa98f26903a8f1fcb3f43804b00c176eaabf78..b463a4303970dddae777871d1306b7f625dd12b8:/mandoc.c?ds=inline

diff --git a/mandoc.c b/mandoc.c
index 37216da6..56a187de 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -1,6 +1,6 @@
-/*	$Id: mandoc.c,v 1.13 2010/05/14 14:09:13 kristaps Exp $ */
+/*	$Id: mandoc.c,v 1.35 2010/09/04 20:18:53 kristaps Exp $ */
 /*
- * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
+ * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -27,165 +27,201 @@
 #include <string.h>
 #include <time.h>
 
+#include "mandoc.h"
 #include "libmandoc.h"
 
-static int	 a2time(time_t *, const char *, const char *);
+static	int	 a2time(time_t *, const char *, const char *);
 
 
 int
-mandoc_special(const char *p)
+mandoc_special(char *p)
 {
-	int		 terminator;	/* Terminator for \s. */
-	int		 lim;		/* Limit for N in \s. */
-	int		 c, i;
+	int		 len, i;
+	char		 term;
+	char		*sv;
 	
-	if ('\\' != *p++)
-		return(0);
+	len = 0;
+	term = '\0';
+	sv = p;
+
+	assert('\\' == *p);
+	p++;
 
-	switch (*p) {
-	case ('\''):
+	switch (*p++) {
+#if 0
+	case ('Z'):
+		/* FALLTHROUGH */
+	case ('X'):
+		/* FALLTHROUGH */
+	case ('x'):
+		/* FALLTHROUGH */
+	case ('S'):
 		/* FALLTHROUGH */
-	case ('`'):
+	case ('R'):
 		/* FALLTHROUGH */
-	case ('q'):
+	case ('N'):
 		/* FALLTHROUGH */
-	case ('-'):
+	case ('l'):
 		/* FALLTHROUGH */
-	case ('~'):
+	case ('L'):
 		/* FALLTHROUGH */
-	case ('^'):
+	case ('H'):
 		/* FALLTHROUGH */
-	case ('%'):
+	case ('h'):
 		/* FALLTHROUGH */
-	case ('0'):
+	case ('D'):
 		/* FALLTHROUGH */
-	case (' '):
+	case ('C'):
 		/* FALLTHROUGH */
-	case ('|'):
+	case ('b'):
 		/* FALLTHROUGH */
-	case ('&'):
+	case ('B'):
 		/* FALLTHROUGH */
-	case ('.'):
+	case ('a'):
 		/* FALLTHROUGH */
-	case (':'):
+	case ('A'):
+		if (*p++ != '\'')
+			return(0);
+		term = '\'';
+		break;
+#endif
+	case ('h'):
+		/* FALLTHROUGH */
+	case ('v'):
 		/* FALLTHROUGH */
-	case ('c'):
-		return(2);
-	case ('e'):
-		return(2);
 	case ('s'):
-		if ('\0' == *++p)
-			return(2);
-
-		c = 2;
-		terminator = 0;
-		lim = 1;
-
-		if (*p == '\'') {
-			lim = 0;
-			terminator = 1;
-			++p;
-			++c;
-		} else if (*p == '[') {
-			lim = 0;
-			terminator = 2;
-			++p;
-			++c;
-		} else if (*p == '(') {
-			lim = 2;
-			terminator = 3;
-			++p;
-			++c;
-		}
+		if (ASCII_HYPH == *p)
+			*p = '-';
 
-		if (*p == '+' || *p == '-') {
-			++p;
-			++c;
+		i = 0;
+		if ('+' == *p || '-' == *p) {
+			p++;
+			i = 1;
 		}
 
-		if (*p == '\'') {
-			if (terminator)
-				return(0);
-			lim = 0;
-			terminator = 1;
-			++p;
-			++c;
-		} else if (*p == '[') {
-			if (terminator)
-				return(0);
-			lim = 0;
-			terminator = 2;
-			++p;
-			++c;
-		} else if (*p == '(') {
-			if (terminator)
-				return(0);
-			lim = 2;
-			terminator = 3;
-			++p;
-			++c;
+		switch (*p++) {
+		case ('('):
+			len = 2;
+			break;
+		case ('['):
+			term = ']';
+			break;
+		case ('\''):
+			term = '\'';
+			break;
+		case ('0'):
+			i = 1;
+			/* FALLTHROUGH */
+		default:
+			len = 1;
+			p--;
+			break;
 		}
 
-		/* TODO: needs to handle floating point. */
-
-		if ( ! isdigit((u_char)*p))
-			return(0);
-
-		for (i = 0; isdigit((u_char)*p); i++) {
-			if (lim && i >= lim)
+		if (ASCII_HYPH == *p)
+			*p = '-';
+		if ('+' == *p || '-' == *p) {
+			if (i)
+				return(0);
+			p++;
+		} 
+		
+		/* Handle embedded numerical subexp or escape. */
+
+		if ('(' == *p) {
+			while (*p && ')' != *p)
+				if ('\\' == *p++) {
+					i = mandoc_special(--p);
+					if (0 == i)
+						return(0);
+					p += i;
+				}
+
+			if (')' == *p++)
 				break;
-			++p;
-			++c;
-		}
 
-		if (terminator && terminator < 3) {
-			if (1 == terminator && *p != '\'')
-				return(0);
-			if (2 == terminator && *p != ']')
+			return(0);
+		} else if ('\\' == *p) {
+			if (0 == (i = mandoc_special(p)))
 				return(0);
-			++p;
-			++c;
+			p += i;
 		}
 
-		return(c);
+		break;
+#if 0
+	case ('Y'):
+		/* FALLTHROUGH */
+	case ('V'):
+		/* FALLTHROUGH */
+	case ('$'):
+		/* FALLTHROUGH */
+	case ('n'):
+		/* FALLTHROUGH */
+#endif
+	case ('k'):
+		/* FALLTHROUGH */
+	case ('M'):
+		/* FALLTHROUGH */
+	case ('m'):
+		/* FALLTHROUGH */
 	case ('f'):
 		/* FALLTHROUGH */
 	case ('F'):
 		/* FALLTHROUGH */
 	case ('*'):
-		if (0 == *++p || ! isgraph((u_char)*p))
-			return(0);
-		switch (*p) {
+		switch (*p++) {
 		case ('('):
-			if (0 == *++p || ! isgraph((u_char)*p))
-				return(0);
-			return(4);
+			len = 2;
+			break;
 		case ('['):
-			for (c = 3, p++; *p && ']' != *p; p++, c++)
-				if ( ! isgraph((u_char)*p))
-					break;
-			return(*p == ']' ? c : 0);
+			term = ']';
+			break;
 		default:
+			len = 1;
+			p--;
 			break;
 		}
-		return(3);
+		break;
 	case ('('):
-		if (0 == *++p || ! isgraph((u_char)*p))
-			return(0);
-		if (0 == *++p || ! isgraph((u_char)*p))
-			return(0);
-		return(4);
+		len = 2;
+		break;
 	case ('['):
+		term = ']';
 		break;
+	case ('z'):
+		len = 1;
+		if ('\\' == *p) {
+			if (0 == (i = mandoc_special(p)))
+				return(0);
+			p += i;
+			return(*p ? (int)(p - sv) : 0);
+		}
+		break;
+	case ('o'):
+		/* FALLTHROUGH */
+	case ('w'):
+		if ('\'' == *p++) {
+			term = '\'';
+			break;
+		}
+		/* FALLTHROUGH */
 	default:
-		return(0);
+		len = 1;
+		p--;
+		break;
 	}
 
-	for (c = 3, p++; *p && ']' != *p; p++, c++)
-		if ( ! isgraph((u_char)*p))
-			break;
+	if (term) {
+		for ( ; *p && term != *p; p++)
+			if (ASCII_HYPH == *p)
+				*p = '-';
+		return(*p ? (int)(p - sv) : 0);
+	}
 
-	return(*p == ']' ? c : 0);
+	for (i = 0; *p && i < len; i++, p++)
+		if (ASCII_HYPH == *p)
+			*p = '-';
+	return(i == len ? (int)(p - sv) : 0);
 }
 
 
@@ -197,7 +233,7 @@ mandoc_calloc(size_t num, size_t size)
 	ptr = calloc(num, size);
 	if (NULL == ptr) {
 		perror(NULL);
-		exit(EXIT_FAILURE);
+		exit((int)MANDOCLEVEL_SYSERR);
 	}
 
 	return(ptr);
@@ -212,7 +248,7 @@ mandoc_malloc(size_t size)
 	ptr = malloc(size);
 	if (NULL == ptr) {
 		perror(NULL);
-		exit(EXIT_FAILURE);
+		exit((int)MANDOCLEVEL_SYSERR);
 	}
 
 	return(ptr);
@@ -226,7 +262,7 @@ mandoc_realloc(void *ptr, size_t size)
 	ptr = realloc(ptr, size);
 	if (NULL == ptr) {
 		perror(NULL);
-		exit(EXIT_FAILURE);
+		exit((int)MANDOCLEVEL_SYSERR);
 	}
 
 	return(ptr);
@@ -241,7 +277,7 @@ mandoc_strdup(const char *ptr)
 	p = strdup(ptr);
 	if (NULL == p) {
 		perror(NULL);
-		exit(EXIT_FAILURE);
+		exit((int)MANDOCLEVEL_SYSERR);
 	}
 
 	return(p);
@@ -302,25 +338,72 @@ mandoc_a2time(int flags, const char *p)
 
 
 int
-mandoc_eos(const char *p, size_t sz)
+mandoc_eos(const char *p, size_t sz, int enclosed)
 {
+	const char *q;
+	int found;
 
 	if (0 == sz)
 		return(0);
 
-	switch (p[(int)sz - 1]) {
-	case ('.'):
-		/* Escaped periods. */
-		if (sz > 1 && '\\' == p[(int)sz - 2])
-			return(0);
-		/* FALLTHROUGH */
-	case ('!'):
-		/* FALLTHROUGH */
-	case ('?'):
-		break;
-	default:
-		return(0);
+	/*
+	 * End-of-sentence recognition must include situations where
+	 * some symbols, such as `)', allow prior EOS punctuation to
+	 * propagate outward.
+	 */
+
+	found = 0;
+	for (q = p + (int)sz - 1; q >= p; q--) {
+		switch (*q) {
+		case ('\"'):
+			/* FALLTHROUGH */
+		case ('\''):
+			/* FALLTHROUGH */
+		case (']'):
+			/* FALLTHROUGH */
+		case (')'):
+			if (0 == found)
+				enclosed = 1;
+			break;
+		case ('.'):
+			/* FALLTHROUGH */
+		case ('!'):
+			/* FALLTHROUGH */
+		case ('?'):
+			found = 1;
+			break;
+		default:
+			return(found && (!enclosed || isalnum((unsigned char)*q)));
+		}
 	}
 
+	return(found && !enclosed);
+}
+
+
+int
+mandoc_hyph(const char *start, const char *c)
+{
+
+	/*
+	 * Choose whether to break at a hyphenated character.  We only
+	 * do this if it's free-standing within a word.
+	 */
+
+	/* Skip first/last character of buffer. */
+	if (c == start || '\0' == *(c + 1))
+		return(0);
+	/* Skip first/last character of word. */
+	if ('\t' == *(c + 1) || '\t' == *(c - 1))
+		return(0);
+	if (' ' == *(c + 1) || ' ' == *(c - 1))
+		return(0);
+	/* Skip double invocations. */
+	if ('-' == *(c + 1) || '-' == *(c - 1))
+		return(0);
+	/* Skip escapes. */
+	if ('\\' == *(c - 1))
+		return(0);
+
 	return(1);
 }