From d53a7449dca8ec558ca0b6c322c6d3351afe1c8b Mon Sep 17 00:00:00 2001
From: Ingo Schwarze
Date: Fri, 6 Nov 2015 21:19:09 +0000
Subject: In ./configure, select a RE syntax for word boundaries supported by libc; issue reported by Svyatoslav Mishyn, Peter Bray, and Daniel Levai.

---
 TODO             |  8 +-------
 configure        |  6 ++++++
 mansearch.c      |  9 ++++++++-
 test-rewb-bsd.c  | 26 ++++++++++++++++++++++++++
 test-rewb-sysv.c | 26 ++++++++++++++++++++++++++
 5 files changed, 67 insertions(+), 8 deletions(-)
 create mode 100644 test-rewb-bsd.c
 create mode 100644 test-rewb-sysv.c

diff --git a/TODO b/TODO
index 7be67398..f851e9fd 100644
--- a/TODO
+++ b/TODO
@@ -1,6 +1,6 @@
 ************************************************************************
 * Official mandoc TODO.
-* $Id: TODO,v 1.214 2015/09/29 18:45:36 schwarze Exp $
+* $Id: TODO,v 1.215 2015/11/06 21:19:09 schwarze Exp $
 ************************************************************************
 
 Many issues are annotated for difficulty as follows:
@@ -527,12 +527,6 @@ are mere guesses, and some may be wrong.
 * portability
 ************************************************************************
 
-- word boundaries in regular expressions for whatis(1)
-  set up config tests to use [[:<:]], \<, or nothing
-  Svyatoslav Mishyn Wed, 17 Dec 2014 11:07:10 +0200
-  reminded by Peter Bray Fri, 03 Apr 2015 23:02:16 +1100
-  loc * exist * algo * size * imp *
-
 - systems having UTF-8 but not en_US.UTF-8
   call locale(1) from ./configure, select a UTF-8-locale,
   and use that for test-wchar.c and term_ascii.c
diff --git a/configure b/configure
index 28435810..882b8f4b 100755
--- a/configure
+++ b/configure
@@ -53,6 +53,8 @@ HAVE_MMAP=
 HAVE_PLEDGE=
 HAVE_PROGNAME=
 HAVE_REALLOCARRAY=
+HAVE_REWB_BSD=
+HAVE_REWB_SYSV=
 HAVE_STRCASESTR=
 HAVE_STRINGLIST=
 HAVE_STRLCAT=
@@ -184,6 +186,8 @@ runtest mmap MMAP || true
 runtest pledge PLEDGE || true
 runtest progname PROGNAME || true
 runtest reallocarray REALLOCARRAY || true
+runtest rewb-bsd REWB_BSD || true
+runtest rewb-sysv REWB_SYSV || true
 runtest strcasestr STRCASESTR || true
 runtest stringlist STRINGLIST || true
 runtest strlcat STRLCAT || true
@@ -306,6 +310,8 @@ cat << __HEREDOC__
 #define HAVE_PLEDGE ${HAVE_PLEDGE}
 #define HAVE_PROGNAME ${HAVE_PROGNAME}
 #define HAVE_REALLOCARRAY ${HAVE_REALLOCARRAY}
+#define HAVE_REWB_BSD ${HAVE_REWB_BSD}
+#define HAVE_REWB_SYSV ${HAVE_REWB_SYSV}
 #define HAVE_STRCASESTR ${HAVE_STRCASESTR}
 #define HAVE_STRINGLIST ${HAVE_STRINGLIST}
 #define HAVE_STRLCAT ${HAVE_STRLCAT}
diff --git a/mansearch.c b/mansearch.c
index 41fdb499..756a73f9 100644
--- a/mansearch.c
+++ b/mansearch.c
@@ -1,4 +1,4 @@
-/* $Id: mansearch.c,v 1.60 2015/10/13 15:53:05 schwarze Exp $ */
+/* $Id: mansearch.c,v 1.61 2015/11/06 21:19:09 schwarze Exp $ */
 /*
  * Copyright (c) 2012 Kristaps Dzonsons
  * Copyright (c) 2013, 2014, 2015 Ingo Schwarze
@@ -766,7 +766,14 @@ exprterm(const struct mansearch *search, char *buf, int cs)
 	if (search->argmode == ARG_WORD) {
 		e->bits = TYPE_Nm;
 		e->substr = NULL;
+#if HAVE_REWB_BSD
 		mandoc_asprintf(&val, "[[:<:]]%s[[:>:]]", buf);
+#elif HAVE_REWB_SYSV
+		mandoc_asprintf(&val, "\\<%s\\>", buf);
+#else
+		mandoc_asprintf(&val,
+		    "(^|[^a-zA-Z01-9_])%s([^a-zA-Z01-9_]|$)", buf);
+#endif
 		cs = 0;
 	} else if ((val = strpbrk(buf, "=~")) == NULL) {
 		e->bits = TYPE_Nm | TYPE_Nd;
diff --git a/test-rewb-bsd.c b/test-rewb-bsd.c
new file mode 100644
index 00000000..88d3d357
--- /dev/null
+++ b/test-rewb-bsd.c
@@ -0,0 +1,26 @@
+#include <stddef.h>
+#include <regex.h>
+
+int
+main(void)
+{
+	regex_t re;
+
+	if (regcomp(&re, "[[:<:]]word[[:>:]]", REG_EXTENDED | REG_NOSUB))
+		return 1;
+	if (regexec(&re, "the word is here", 0, NULL, 0))
+		return 2;
+	if (regexec(&re, "same word", 0, NULL, 0))
+		return 3;
+	if (regexec(&re, "word again", 0, NULL, 0))
+		return 4;
+	if (regexec(&re, "word", 0, NULL, 0))
+		return 5;
+	if (regexec(&re, "wordy", 0, NULL, 0) != REG_NOMATCH)
+		return 6;
+	if (regexec(&re, "sword", 0, NULL, 0) != REG_NOMATCH)
+		return 7;
+	if (regexec(&re, "reworded", 0, NULL, 0) != REG_NOMATCH)
+		return 8;
+	return 0;
+}
diff --git a/test-rewb-sysv.c b/test-rewb-sysv.c
new file mode 100644
index 00000000..cb35c544
--- /dev/null
+++ b/test-rewb-sysv.c
@@ -0,0 +1,26 @@
+#include <stddef.h>
+#include <regex.h>
+
+int
+main(void)
+{
+	regex_t re;
+
+	if (regcomp(&re, "\\<word\\>", REG_EXTENDED | REG_NOSUB))
+		return 1;
+	if (regexec(&re, "the word is here", 0, NULL, 0))
+		return 2;
+	if (regexec(&re, "same word", 0, NULL, 0))
+		return 3;
+	if (regexec(&re, "word again", 0, NULL, 0))
+		return 4;
+	if (regexec(&re, "word", 0, NULL, 0))
+		return 5;
+	if (regexec(&re, "wordy", 0, NULL, 0) != REG_NOMATCH)
+		return 6;
+	if (regexec(&re, "sword", 0, NULL, 0) != REG_NOMATCH)
+		return 7;
+	if (regexec(&re, "reworded", 0, NULL, 0) != REG_NOMATCH)
+		return 8;
+	return 0;
+}
--
cgit v1.2.3