From 5fd83771641d15c418f747bd343ba6738d3875f7 Mon Sep 17 00:00:00 2001 From: Cameron Katri Date: Sun, 9 May 2021 14:20:58 -0400 Subject: Import macOS userland adv_cmds-176 basic_cmds-55 bootstrap_cmds-116.100.1 developer_cmds-66 diskdev_cmds-667.40.1 doc_cmds-53.60.1 file_cmds-321.40.3 mail_cmds-35 misc_cmds-34 network_cmds-606.40.1 patch_cmds-17 remote_cmds-63 shell_cmds-216.60.1 system_cmds-880.60.2 text_cmds-106 --- text_cmds/wc/wc.1 | 163 ++++++++++++++++++++++++++++++ text_cmds/wc/wc.c | 294 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 457 insertions(+) create mode 100644 text_cmds/wc/wc.1 create mode 100644 text_cmds/wc/wc.c (limited to 'text_cmds/wc') diff --git a/text_cmds/wc/wc.1 b/text_cmds/wc/wc.1 new file mode 100644 index 0000000..d0902e9 --- /dev/null +++ b/text_cmds/wc/wc.1 @@ -0,0 +1,163 @@ +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. 
Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)wc.1 8.2 (Berkeley) 4/19/94 +.\" $FreeBSD: src/usr.bin/wc/wc.1,v 1.23 2005/02/26 04:14:20 trhodes Exp $ +.\" +.Dd February 23, 2005 +.Dt WC 1 +.Os +.Sh NAME +.Nm wc +.Nd word, line, character, and byte count +.Sh SYNOPSIS +.Nm +.Op Fl clmw +.Op Ar +.Sh DESCRIPTION +The +.Nm +utility displays the number of lines, words, and bytes contained in each +input +.Ar file , +or standard input (if no file is specified) to the standard output. +A line is defined as a string of characters delimited by a +.Aq newline +character. +Characters beyond the final +.Aq newline +character will not be included +in the line count. +.Pp +A word is defined as a string of characters delimited by white space +characters. +White space characters are the set of characters for which the +.Xr iswspace 3 +function returns true. +If more than one input file is specified, a line of cumulative counts +for all the files is displayed on a separate line after the output for +the last file. 
+.Pp +The following options are available: +.Bl -tag -width Ds +.It Fl c +The number of bytes in each input file +is written to the standard output. +This will cancel out any prior usage of the +.Fl m +option. +.It Fl l +The number of lines in each input file +is written to the standard output. +.It Fl m +The number of characters in each input file is written to the standard output. +If the current locale does not support multibyte characters, this +is equivalent to the +.Fl c +option. +This will cancel out any prior usage of the +.Fl c +option. +.It Fl w +The number of words in each input file +is written to the standard output. +.El +.Pp +When an option is specified, +.Nm +only reports the information requested by that option. +The order of output always takes the form of line, word, +byte, and file name. +The default action is equivalent to specifying the +.Fl c , l +and +.Fl w +options. +.Pp +If no files are specified, the standard input is used and no +file name is displayed. +The prompt will accept input until receiving EOF, or +.Bq ^D +in most environments. +.Sh ENVIRONMENT +The +.Ev LANG , LC_ALL +and +.Ev LC_CTYPE +environment variables affect the execution of +.Nm +as described in +.Xr environ 7 . +.Sh EXIT STATUS +.Ex -std +.Sh EXAMPLES +Count the number of characters, words and lines in each of the files +.Pa report1 +and +.Pa report2 +as well as the totals for both: +.Pp +.Dl "wc -mlw report1 report2" +.Sh COMPATIBILITY +Historically, the +.Nm +utility was documented to define a word as a ``maximal string of +characters delimited by +.Aq space , +.Aq tab +or +.Aq newline +characters''. +The implementation, however, did not handle non-printing characters +correctly so that +.Dq Li " ^D^E " +counted as 6 spaces, while +.Dq Li foo^D^Ebar +counted as 8 characters. +.Bx 4 +systems after +.Bx 4.3 +modified the implementation to be consistent +with the documentation. +This implementation defines a ``word'' in terms of the +.Xr iswspace 3 +function, as required by +.St -p1003.2 . 
+.Sh SEE ALSO +.Xr iswspace 3 +.Sh STANDARDS +The +.Nm +utility conforms to +.St -p1003.1-2001 . +.Sh HISTORY +A +.Nm +command appeared in +.At v1 . diff --git a/text_cmds/wc/wc.c b/text_cmds/wc/wc.c new file mode 100644 index 0000000..cb25729 --- /dev/null +++ b/text_cmds/wc/wc.c @@ -0,0 +1,294 @@ +/* + * Copyright (c) 1980, 1987, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static const char copyright[] = +"@(#) Copyright (c) 1980, 1987, 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#if 0 +#ifndef lint +static char sccsid[] = "@(#)wc.c 8.1 (Berkeley) 6/6/93"; +#endif /* not lint */ +#endif + +#include +__FBSDID("$FreeBSD: src/usr.bin/wc/wc.c,v 1.21 2004/12/27 22:27:56 josef Exp $"); + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* We allocte this much memory statically, and use it as a fallback for + malloc failure, or statfs failure. So it should be small, but not + "too small" */ +#define SMALL_BUF_SIZE (1024 * 8) + +uintmax_t tlinect, twordct, tcharct; +int doline, doword, dochar, domulti; + +static int cnt(const char *); +static void usage(void); + +int +main(int argc, char *argv[]) +{ + int ch, errors, total; + + (void) setlocale(LC_CTYPE, ""); + + while ((ch = getopt(argc, argv, "clmw")) != -1) + switch((char)ch) { + case 'l': + doline = 1; + break; + case 'w': + doword = 1; + break; + case 'c': + dochar = 1; + domulti = 0; + break; + case 'm': + domulti = 1; + dochar = 0; + break; + case '?': + default: + usage(); + } + argv += optind; + argc -= optind; + + /* Wc's flags are on by default. 
*/ + if (doline + doword + dochar + domulti == 0) + doline = doword = dochar = 1; + + errors = 0; + total = 0; + if (!*argv) { + if (cnt((char *)NULL) != 0) + ++errors; + else + (void)printf("\n"); + } + else do { + if (cnt(*argv) != 0) + ++errors; + else + (void)printf(" %s\n", *argv); + ++total; + } while(*++argv); + + if (total > 1) { + if (doline) + (void)printf(" %7ju", tlinect); + if (doword) + (void)printf(" %7ju", twordct); + if (dochar || domulti) + (void)printf(" %7ju", tcharct); + (void)printf(" total\n"); + } + exit(errors == 0 ? 0 : 1); +} + +static int +cnt(const char *file) +{ + struct stat sb; + struct statfs fsb; + uintmax_t linect, wordct, charct; + int fd, len, warned; + int stat_ret; + size_t clen; + short gotsp; + u_char *p; + static u_char small_buf[SMALL_BUF_SIZE]; + static u_char *buf = small_buf; + static off_t buf_size = SMALL_BUF_SIZE; + wchar_t wch; + mbstate_t mbs; + + linect = wordct = charct = 0; + if (file == NULL) { + file = "stdin"; + fd = STDIN_FILENO; + } else { + if ((fd = open(file, O_RDONLY, 0)) < 0) { + warn("%s: open", file); + return (1); + } + } + + if (fstatfs(fd, &fsb)) { + fsb.f_iosize = SMALL_BUF_SIZE; + } + if (fsb.f_iosize != buf_size) { + if (buf != small_buf) { + free(buf); + } + if (fsb.f_iosize == SMALL_BUF_SIZE || !(buf = malloc(fsb.f_iosize))) { + buf = small_buf; + buf_size = SMALL_BUF_SIZE; + } else { + buf_size = fsb.f_iosize; + } + } + + if (doword || (domulti && MB_CUR_MAX != 1)) + goto word; + /* + * Line counting is split out because it's a lot faster to get + * lines than to get words, since the word count requires some + * logic. 
+ */ + if (doline) { + while ((len = read(fd, buf, buf_size))) { + if (len == -1) { + warn("%s: read", file); + (void)close(fd); + return (1); + } + charct += len; + for (p = buf; len--; ++p) + if (*p == '\n') + ++linect; + } + tlinect += linect; + (void)printf(" %7ju", linect); + if (dochar) { + tcharct += charct; + (void)printf(" %7ju", charct); + } + (void)close(fd); + return (0); + } + /* + * If all we need is the number of characters and it's a + * regular file, just stat the puppy. + */ + if (dochar || domulti) { + if (fstat(fd, &sb)) { + warn("%s: fstat", file); + (void)close(fd); + return (1); + } + if (S_ISREG(sb.st_mode)) { + (void)printf(" %7lld", (long long)sb.st_size); + tcharct += sb.st_size; + (void)close(fd); + return (0); + } + } + + /* Do it the hard way... */ +word: gotsp = 1; + warned = 0; + memset(&mbs, 0, sizeof(mbs)); + while ((len = read(fd, buf, buf_size)) != 0) { + if (len == -1) { + warn("%s: read", file); + (void)close(fd); + return (1); + } + p = buf; + while (len > 0) { + if (!domulti || MB_CUR_MAX == 1) { + clen = 1; + wch = (unsigned char)*p; + } else if ((clen = mbrtowc(&wch, p, len, &mbs)) == + (size_t)-1) { + if (!warned) { + errno = EILSEQ; + warn("%s", file); + warned = 1; + } + memset(&mbs, 0, sizeof(mbs)); + clen = 1; + wch = (unsigned char)*p; + } else if (clen == (size_t)-2) + break; + else if (clen == 0) + clen = 1; + charct++; + len -= clen; + p += clen; + if (wch == L'\n') + ++linect; + if (iswspace(wch)) + gotsp = 1; + else if (gotsp) { + gotsp = 0; + ++wordct; + } + } + } + if (domulti && MB_CUR_MAX > 1) + if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned) + warn("%s", file); + if (doline) { + tlinect += linect; + (void)printf(" %7ju", linect); + } + if (doword) { + twordct += wordct; + (void)printf(" %7ju", wordct); + } + if (dochar || domulti) { + tcharct += charct; + (void)printf(" %7ju", charct); + } + (void)close(fd); + return (0); +} + +static void +usage() +{ + (void)fprintf(stderr, "usage: wc [-clmw] 
[file ...]\n"); + exit(1); +} -- cgit v1.2.3-56-ge451