diff options
author | Cameron Katri <me@cameronkatri.com> | 2021-05-09 14:20:58 -0400 |
---|---|---|
committer | Cameron Katri <me@cameronkatri.com> | 2021-05-09 14:20:58 -0400 |
commit | 5fd83771641d15c418f747bd343ba6738d3875f7 (patch) | |
tree | 5abf0f78f680d9837dbd93d4d4c3933bb7509599 /text_cmds/cut | |
download | apple_cmds-5fd83771641d15c418f747bd343ba6738d3875f7.tar.gz apple_cmds-5fd83771641d15c418f747bd343ba6738d3875f7.tar.zst apple_cmds-5fd83771641d15c418f747bd343ba6738d3875f7.zip |
Import macOS userland
adv_cmds-176
basic_cmds-55
bootstrap_cmds-116.100.1
developer_cmds-66
diskdev_cmds-667.40.1
doc_cmds-53.60.1
file_cmds-321.40.3
mail_cmds-35
misc_cmds-34
network_cmds-606.40.1
patch_cmds-17
remote_cmds-63
shell_cmds-216.60.1
system_cmds-880.60.2
text_cmds-106
Diffstat (limited to 'text_cmds/cut')
-rw-r--r-- | text_cmds/cut/cut.1 | 166 | ||||
-rw-r--r-- | text_cmds/cut/cut.c | 468 |
2 files changed, 634 insertions, 0 deletions
diff --git a/text_cmds/cut/cut.1 b/text_cmds/cut/cut.1 new file mode 100644 index 0000000..1565876 --- /dev/null +++ b/text_cmds/cut/cut.1 @@ -0,0 +1,166 @@ +.\" Copyright (c) 1989, 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)cut.1 8.1 (Berkeley) 6/6/93 +.\" $FreeBSD: src/usr.bin/cut/cut.1,v 1.32 2007/02/28 10:13:32 ru Exp $ +.\" +.Dd December 21, 2006 +.Dt CUT 1 +.Os +.Sh NAME +.Nm cut +.Nd cut out selected portions of each line of a file +.Sh SYNOPSIS +.Nm +.Fl b Ar list +.Op Fl n +.Op Ar +.Nm +.Fl c Ar list +.Op Ar +.Nm +.Fl f Ar list +.Op Fl d Ar delim +.Op Fl s +.Op Ar +.Sh DESCRIPTION +The +.Nm +utility cuts out selected portions of each line (as specified by +.Ar list ) +from each +.Ar file +and writes them to the standard output. +If no +.Ar file +arguments are specified, or a file argument is a single dash +.Pq Sq Fl , +.Nm +reads from the standard input. +The items specified by +.Ar list +can be in terms of column position or in terms of fields delimited +by a special character. +Column numbering starts from 1. +.Pp +The +.Ar list +option argument +is a comma or whitespace separated set of numbers and/or +number ranges. +Number ranges consist of a number, a dash +.Pq Sq \- , +and a second number +and select the fields or columns from the first number to the second, +inclusive. +Numbers or number ranges may be preceded by a dash, which selects all +fields or columns from 1 to the last number. +Numbers or number ranges may be followed by a dash, which selects all +fields or columns from the last number to the end of the line. +Numbers and number ranges may be repeated, overlapping, and in any order. +If a field or column is specified multiple times, it will appear only +once in the output. +It is not an error to select fields or columns not present in the +input line. +.Pp +The options are as follows: +.Bl -tag -width indent +.It Fl b Ar list +The +.Ar list +specifies byte positions. +.It Fl c Ar list +The +.Ar list +specifies character positions. +.It Fl d Ar delim +Use +.Ar delim +as the field delimiter character instead of the tab character. +.It Fl f Ar list +The +.Ar list +specifies fields, separated in the input by the field delimiter character +(see the +.Fl d +option.) +Output fields are separated by a single occurrence of the field delimiter +character. +.It Fl n +Do not split multi-byte characters. +Characters will only be output if at least one byte is selected, and, +after a prefix of zero or more unselected bytes, the rest of the bytes +that form the character are selected. +.It Fl s +Suppress lines with no field delimiter characters. +Unless specified, lines with no delimiters are passed through unmodified. +.El +.Sh ENVIRONMENT +The +.Ev LANG , LC_ALL +and +.Ev LC_CTYPE +environment variables affect the execution of +.Nm +as described in +.Xr environ 7 . +.Sh EXIT STATUS +.Ex -std +.Sh EXAMPLES +Extract users' login names and shells from the system +.Xr passwd 5 +file as +.Dq name:shell +pairs: +.Pp +.Dl "cut -d : -f 1,7 /etc/passwd" +.Pp +Show the names and login times of the currently logged in users: +.Pp +.Dl "who | cut -c 1-16,26-38" +.Sh SEE ALSO +.Xr colrm 1 , +.Xr paste 1 +.Sh STANDARDS +The +.Nm +utility conforms to +.St -p1003.2-92 . +.Sh HISTORY +A +.Nm +command appeared in +.Tn AT&T +System III +.Ux . diff --git a/text_cmds/cut/cut.c b/text_cmds/cut/cut.c new file mode 100644 index 0000000..82fdc06 --- /dev/null +++ b/text_cmds/cut/cut.c @@ -0,0 +1,468 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static const char copyright[] = +"@(#) Copyright (c) 1989, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +static const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; +#endif /* not lint */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/usr.bin/cut/cut.c,v 1.30 2004/11/05 10:45:23 tjr Exp $"); + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <limits.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <wchar.h> +#include <sysexits.h> + +int bflag; +int cflag; +wchar_t dchar; +char dcharmb[MB_LEN_MAX + 1]; +int dflag; +int fflag; +int nflag; +int sflag; + +size_t autostart, autostop, maxval; +char * positions; + +int b_cut(FILE *, const char *); +int b_n_cut(FILE *, const char *); +int c_cut(FILE *, const char *); +int f_cut(FILE *, const char *); +void get_list(char *); +void needpos(size_t); +static void usage(void); + +int +main(int argc, char *argv[]) +{ + FILE *fp; + int (*fcn)(FILE *, const char *); + int ch, rval; + size_t n; + + setlocale(LC_ALL, ""); + + fcn = NULL; + dchar = '\t'; /* default delimiter is \t */ + strcpy(dcharmb, "\t"); + + while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) + switch(ch) { + case 'b': + get_list(optarg); + bflag = 1; + break; + case 'c': + get_list(optarg); + cflag = 1; + break; + case 'd': + n = mbrtowc(&dchar, optarg, MB_LEN_MAX, NULL); + if (dchar == '\0' || n != strlen(optarg)) + errx(1, "bad delimiter"); + strcpy(dcharmb, optarg); + dflag = 1; + break; + case 'f': + get_list(optarg); + fflag = 1; + break; + case 's': + sflag = 1; + break; + case 'n': + nflag = 1; + break; + case '?': + default: + usage(); + } + argc -= optind; + argv += optind; + + if (fflag) { + if (bflag || cflag || nflag) + usage(); + } else if (!(bflag || cflag) || dflag || sflag) + usage(); + else if (!bflag && nflag) + usage(); + + if (fflag) + fcn = f_cut; + else if (cflag) + fcn = MB_CUR_MAX > 1 ? c_cut : b_cut; + else if (bflag) + fcn = nflag && MB_CUR_MAX > 1 ? b_n_cut : b_cut; + + rval = 0; + if (*argv) + for (; *argv; ++argv) { + if (strcmp(*argv, "-") == 0) { + rval |= fcn(stdin, "stdin"); + if (ferror(stdin)) { + errx(EX_IOERR, "Error reading stdin"); + } + } else { + if (!(fp = fopen(*argv, "r"))) { + warn("%s", *argv); + rval = 1; + continue; + } + fcn(fp, *argv); + if (ferror(fp)) { + errx(EX_IOERR, "Error reading %s", *argv); + } + (void)fclose(fp); + } + } + else + rval = fcn(stdin, "stdin"); + exit(rval); +} + +void +get_list(char *list) +{ + size_t setautostart, start, stop; + char *pos; + char *p; + + /* + * set a byte in the positions array to indicate if a field or + * column is to be selected; use +1, it's 1-based, not 0-based. + * Numbers and number ranges may be overlapping, repeated, and in + * any order. We handle "-3-5" although there's no real reason too. + */ + for (; (p = strsep(&list, ", \t")) != NULL;) { + setautostart = start = stop = 0; + if (*p == '-') { + ++p; + setautostart = 1; + } + if (isdigit((unsigned char)*p)) { + start = stop = strtol(p, &p, 10); + if (setautostart && start > autostart) + autostart = start; + } + if (*p == '-') { + if (isdigit((unsigned char)p[1])) + stop = strtol(p + 1, &p, 10); + if (*p == '-') { + ++p; + if (!autostop || autostop > stop) + autostop = stop; + } + } + if (*p) + errx(1, "[-cf] list: illegal list value"); + if (!stop || !start) + errx(1, "[-cf] list: values may not include zero"); + if (maxval < stop) { + maxval = stop; + needpos(maxval + 1); + } + for (pos = positions + start; start++ <= stop; *pos++ = 1); + } + + /* overlapping ranges */ + if (autostop && maxval > autostop) { + maxval = autostop; + needpos(maxval + 1); + } + + /* set autostart */ + if (autostart) + memset(positions + 1, '1', autostart); +} + +void +needpos(size_t n) +{ + static size_t npos; + size_t oldnpos; + + /* Grow the positions array to at least the specified size. */ + if (n > npos) { + oldnpos = npos; + if (npos == 0) + npos = n; + while (n > npos) + npos *= 2; + if ((positions = realloc(positions, npos)) == NULL) + err(1, "realloc"); + memset((char *)positions + oldnpos, 0, npos - oldnpos); + } +} + +int +b_cut(FILE *fp, const char *fname __unused) +{ + int ch, col; + char *pos; + + ch = 0; + for (;;) { + pos = positions + 1; + for (col = maxval; col; --col) { + if ((ch = getc(fp)) == EOF) + return (0); + if (ch == '\n') + break; + if (*pos++) + (void)putchar(ch); + } + if (ch != '\n') { + if (autostop) + while ((ch = getc(fp)) != EOF && ch != '\n') + (void)putchar(ch); + else + while ((ch = getc(fp)) != EOF && ch != '\n'); + } + (void)putchar('\n'); + } + return (0); +} + +/* + * Cut based on byte positions, taking care not to split multibyte characters. + * Although this function also handles the case where -n is not specified, + * b_cut() ought to be much faster. + */ +int +b_n_cut(FILE *fp, const char *fname) +{ + size_t col, i, lbuflen; + char *lbuf; + int canwrite, clen, warned; + mbstate_t mbs; + + memset(&mbs, 0, sizeof(mbs)); + warned = 0; + while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { + for (col = 0; lbuflen > 0; col += clen) { + if ((clen = mbrlen(lbuf, lbuflen, &mbs)) < 0) { + if (!warned) { + warn("%s", fname); + warned = 1; + } + memset(&mbs, 0, sizeof(mbs)); + clen = 1; + } + if (clen == 0 || *lbuf == '\n') + break; + if (col < maxval && !positions[1 + col]) { + /* + * Print the character if (1) after an initial + * segment of un-selected bytes, the rest of + * it is selected, and (2) the last byte is + * selected. + */ + i = col; + while (i < col + clen && i < maxval && + !positions[1 + i]) + i++; + canwrite = i < col + clen; + for (; i < col + clen && i < maxval; i++) + canwrite &= positions[1 + i]; + if (canwrite) + fwrite(lbuf, 1, clen, stdout); + } else { + /* + * Print the character if all of it has + * been selected. + */ + canwrite = 1; + for (i = col; i < col + clen; i++) + if ((i >= maxval && !autostop) || + (i < maxval && !positions[1 + i])) { + canwrite = 0; + break; + } + if (canwrite) + fwrite(lbuf, 1, clen, stdout); + } + lbuf += clen; + lbuflen -= clen; + } + if (lbuflen > 0) + putchar('\n'); + } + return (warned); +} + +int +c_cut(FILE *fp, const char *fname) +{ + wint_t ch; + int col; + char *pos; + + ch = 0; + for (;;) { + pos = positions + 1; + for (col = maxval; col; --col) { + if ((ch = getwc(fp)) == WEOF) + goto out; + if (ch == '\n') + break; + if (*pos++) + (void)putwchar(ch); + } + if (ch != '\n') { + if (autostop) + while ((ch = getwc(fp)) != WEOF && ch != '\n') + (void)putwchar(ch); + else + while ((ch = getwc(fp)) != WEOF && ch != '\n'); + } + (void)putwchar('\n'); + } +out: + if (ferror(fp)) { + warn("%s", fname); + return (1); + } + return (0); +} + +int +f_cut(FILE *fp, const char *fname) +{ + wchar_t ch; + int field, i, isdelim; + char *pos, *p; + wchar_t sep; + int output; + char *lbuf, *mlbuf; + size_t clen, lbuflen, reallen; + + mlbuf = NULL; + for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) { + reallen = lbuflen; + /* Assert EOL has a newline. */ + if (*(lbuf + lbuflen - 1) != '\n') { + /* Can't have > 1 line with no trailing newline. */ + mlbuf = malloc(lbuflen + 1); + if (mlbuf == NULL) + err(1, "malloc"); + memcpy(mlbuf, lbuf, lbuflen); + *(mlbuf + lbuflen) = '\n'; + lbuf = mlbuf; + reallen++; + } + output = 0; + for (isdelim = 0, p = lbuf;; p += clen) { + clen = mbrtowc(&ch, p, lbuf + reallen - p, NULL); + if (clen == (size_t)-1 || clen == (size_t)-2) { + warnc(EILSEQ, "%s", fname); + free(mlbuf); + return (1); + } + if (clen == 0) + clen = 1; + /* this should work if newline is delimiter */ + if (ch == sep) + isdelim = 1; + if (ch == '\n') { + if (!isdelim && !sflag) + (void)fwrite(lbuf, lbuflen, 1, stdout); + break; + } + } + if (!isdelim) + continue; + + pos = positions + 1; + for (field = maxval, p = lbuf; field; --field, ++pos) { + if (*pos && output++) + for (i = 0; dcharmb[i] != '\0'; i++) + putchar(dcharmb[i]); + for (;;) { + clen = mbrtowc(&ch, p, lbuf + reallen - p, + NULL); + if (clen == (size_t)-1 || clen == (size_t)-2) { + warnc(EILSEQ, "%s", fname); + free(mlbuf); + return (1); + } + if (clen == 0) + clen = 1; + p += clen; + if (ch == '\n' || ch == sep) + break; + if (*pos) + for (i = 0; i < (int)clen; i++) + putchar(p[i - clen]); + } + if (ch == '\n') + break; + } + if (ch != '\n') { + if (autostop) { + if (output) + for (i = 0; dcharmb[i] != '\0'; i++) + putchar(dcharmb[i]); + for (; (ch = *p) != '\n'; ++p) + (void)putchar(ch); + } else + for (; (ch = *p) != '\n'; ++p); + } + (void)putchar('\n'); + } + free(mlbuf); + return (0); +} + +static void +usage(void) +{ + (void)fprintf(stderr, "%s\n%s\n%s\n", + "usage: cut -b list [-n] [file ...]", + " cut -c list [file ...]", + " cut -f list [-s] [-d delim] [file ...]"); + exit(1); +} |