diff options
Diffstat (limited to 'text_cmds/comm')
-rw-r--r-- | text_cmds/comm/comm.1 | 124 | ||||
-rw-r--r-- | text_cmds/comm/comm.c | 220 |
2 files changed, 344 insertions, 0 deletions
diff --git a/text_cmds/comm/comm.1 b/text_cmds/comm/comm.1 new file mode 100644 index 0000000..45a726d --- /dev/null +++ b/text_cmds/comm/comm.1 @@ -0,0 +1,124 @@ +.\" Copyright (c) 1989, 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" From: @(#)comm.1 8.1 (Berkeley) 6/6/93 +.\" $FreeBSD: src/usr.bin/comm/comm.1,v 1.14 2005/01/25 22:28:34 tjr Exp $ +.\" +.Dd January 26, 2005 +.Os +.Dt COMM 1 +.Sh NAME +.Nm comm +.Nd select or reject lines common to two files +.Sh SYNOPSIS +.Nm +.Op Fl 123i +.Ar file1 file2 +.Sh DESCRIPTION +The +.Nm +utility reads +.Ar file1 +and +.Ar file2 , +which should be +sorted lexically, and produces three text +columns as output: lines only in +.Ar file1 ; +lines only in +.Ar file2 ; +and lines in both files. +.Pp +The filename ``-'' means the standard input. +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl 1 +Suppress printing of column 1. +.It Fl 2 +Suppress printing of column 2. +.It Fl 3 +Suppress printing of column 3. +.It Fl i +Case insensitive comparison of lines. +.El +.Pp +Each column will have a number of tab characters prepended to it +equal to the number of lower numbered columns that are being printed. +For example, if column number two is being suppressed, lines printed +in column number one will not have any tabs preceding them, and lines +printed in column number three will have one. +.Pp +The +.Nm +utility assumes that the files are lexically sorted; all characters +participate in line comparisons. +.Sh ENVIRONMENT +The +.Ev LANG , +.Ev LC_ALL , +.Ev LC_COLLATE , +and +.Ev LC_CTYPE +environment variables affect the execution of +.Nm +as described in +.Xr environ 7 . 
+.Sh EXIT STATUS +.Ex -std +.Sh SEE ALSO +.Xr cmp 1 , +.Xr diff 1 , +.Xr sort 1 , +.Xr uniq 1 +.Sh STANDARDS +The +.Nm +utility conforms to +.St -p1003.2-92 . +.Pp +The +.Fl i +option is an extension to the +.Tn POSIX +standard. +.Sh HISTORY +A +.Nm +command appeared in +.At v4 . +.Sh BUGS +Input lines are limited to +.Dv LINE_MAX +(2048) characters in length. diff --git a/text_cmds/comm/comm.c b/text_cmds/comm/comm.c new file mode 100644 index 0000000..fc6c663 --- /dev/null +++ b/text_cmds/comm/comm.c @@ -0,0 +1,220 @@ +/* + * Copyright (c) 1989, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Case Larsen. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1989, 1993, 1994\n\
+ The Regents of the University of California. All rights reserved.\n";
+#endif
+
+#if 0
+#ifndef lint
+static char sccsid[] = "From: @(#)comm.c 8.4 (Berkeley) 5/4/95";
+#endif
+#endif
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/usr.bin/comm/comm.c,v 1.21 2004/07/02 22:48:29 tjr Exp $");
+
+#include <err.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
+
+/*
+ * Size of every line buffer, in wide characters: up to LINE_MAX characters
+ * read from the file, one slot for a newline that next_line() may append to
+ * an unterminated final line, and the terminating NUL.
+ */
+#define	MAXLINELEN	(LINE_MAX + 2)
+/* Count handed to fgetws(); it stores at most READLEN - 1 characters. */
+#define	READLEN		(MAXLINELEN - 1)
+
+/* Column prefixes: zero, one or two tabs, indexed by printed-column rank. */
+const wchar_t *tabs[] = { L"", L"\t", L"\t\t" };
+
+FILE *file(const char *);
+void show(FILE *, const char *, const wchar_t *, wchar_t *);
+int wcsicoll(const wchar_t *, const wchar_t *);
+static int next_line(wchar_t *, FILE *);
+static void usage(void);
+
+/*
+ * comm [-123i] file1 file2
+ *
+ * Walk two sorted files in step and print each line in one of three
+ * columns: only in file1, only in file2, or in both.  -1, -2 and -3
+ * suppress the corresponding column; -i compares case-insensitively.
+ * Exits 0 on success and 1 (through err() or usage()) on failure.
+ */
+int
+main(int argc, char *argv[])
+{
+	int comp, file1done = 0, file2done = 0, read1, read2;
+	int ch, flag1, flag2, flag3, iflag;
+	FILE *fp1, *fp2;
+	const wchar_t *col1, *col2, *col3;
+	wchar_t line1[MAXLINELEN], line2[MAXLINELEN];
+	const wchar_t **p;
+
+	flag1 = flag2 = flag3 = 1;
+	iflag = 0;
+
+	(void) setlocale(LC_ALL, "");
+
+	while ((ch = getopt(argc, argv, "123i")) != -1)
+		switch(ch) {
+		case '1':
+			flag1 = 0;
+			break;
+		case '2':
+			flag2 = 0;
+			break;
+		case '3':
+			flag3 = 0;
+			break;
+		case 'i':
+			iflag = 1;
+			break;
+		case '?':
+		default:
+			usage();
+		}
+	argc -= optind;
+	argv += optind;
+
+	if (argc != 2 || !argv[0] || !argv[1])
+		usage();
+
+	fp1 = file(argv[0]);
+	fp2 = file(argv[1]);
+
+	/* for each column printed, add another tab offset */
+	p = tabs;
+	col1 = col2 = col3 = NULL;
+	if (flag1)
+		col1 = *p++;
+	if (flag2)
+		col2 = *p++;
+	if (flag3)
+		col3 = *p;
+
+	for (read1 = read2 = 1;;) {
+		/* read next line, check for EOF */
+		if (read1) {
+			file1done = !next_line(line1, fp1);
+			if (file1done && ferror(fp1))
+				err(1, "%s", argv[0]);
+		}
+		if (read2) {
+			file2done = !next_line(line2, fp2);
+			if (file2done && ferror(fp2))
+				err(1, "%s", argv[1]);
+		}
+
+		/* if one file done, display the rest of the other file */
+		if (file1done) {
+			if (!file2done && col2)
+				show(fp2, argv[1], col2, line2);
+			break;
+		}
+		if (file2done) {
+			if (!file1done && col1)
+				show(fp1, argv[0], col1, line1);
+			break;
+		}
+
+		/* lines are the same */
+		if (iflag)
+			comp = wcsicoll(line1, line2);
+		else
+			comp = wcscoll(line1, line2);
+
+		if (!comp) {
+			read1 = read2 = 1;
+			if (col3)
+				(void)printf("%ls%ls", col3, line1);
+			continue;
+		}
+
+		/* lines are different: print the smaller and advance only it */
+		if (comp < 0) {
+			read1 = 1;
+			read2 = 0;
+			if (col1)
+				(void)printf("%ls%ls", col1, line1);
+		} else {
+			read1 = 0;
+			read2 = 1;
+			if (col2)
+				(void)printf("%ls%ls", col2, line2);
+		}
+	}
+	exit(0);
+}
+
+/*
+ * Read the next line of fp into buf, which must hold MAXLINELEN wide
+ * characters.  Returns 1 when something was read and 0 on end of file or
+ * read error (the caller tells them apart with ferror()).
+ *
+ * A line longer than READLEN - 1 characters is still returned in pieces.
+ * A final line that lacks its newline gets one appended, so that it
+ * compares equal to the same line in the other file and does not run into
+ * the next line of output.
+ */
+static int
+next_line(wchar_t *buf, FILE *fp)
+{
+	size_t len;
+	wint_t wc;
+
+	if (fgetws(buf, READLEN, fp) == NULL)
+		return (0);
+	len = wcslen(buf);
+	if (len > 0 && buf[len - 1] != L'\n') {
+		/*
+		 * Either a piece of an over-long line or an unterminated
+		 * last line; peek one character ahead to find out which.
+		 */
+		if ((wc = getwc(fp)) == WEOF) {
+			/* len <= READLEN - 1, so len + 1 < MAXLINELEN. */
+			buf[len] = L'\n';
+			buf[len + 1] = L'\0';
+		} else
+			(void)ungetwc(wc, fp);
+	}
+	return (1);
+}
+
+/*
+ * Print the line already held in buf and then every remaining line of fp,
+ * each prefixed with offset.  buf must hold MAXLINELEN wide characters and
+ * is overwritten.  fn is the file name used in the error message when
+ * reading fails.
+ */
+void
+show(FILE *fp, const char *fn, const wchar_t *offset, wchar_t *buf)
+{
+
+	do {
+		(void)printf("%ls%ls", offset, buf);
+	} while (next_line(buf, fp));
+	if (ferror(fp))
+		err(1, "%s", fn);
+}
+
+/*
+ * Open name for reading; "-" selects standard input.  Does not return if
+ * the file cannot be opened.
+ */
+FILE *
+file(const char *name)
+{
+	FILE *fp;
+
+	if (!strcmp(name, "-"))
+		return (stdin);
+	if ((fp = fopen(name, "r")) == NULL) {
+		err(1, "%s", name);
+	}
+	return (fp);
+}
+
+/* Print the usage message on standard error and exit with status 1. */
+static void
+usage(void)
+{
+	(void)fprintf(stderr, "usage: comm [-123i] file1 file2\n");
+	exit(1);
+}
+
+/*
+ * Case-insensitive collation: lower-case copies of s1 and s2 are compared
+ * with wcscoll().  Returns a value less than, equal to or greater than
+ * zero, as wcscoll() does.  Only the first MAXLINELEN - 1 characters of
+ * each string take part, so the local copies cannot overflow.
+ */
+int
+wcsicoll(const wchar_t *s1, const wchar_t *s2)
+{
+	wchar_t *p, line1[MAXLINELEN], line2[MAXLINELEN];
+
+	for (p = line1; *s1 != L'\0' && p < line1 + MAXLINELEN - 1; s1++)
+		*p++ = towlower(*s1);
+	*p = L'\0';
+	for (p = line2; *s2 != L'\0' && p < line2 + MAXLINELEN - 1; s2++)
+		*p++ = towlower(*s2);
+	*p = L'\0';
+	return (wcscoll(line1, line2));
+}