summary refs log tree commit diff stats
path: root/text_cmds/comm
diff options
context:
space:
mode:
Diffstat (limited to 'text_cmds/comm')
-rw-r--r-- text_cmds/comm/comm.1 124
-rw-r--r-- text_cmds/comm/comm.c 220
2 files changed, 344 insertions, 0 deletions
diff --git a/text_cmds/comm/comm.1 b/text_cmds/comm/comm.1
new file mode 100644
index 0000000..45a726d
--- /dev/null
+++ b/text_cmds/comm/comm.1
@@ -0,0 +1,124 @@
+.\" Copyright (c) 1989, 1990, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" the Institute of Electrical and Electronics Engineers, Inc.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" From: @(#)comm.1 8.1 (Berkeley) 6/6/93
+.\" $FreeBSD: src/usr.bin/comm/comm.1,v 1.14 2005/01/25 22:28:34 tjr Exp $
+.\"
+.Dd January 26, 2005
+.Os
+.Dt COMM 1
+.Sh NAME
+.Nm comm
+.Nd select or reject lines common to two files
+.Sh SYNOPSIS
+.Nm
+.Op Fl 123i
+.Ar file1 file2
+.Sh DESCRIPTION
+The
+.Nm
+utility reads
+.Ar file1
+and
+.Ar file2 ,
+which should be
+sorted lexically, and produces three text
+columns as output: lines only in
+.Ar file1 ;
+lines only in
+.Ar file2 ;
+and lines in both files.
+.Pp
+The filename ``-'' means the standard input.
+.Pp
+The following options are available:
+.Bl -tag -width indent
+.It Fl 1
+Suppress printing of column 1.
+.It Fl 2
+Suppress printing of column 2.
+.It Fl 3
+Suppress printing of column 3.
+.It Fl i
+Case-insensitive comparison of lines.
+.El
+.Pp
+Each column will have a number of tab characters prepended to it
+equal to the number of lower numbered columns that are being printed.
+For example, if column number two is being suppressed, lines printed
+in column number one will not have any tabs preceding them, and lines
+printed in column number three will have one.
+.Pp
+The
+.Nm
+utility assumes that the files are lexically sorted; all characters
+participate in line comparisons.
+.Sh ENVIRONMENT
+The
+.Ev LANG ,
+.Ev LC_ALL ,
+.Ev LC_COLLATE ,
+and
+.Ev LC_CTYPE
+environment variables affect the execution of
+.Nm
+as described in
+.Xr environ 7 .
+.Sh EXIT STATUS
+.Ex -std
+.Sh SEE ALSO
+.Xr cmp 1 ,
+.Xr diff 1 ,
+.Xr sort 1 ,
+.Xr uniq 1
+.Sh STANDARDS
+The
+.Nm
+utility conforms to
+.St -p1003.2-92 .
+.Pp
+The
+.Fl i
+option is an extension to the
+.Tn POSIX
+standard.
+.Sh HISTORY
+A
+.Nm
+command appeared in
+.At v4 .
+.Sh BUGS
+Input lines are limited to
+.Dv LINE_MAX
+(2048) characters in length.
diff --git a/text_cmds/comm/comm.c b/text_cmds/comm/comm.c
new file mode 100644
index 0000000..fc6c663
--- /dev/null
+++ b/text_cmds/comm/comm.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 1989, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Case Larsen.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint	/* define the copyright string unless `lint' is defined */
+static const char copyright[] =
+"@(#) Copyright (c) 1989, 1993, 1994\n\
+ The Regents of the University of California. All rights reserved.\n";
+#endif
+
+#if 0	/* the SCCS identification string below is compiled out */
+#ifndef lint
+static char sccsid[] = "From: @(#)comm.c 8.4 (Berkeley) 5/4/95";
+#endif
+#endif
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/usr.bin/comm/comm.c,v 1.21 2004/07/02 22:48:29 tjr Exp $");
+
+#include <err.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#define MAXLINELEN (LINE_MAX + 1)	/* size, in wchar_t, of every line buffer; also the count given to fgetws() */
+
+const wchar_t *tabs[] = { L"", L"\t", L"\t\t" };	/* prefixes of 0, 1 and 2 tabs; main() hands them to the enabled columns in order */
+
+FILE *file(const char *);	/* open an input by name; "-" yields stdin */
+void show(FILE *, const char *, const wchar_t *, wchar_t *);	/* print the buffered line, then every remaining line of the stream */
+int wcsicoll(const wchar_t *, const wchar_t *);	/* wcscoll() applied to towlower()-folded copies */
+static void usage(void);	/* print the synopsis to stderr and exit(1) */
+
+/*
+ * comm: walk two inputs in step and print every line in one of three
+ * columns -- found only in file1, only in file2, or in both.
+ *
+ * Options -1, -2 and -3 suppress the matching column; -i compares lines
+ * with wcsicoll() instead of wcscoll().  Exactly two file operands are
+ * required, otherwise usage() is called.  A read error on either input
+ * ends the program through err(1, ...); a normal run ends with exit(0).
+ */
+int
+main(int argc, char *argv[])
+{
+	wchar_t left[MAXLINELEN], right[MAXLINELEN];
+	const wchar_t *col1 = NULL, *col2 = NULL, *col3 = NULL;
+	FILE *fp1, *fp2;
+	int want1 = 1, want2 = 1, want3 = 1, fold = 0;
+	int eof1 = 0, eof2 = 0, fetch1 = 1, fetch2 = 1;
+	int opt, order, depth = 0;
+
+	(void) setlocale(LC_ALL, "");
+
+	while ((opt = getopt(argc, argv, "123i")) != -1) {
+		if (opt == '1')
+			want1 = 0;
+		else if (opt == '2')
+			want2 = 0;
+		else if (opt == '3')
+			want3 = 0;
+		else if (opt == 'i')
+			fold = 1;
+		else
+			usage();	/* '?' or any other value; usage() exits */
+	}
+	argc -= optind;
+	argv += optind;
+
+	if (!(argc == 2 && argv[0] && argv[1]))
+		usage();
+
+	fp1 = file(argv[0]);
+	fp2 = file(argv[1]);
+
+	/*
+	 * Each printed column takes the next entry of tabs[], so its
+	 * prefix holds one tab per enabled lower-numbered column.  A
+	 * suppressed column keeps a NULL prefix and is never printed.
+	 */
+	if (want1)
+		col1 = tabs[depth++];
+	if (want2)
+		col2 = tabs[depth++];
+	if (want3)
+		col3 = tabs[depth];
+
+	for (;;) {
+		/* refill whichever side was consumed on the previous pass */
+		if (fetch1) {
+			eof1 = (fgetws(left, MAXLINELEN, fp1) == NULL);
+			if (eof1 && ferror(fp1))
+				err(1, "%s", argv[0]);
+		}
+		if (fetch2) {
+			eof2 = (fgetws(right, MAXLINELEN, fp2) == NULL);
+			if (eof2 && ferror(fp2))
+				err(1, "%s", argv[1]);
+		}
+
+		/* once either input is exhausted, drain the other and stop */
+		if (eof1 || eof2) {
+			if (eof1) {
+				if (!eof2 && col2)
+					show(fp2, argv[1], col2, right);
+			} else if (col1) {
+				show(fp1, argv[0], col1, left);
+			}
+			break;
+		}
+
+		order = fold ? wcsicoll(left, right) : wcscoll(left, right);
+
+		if (order == 0) {
+			/* same line in both inputs: column 3, advance both */
+			fetch1 = fetch2 = 1;
+			if (col3)
+				(void)printf("%ls%ls", col3, left);
+		} else if (order < 0) {
+			/* file1's line sorts first: column 1, advance file1 */
+			fetch1 = 1;
+			fetch2 = 0;
+			if (col1)
+				(void)printf("%ls%ls", col1, left);
+		} else {
+			/* file2's line sorts first: column 2, advance file2 */
+			fetch1 = 0;
+			fetch2 = 1;
+			if (col2)
+				(void)printf("%ls%ls", col2, right);
+		}
+	}
+	exit(0);
+}
+
+/*
+ * Print the line already held in buf, then keep reading lines from fp
+ * into buf and printing them until fgetws() returns NULL.  Every line is
+ * preceded by the column prefix `offset'.  If the stream is in an error
+ * state afterwards, report it under the name `fn' and exit via err().
+ */
+void
+show(FILE *fp, const char *fn, const wchar_t *offset, wchar_t *buf)
+{
+
+	for (;;) {
+		(void)printf("%ls%ls", offset, buf);
+		if (fgetws(buf, MAXLINELEN, fp) == NULL)
+			break;
+	}
+
+	if (ferror(fp) != 0)
+		err(1, "%s", fn);
+}
+
+/*
+ * Map an operand to an input stream: the name "-" selects stdin, any
+ * other name is opened for reading with fopen().  If the open fails the
+ * program exits through err(1, ...), so the result is never NULL.
+ */
+FILE *
+file(const char *name)
+{
+	FILE *stream;
+
+	if (strcmp(name, "-") == 0) {
+		stream = stdin;
+	} else {
+		stream = fopen(name, "r");
+		if (stream == NULL)
+			err(1, "%s", name);
+	}
+	return (stream);
+}
+
+/*
+ * Write the one-line synopsis to standard error and exit with status 1.
+ */
+static void
+usage(void)
+{
+	(void)fputs("usage: comm [-123i] file1 file2\n", stderr);
+	exit(1);
+}
+
+/*
+ * Case-insensitive collation.  Both strings are copied into local
+ * buffers with every character passed through towlower(), and the
+ * wcscoll() result for the two folded copies is returned.
+ *
+ * No bounds check is made here: each input, terminator included, must
+ * fit in MAXLINELEN wide characters.
+ */
+int
+wcsicoll(const wchar_t *s1, const wchar_t *s2)
+{
+	wchar_t folded1[MAXLINELEN], folded2[MAXLINELEN];
+	size_t i;
+
+	for (i = 0; s1[i] != L'\0'; i++)
+		folded1[i] = towlower(s1[i]);
+	folded1[i] = L'\0';
+
+	for (i = 0; s2[i] != L'\0'; i++)
+		folded2[i] = towlower(s2[i]);
+	folded2[i] = L'\0';
+
+	return (wcscoll(folded1, folded2));
+}