bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

comm enhancement: --ignore-case [patch]


From: Werner LEMBERG
Subject: comm enhancement: --ignore-case [patch]
Date: Mon, 28 Feb 2005 09:41:50 +0100 (CET)

[comm 5.2.1]

Here are patches to add an option to `comm' so that it ignores case
while doing the comparisons, similar to the --ignore-case option in
`uniq'.

Most of the work is simply copied and pasted from `uniq.c', so I doubt
that copyright papers are necessary.  In case they are, I've already
signed a kind of `general copyright assignment', so it should be
sufficient for me to send an email to the FSF instead of doing the
complete paperwork.


    Werner


======================================================================

2005-02-28  Werner Lemberg  <address@hidden>

        Make `comm' accept option `-i' and `--ignore-case', similar to
        `uniq' (but with proper collation handling).  Almost all code is
        simply taken from `uniq.c' from the corresponding places.

        * src/comm.c: Include `memcasecmp.h'.
        (ignore_case): New global variable.
        (long_options): Add entry for `--ignore-case'.
        (usage): Updated.
        (compare_files): Handle `ignore_case'.
        (main): Handle `-i'.

        * doc/coreutils.texi (comm invocation): Document `--ignore-case'.


======================================================================


--- ./src/comm.c.old    2004-02-21 10:21:40.000000000 +0100
+++ ./src/comm.c        2005-02-28 09:28:12.392622016 +0100
@@ -1,5 +1,5 @@
 /* comm -- compare two sorted files line by line.
-   Copyright (C) 86, 90, 91, 1995-2004 Free Software Foundation, Inc.
+   Copyright (C) 86, 90, 91, 1995-2005 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -27,6 +27,7 @@
 #include "error.h"
 #include "hard-locale.h"
 #include "xmemcoll.h"
+#include "memcasecmp.h"
 
 /* The official name of this program (e.g., no `g' prefix).  */
 #define PROGRAM_NAME "comm"
@@ -52,8 +53,12 @@
 /* If nonzero, print lines that are found in both files. */
 static int both;
 
+/* If nonzero, ignore case when comparing.  */
+static int ignore_case;
+
 static struct option const long_options[] =
 {
+  {"ignore-case", no_argument, NULL, 'i'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {0, 0, 0, 0}
@@ -84,9 +89,10 @@
 "), stdout);
       fputs (_("\
 \n\
-  -1              suppress lines unique to FILE1\n\
-  -2              suppress lines unique to FILE2\n\
-  -3              suppress lines that appear in both files\n\
+  -1                  suppress lines unique to FILE1\n\
+  -2                  suppress lines unique to FILE2\n\
+  -3                  suppress lines that appear in both files\n\
+  -i, --ignore-case   ignore differences in case when comparing\n\
 "), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
       fputs (VERSION_OPTION_DESCRIPTION, stdout);
@@ -185,7 +191,35 @@
        order = -1;
       else
        {
-         if (HAVE_SETLOCALE && hard_LC_COLLATE)
+         if (ignore_case)
+           {
+             if (HAVE_SETLOCALE && hard_LC_COLLATE)
+               {
+                 size_t i;
+                 size_t len0 = thisline[0]->length;
+                 size_t len1 = thisline[1]->length;
+                 char *copy0 = alloca (len0);
+                 char *copy1 = alloca (len1);
+                 /* Create buffer with uppercase characters.  */
+                 for (i = 0; i < len0; i++)
+                   copy0[i] = TOUPPER (thisline[0]->buffer[i]);
+                 for (i = 0; i < len1; i++)
+                   copy1[i] = TOUPPER (thisline[1]->buffer[i]);
+                 order = xmemcoll (copy0, len0 - 1, copy1, len1 - 1);
+               }
+             else
+               {
+                 size_t len = min (thisline[0]->length,
+                                   thisline[1]->length) - 1;
+                 order = memcasecmp (thisline[0]->buffer,
+                                     thisline[1]->buffer, len);
+               }
+             if (order == 0)
+               order = (thisline[0]->length < thisline[1]->length
+                        ? -1
+                        : thisline[0]->length != thisline[1]->length);
+           }
+         else if (HAVE_SETLOCALE && hard_LC_COLLATE)
            order = xmemcoll (thisline[0]->buffer, thisline[0]->length - 1,
                              thisline[1]->buffer, thisline[1]->length - 1);
          else
@@ -257,7 +291,7 @@
   only_file_2 = 1;
   both = 1;
 
-  while ((c = getopt_long (argc, argv, "123", long_options, NULL)) != -1)
+  while ((c = getopt_long (argc, argv, "123i", long_options, NULL)) != -1)
     switch (c)
       {
       case 0:
@@ -275,6 +309,10 @@
        both = 0;
        break;
 
+      case 'i':
+       ignore_case = 1;
+       break;
+
       case_GETOPT_HELP_CHAR;
 
       case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
--- ./doc/coreutils.texi.old    2004-03-10 18:50:52.000000000 +0100
+++ ./doc/coreutils.texi        2005-02-28 09:38:54.695977064 +0100
@@ -122,7 +122,7 @@
 This manual documents version @value{VERSION} of the @sc{gnu} core
 utilities, including the standard programs for text and file manipulation.
 
-Copyright @copyright{} 1994, 1995, 1996, 2000, 2001, 2002, 2003, 2004
+Copyright @copyright{} 1994, 1995, 1996, 2000, 2001, 2002, 2003, 2004, 2005
 Free Software Foundation, Inc.
 
 @quotation
@@ -3649,11 +3649,28 @@
 @c FIXME: when there's an option to supply an alternative separator
 @c string, append `by default' to the above sentence.
 
+The program accepts the following options.  Also see @ref{Common options}.
+
+@table @samp
+
+@item -1
+@itemx -2
+@itemx -3
 @opindex -1
 @opindex -2
 @opindex -3
 The options @option{-1}, @option{-2}, and @option{-3} suppress printing of
-the corresponding columns.  Also see @ref{Common options}.
+the corresponding columns.
+
+@item -i
+@itemx --ignore-case
+@opindex -i
+@opindex --ignore-case
+Ignore differences in case when comparing lines.  To use this option, the
+input files must have been prepared with a call to @samp{uniq -i} after
+sorting.
+
+@end table
 
 Unlike some other comparison utilities, @command{comm} has an exit
 status that does not depend on the result of the comparison.




reply via email to

[Prev in Thread] Current Thread [Next in Thread]