bug-textutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

extension to uniq: make field separator an option


From: Jochen Hein
Subject: extension to uniq: make field separator an option
Date: Sun, 21 Jul 2002 08:19:13 +0200
User-agent: Gnus/5.090006 (Oort Gnus v0.06) XEmacs/21.4 (Common Lisp, i386-debian-linux)

I sent this patch to Jim a while ago; this is a retry, and also a way
to get the patch archived somewhere.  Comments are welcome.

Jochen

diff -u -r textutils-2.0.22.orig/ChangeLog textutils-2.0.22/ChangeLog
--- textutils-2.0.22.orig/ChangeLog     Sat Jul 20 16:10:11 2002
+++ textutils-2.0.22/ChangeLog  Sun Jul 21 08:05:27 2002
@@ -1,3 +1,8 @@
+2002-07-21  Jochen Hein  <address@hidden>
+ 
+       * src/uniq.c: Added option '-t' to set the field separator.
+       * tests/uniq/Test.pm: Added tests for '-t'.
+ 
 2002-07-20  Jim Meyering  <address@hidden>
 
        * Version 2.0.22.
Only in textutils-2.0.22: ChangeLog~
diff -u -r textutils-2.0.22.orig/TODO textutils-2.0.22/TODO
--- textutils-2.0.22.orig/TODO  Tue Nov 27 08:50:29 2001
+++ textutils-2.0.22/TODO       Sun Jul 21 08:01:46 2002
@@ -84,7 +84,11 @@
 
 ---------------------
 
-uniq: add a more flexible key selection mechanism
+uniq: add a more flexible key selection mechanism, see sort for
+   example (imagine first sorting and then running uniq).
+
+   Expand the -t option to accept not only strings of characters but
+   regular expressions, see awk's FS for example.
 
 ---------------------
 
diff -u -r textutils-2.0.22.orig/doc/coreutils.texi textutils-2.0.22/doc/coreutils.texi
--- textutils-2.0.22.orig/doc/coreutils.texi    Wed Jul 17 12:50:33 2002
+++ textutils-2.0.22/doc/coreutils.texi Sun Jul 21 08:18:13 2002
@@ -3258,6 +3258,14 @@
 @address@hidden  @acronym{POSIX} 1003.1-2001 (@pxref{Standards conformance})
 does not allow this; use @option{-s @var{n}} instead.
 
+@item -t @var{s}
+@itemx --separator=@var{s}
+@opindex -t
+@opindex --separator
+Use any one of the characters in string @var{s} as the field separator.
+If this option is not present, blank characters separate the fields.
+This is a GNU extension.
+
 @item -c
 @itemx --count
 @opindex -c
diff -u -r textutils-2.0.22.orig/src/uniq.c textutils-2.0.22/src/uniq.c
--- textutils-2.0.22.orig/src/uniq.c    Tue Jul  2 07:15:06 2002
+++ textutils-2.0.22/src/uniq.c Sun Jul 21 08:04:32 2002
@@ -108,6 +108,9 @@
 /* Select whether/how to delimit groups of duplicate lines.  */
 static enum delimit_method delimit_groups;
 
+/* what to use as field separator.  */
+static char *separator = NULL;
+
 static struct option const longopts[] =
 {
   {"count", no_argument, NULL, 'c'},
@@ -118,6 +121,7 @@
   {"skip-fields", required_argument, NULL, 'f'},
   {"skip-chars", required_argument, NULL, 's'},
   {"check-chars", required_argument, NULL, 'w'},
+  {"separator", required_argument, NULL, 't'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
@@ -154,6 +158,7 @@
   -f, --skip-fields=N   avoid comparing the first N fields\n\
   -i, --ignore-case     ignore differences in case when comparing\n\
   -s, --skip-chars=N    avoid comparing the first N characters\n\
+  -t, --separator=S     use a character in string S as field separator\n\
   -u, --unique          only print unique lines\n\
 "), stdout);
      fputs (_("\
@@ -184,6 +189,28 @@
   return size;
 }
 
+/* Is ch a separator?
+   If no separator has been set, use ISBLANK(), otherwise
+   use what has been requested by the user. */
+
+static int
+is_separator (char ch)
+{
+  char *sep;
+  size_t i = 0;
+
+  if (separator == NULL)
+    return ISBLANK(ch);
+
+  while (separator[i] != '\0' )
+    {
+      if (ch == separator[i])
+       return 1;
+      i++;
+    };
+  return 0;
+}
+
 /* Given a linebuffer LINE,
    return a pointer to the beginning of the line's field to be compared. */
 
@@ -197,9 +224,9 @@
 
   for (count = 0; count < skip_fields && i < size; count++)
     {
-      while (i < size && ISBLANK (lp[i]))
+      while (i < size && is_separator(lp[i]))
        i++;
-      while (i < size && !ISBLANK (lp[i]))
+      while (i < size && !is_separator(lp[i]))
        i++;
     }
 
@@ -418,7 +445,7 @@
       if (optc == -1
          || (posixly_correct && nfiles != 0)
          || ((optc = getopt_long (argc, argv,
-                                  "-0123456789Dcdf:is:uw:", longopts, NULL))
+                                  "-0123456789Dcdf:is:t:uw:", longopts, NULL))
              == -1))
        {
          if (optind == argc)
@@ -448,6 +475,10 @@
            else
              file[nfiles++] = optarg;
          }
+         break;
+
+       case 't':
+         separator = xstrdup(optarg);
          break;
 
        case '0':
diff -u -r textutils-2.0.22.orig/tests/uniq/Test.pm textutils-2.0.22/tests/uniq/Test.pm
--- textutils-2.0.22.orig/tests/uniq/Test.pm    Mon Feb 18 13:39:12 2002
+++ textutils-2.0.22/tests/uniq/Test.pm Sun Jul 21 08:03:01 2002
@@ -83,6 +83,11 @@
 ['117', '--all-repeated=prepend', "a\na\nb\nc\nc\n", "\na\na\n\nc\nc\n", 0],
 ['118', '--all-repeated=prepend', "a\nb\n",          "",                 0],
 ['119', '--all-repeated=badoption', "a\n",           "",                 1],
+# test field separator
+['120', '-t : -f 1',  "a:b\na:a\n", "a:b\na:a\n",               0],
+['121', '-t : -f 2',  "a:b\nb:b\n", "a:b\n",                    0],
+['122', '-t : -f 2 -D',  "a:b\na:a\n", "a:b\na:a\n",            0],
+
 );
 
 sub test_vector

-- 
#include <~/.signature>: permission denied



reply via email to

[Prev in Thread] Current Thread [Next in Thread]