bug-textutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

PATCH -- new option for cut(1) -- whitespace delimited fields


From: Ian Bruce
Subject: PATCH -- new option for cut(1) -- whitespace delimited fields
Date: Sat, 22 Jun 2002 01:25:14 -0700

As discussed previously on this list
(http://mail.gnu.org/pipermail/bug-textutils/2002-May/001215.html),
it would be convenient if cut(1) would accept arbitrary amounts of
whitespace as a field delimiter. In fact, this is probably what you
actually want about 90% of the time, although it should not be the
default behavior for reasons of backward compatibility. The following
patch accomplishes this. It adds a new option, "-w", which functions
the same way as the "-f" option, except that it eats all spaces and tabs
between fields. The "-d" and "-s" options are not allowed in combination
with "-w".


-- Ian Bruce  <ian dot bruce at myrealbox dot com>


--- textutils-2.0.21/src/cut.c.orig     Sat Dec  1 09:29:26 2001
+++ textutils-2.0.21/src/cut.c  Tue Jun 18 07:46:45 2002
@@ -109,8 +109,11 @@
     /* Output characters that are in the given bytes. */
     byte_mode,
 
-    /* Output the given delimeter-separated fields. */
-    field_mode
+    /* Output the given delimiter-separated fields. */
+    field_mode,
+
+    /* Output the given whitespace-separated fields. */
+    field_mode_ws
   };
 
 /* The name this program was run with. */
@@ -118,12 +121,12 @@
 
 static enum operating_mode operating_mode;
 
-/* If nonzero do not output lines containing no delimeter characters.
+/* If nonzero do not output lines containing no delimiter characters.
    Otherwise, all such lines are printed.  This option is valid only
    with field mode.  */
 static int suppress_non_delimited;
 
-/* The delimeter character for field mode. */
+/* The delimiter character for field mode. */
 static int delim;
 
 /* The length of output_delimiter_string.  */
@@ -148,6 +151,7 @@
   {"bytes", required_argument, 0, 'b'},
   {"characters", required_argument, 0, 'c'},
   {"fields", required_argument, 0, 'f'},
+  {"whitespace", required_argument, 0, 'w'},
   {"delimiter", required_argument, 0, 'd'},
   {"only-delimited", no_argument, 0, 's'},
   {"output-delimiter", required_argument, 0, OUTPUT_DELIMITER_OPTION},
@@ -188,6 +192,7 @@
 "), stdout);
       fputs (_("\
   -s, --only-delimited    do not print lines not containing delimiters\n\
+  -w, --whitespace=LIST   output only these fields, delimited by whitespace\n\
       --output-delimiter=STRING  use STRING as the output delimiter\n\
                             the default is to use the input delimiter\n\
 "), stdout);
@@ -195,8 +200,8 @@
       fputs (VERSION_OPTION_DESCRIPTION, stdout);
       fputs (_("\
 \n\
-Use one, and only one of -b, -c or -f.  Each LIST is made up of one\n\
-range, or many ranges separated by commas.  Each range is one of:\n\
+Use one, and only one of -b, -c, -f, or -w.  Each LIST is made up of\n\
+one range, or many ranges separated by commas.  Each range is one of:\n\
 \n\
   N     N'th byte, character or field, counted from 1\n\
   N-    from N'th byte, character or field, to end of line\n\
@@ -540,13 +545,78 @@
     }
 }
 
+/* Read from stream STREAM, printing to standard output any selected fields.
+   Fields are delimited by runs of spaces, tabs and vertical tabs.  */
+
+static void
+cut_fields_ws (FILE *stream)
+{
+  int c;                       /* Current input character.  */
+  int in_field = 0;            /* Nonzero while inside a field.  */
+  int select_field = 0;        /* Nonzero if the current field is printed.  */
+  unsigned int field_idx = 0;  /* Fields started so far on this line.  */
+  int found_any_selected_field = 0;  /* Nonzero once this line has output.  */
+
+  while ((c = getc (stream)) != EOF)
+    {
+      if (c == ' ' || c == '\t' || c == '\v')
+       {
+         in_field = 0;         /* A separator ends the current field.  */
+       }
+      /* Newline, CR and form feed all end the line and reset the state.  */
+      else if (c == '\n' || c == '\r' || c == '\f')
+       {
+         if (found_any_selected_field)  /* Else emit nothing at all.  */
+           putchar ('\n');     /* Always '\n', whatever C was.  */
+         field_idx = 0;
+         in_field = 0;
+         found_any_selected_field = 0;
+       }
+      /* Any other character is part of a field.  */
+      else
+       {
+         if (in_field)         /* Continuing the current field.  */
+           {
+             if (select_field)
+               putchar (c);
+           }
+         else                  /* C starts a new field.  */
+           {
+             /* NOTE(review): print_kth assumed to test selection.  */
+             if (print_kth (++field_idx))
+               {
+                 if (found_any_selected_field)  /* Not the first printed.  */
+                   fwrite (output_delimiter_string, sizeof (char),
+                           output_delimiter_length, stdout);
+                 putchar (c);
+                 found_any_selected_field = 1;
+                 select_field = 1;
+               }
+             else
+               select_field = 0;
+             in_field = 1;
+           }
+       }
+    }
+  /* Terminate a last line that did not end in a line terminator.  */
+  if (found_any_selected_field)
+    putchar ('\n');
+}
+
 static void
 cut_stream (FILE *stream)
 {
-  if (operating_mode == byte_mode)
-    cut_bytes (stream);
-  else
-    cut_fields (stream);
+  switch (operating_mode)      /* Mode chosen by -b/-c, -f or -w.  */
+    {
+    case byte_mode:
+      cut_bytes (stream);
+      break;
+    case field_mode:
+      cut_fields (stream);
+      break;
+    case field_mode_ws:        /* -w: whitespace-separated fields.  */
+      cut_fields_ws (stream);
+      break;
+    }                          /* Any other mode value does nothing.  */
 }
 
 /* Process file FILE to standard output.
@@ -610,7 +680,7 @@
   delim = '\0';
   have_read_stdin = 0;
 
-  while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1)
+  while ((optc = getopt_long (argc, argv, "b:c:d:f:w:ns", longopts, NULL)) != -1)
     {
       switch (optc)
        {
@@ -619,23 +689,35 @@
 
        case 'b':
        case 'c':
-         /* Build the byte list. */
          if (operating_mode != undefined_mode)
            FATAL_ERROR (_("only one type of list may be specified"));
          operating_mode = byte_mode;
+
+         /* Build the byte list. */
          if (set_fields (optarg) == 0)
            FATAL_ERROR (_("missing list of positions"));
          break;
 
        case 'f':
-         /* Build the field list. */
          if (operating_mode != undefined_mode)
            FATAL_ERROR (_("only one type of list may be specified"));
          operating_mode = field_mode;
+
+         /* Build the field list. */
          if (set_fields (optarg) == 0)
            FATAL_ERROR (_("missing list of fields"));
          break;
 
+       case 'w':
+         if (operating_mode != undefined_mode)
+           FATAL_ERROR (_("only one type of list may be specified"));
+         operating_mode = field_mode_ws;
+
+         /* Build the field list. */
+         if (set_fields (optarg) == 0)
+           FATAL_ERROR (_("missing list of fields"));
+         break;   /**/
+
        case 'd':
          /* New delimiter. */
          /* Interpret -d '' to mean `use the NUL byte as the delimiter.'  */
@@ -685,7 +767,7 @@
   if (output_delimiter_string == NULL)
     {
       static char dummy[2];
-      dummy[0] = delim;
+      dummy[0] = (operating_mode == field_mode_ws ? ' ' : delim);
       dummy[1] = '\0';
       output_delimiter_string = dummy;
       output_delimiter_length = 1;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]