As discussed previously on this list
(http://mail.gnu.org/pipermail/bug-textutils/2002-May/001215.html),
it would be convenient if cut(1) would accept arbitrary amounts of
whitespace as a field delimiter. In fact, this is probably what you
actually want about 90% of the time, although it should not be the
default behavior for reasons of backward compatibility. The following
patch accomplishes this. It adds a new option, "-w", which functions
the same way as the "-f" option, except that it eats all spaces and tabs
between fields. The "-d" and "-s" options are not allowed in combination
with "-w".
-- Ian Bruce <ian dot bruce at myrealbox dot com>
--- textutils-2.0.21/src/cut.c.orig Sat Dec 1 09:29:26 2001
+++ textutils-2.0.21/src/cut.c Tue Jun 18 07:46:45 2002
@@ -109,8 +109,11 @@
/* Output characters that are in the given bytes. */
byte_mode,
- /* Output the given delimeter-separated fields. */
- field_mode
+ /* Output the given delimiter-separated fields. */
+ field_mode,
+
+ /* Output the given whitespace-separated fields. */
+ field_mode_ws
};
/* The name this program was run with. */
@@ -118,12 +121,12 @@
static enum operating_mode operating_mode;
-/* If nonzero do not output lines containing no delimeter characters.
+/* If nonzero do not output lines containing no delimiter characters.
Otherwise, all such lines are printed. This option is valid only
with field mode. */
static int suppress_non_delimited;
-/* The delimeter character for field mode. */
+/* The delimiter character for field mode. */
static int delim;
/* The length of output_delimiter_string. */
@@ -148,6 +151,7 @@
{"bytes", required_argument, 0, 'b'},
{"characters", required_argument, 0, 'c'},
{"fields", required_argument, 0, 'f'},
+ {"whitespace", required_argument, 0, 'w'},
{"delimiter", required_argument, 0, 'd'},
{"only-delimited", no_argument, 0, 's'},
{"output-delimiter", required_argument, 0, OUTPUT_DELIMITER_OPTION},
@@ -188,6 +192,7 @@
"), stdout);
fputs (_("\
-s, --only-delimited do not print lines not containing delimiters\n\
+ -w, --whitespace=LIST output only these fields, delimited by whitespace\n\
--output-delimiter=STRING use STRING as the output delimiter\n\
the default is to use the input delimiter\n\
"), stdout);
@@ -195,8 +200,8 @@
fputs (VERSION_OPTION_DESCRIPTION, stdout);
fputs (_("\
\n\
-Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
-range, or many ranges separated by commas. Each range is one of:\n\
+Use one, and only one of -b, -c, -f, or -w. Each LIST is made up of\n\
+one range, or many ranges separated by commas. Each range is one of:\n\
\n\
N N'th byte, character or field, counted from 1\n\
N- from N'th byte, character or field, to end of line\n\
@@ -540,13 +545,78 @@
}
}
+/* Read from stream STREAM, printing to standard output any selected fields. Runs of space, tab or vertical tab separate fields; newline, CR or form feed ends a line.
+ Selected fields are joined with output_delimiter_string; a line on which no selected field was found produces no output at all. */
+
+static void
+cut_fields_ws (FILE *stream)
+{
+ int c; /* Current input character, or EOF. */
+ int in_field = 0; /* Nonzero while the previous character belonged to a field. */
+ int select_field = 0; /* Nonzero if the field being read is selected for output. */
+ unsigned int field_idx = 0; /* 1-based index of the current field; 0 before the first field of a line. */
+ int found_any_selected_field = 0; /* Nonzero once a selected field has been output on this line. */
+
+ while ((c = getc (stream)) != EOF)
+ {
+ if (c == ' ' || c == '\t' || c == '\v') /* Separator: leave the current field, output nothing. */
+ {
+ in_field = 0;
+ }
+
+ else if (c == '\n' || c == '\r' || c == '\f') /* End of line: finish output and reset per-line state. */
+ {
+ if (found_any_selected_field) /* Lines without any selected field print nothing, not even a newline. */
+ putchar ('\n');
+ field_idx = 0;
+ in_field = 0;
+ found_any_selected_field = 0;
+ }
+
+ else
+ {
+ if (in_field) /* Continuing a field that has already started. */
+ {
+ if (select_field)
+ putchar (c);
+ }
+ else /* First character of a new field. */
+ {
+ if (print_kth (++field_idx)) /* NOTE(review): print_kth is defined elsewhere; presumably tests whether this field number is in the requested list. */
+ {
+ if (found_any_selected_field) /* Not the first field output on this line: emit the delimiter first. */
+ fwrite (output_delimiter_string, sizeof (char),
+ output_delimiter_length, stdout);
+ putchar (c);
+ found_any_selected_field = 1;
+ select_field = 1;
+ }
+ else
+ select_field = 0;
+ in_field = 1;
+ }
+ }
+ }
+
+ if (found_any_selected_field) /* Input ended without a line terminator: finish the last line. */
+ putchar ('\n');
+}
+
static void
cut_stream (FILE *stream)
{
- if (operating_mode == byte_mode)
- cut_bytes (stream);
- else
- cut_fields (stream);
+ switch (operating_mode)
+ {
+ case byte_mode:
+ cut_bytes (stream);
+ break;
+ case field_mode:
+ cut_fields (stream);
+ break;
+ case field_mode_ws:
+ cut_fields_ws (stream);
+ break;
+ }
}
/* Process file FILE to standard output.
@@ -610,7 +680,7 @@
delim = '\0';
have_read_stdin = 0;
- while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1)
+ while ((optc = getopt_long (argc, argv, "b:c:d:f:w:ns", longopts, NULL)) != -1)
{
switch (optc)
{
@@ -619,23 +689,35 @@
case 'b':
case 'c':
- /* Build the byte list. */
if (operating_mode != undefined_mode)
FATAL_ERROR (_("only one type of list may be specified"));
operating_mode = byte_mode;
+
+ /* Build the byte list. */
if (set_fields (optarg) == 0)
FATAL_ERROR (_("missing list of positions"));
break;
case 'f':
- /* Build the field list. */
if (operating_mode != undefined_mode)
FATAL_ERROR (_("only one type of list may be specified"));
operating_mode = field_mode;
+
+ /* Build the field list. */
if (set_fields (optarg) == 0)
FATAL_ERROR (_("missing list of fields"));
break;
+ case 'w':
+ if (operating_mode != undefined_mode)
+ FATAL_ERROR (_("only one type of list may be specified"));
+ operating_mode = field_mode_ws;
+
+ /* Build the field list. */
+ if (set_fields (optarg) == 0)
+ FATAL_ERROR (_("missing list of fields"));
+ break; /**/
+
case 'd':
/* New delimiter. */
/* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
@@ -685,7 +767,7 @@
if (output_delimiter_string == NULL)
{
static char dummy[2];
- dummy[0] = delim;
+ dummy[0] = (operating_mode == field_mode_ws ? ' ' : delim);
dummy[1] = '\0';
output_delimiter_string = dummy;
output_delimiter_length = 1;
_______________________________________________
Bug-textutils mailing list
address@hidden
http://mail.gnu.org/mailman/listinfo/bug-textutils