[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#23335: [PATCH 2/2] unexpand: Reimplemented the unexpand algorithm to satisfy the standard
From: |
Jonathan Buchanan |
Subject: |
bug#23335: [PATCH 2/2] unexpand: Reimplemented the unexpand algorithm to satisfy the standard |
Date: |
Thu, 21 Apr 2016 20:33:36 -0400 |
* TODO: Removed the section detailing how unexpand did
not satisfy the standard.
* src/unexpand.c: Reimplemented the unexpand algorithm. The program
now satisfies the conditions specified in the old TODO.
---
TODO | 4 --
src/unexpand.c | 176 ++++++++++++++++++++++-----------------------------------
2 files changed, 69 insertions(+), 111 deletions(-)
diff --git a/TODO b/TODO
index de95e5a..dc1a9e2 100644
--- a/TODO
+++ b/TODO
@@ -67,10 +67,6 @@ lib/strftime.c: Since %N is the only format that we need but that
would expand /%(-_)?\d*N/ to the desired string and then pass the
resulting string to glibc's strftime.
-unexpand: [http://www.opengroup.org/onlinepubs/007908799/xcu/unexpand.html]
- printf 'x\t \t y\n'|unexpand -t 8,9 should print its input, unmodified.
- printf 'x\t \t y\n'|unexpand -t 5,8 should print "x\ty\n"
-
sort: Investigate better sorting algorithms; see Knuth vol. 3.
We tried list merge sort, but it was about 50% slower than the
diff --git a/src/unexpand.c b/src/unexpand.c
index a758756..dcd40de 100644
--- a/src/unexpand.c
+++ b/src/unexpand.c
@@ -303,13 +303,6 @@ unexpand (void)
/* Input character, or EOF. */
int c;
- /* If true, perform translations. */
- bool convert = true;
-
-
- /* The following variables have valid values only when CONVERT
- is true: */
-
/* Column of next input character. */
uintmax_t column = 0;
@@ -319,127 +312,96 @@ unexpand (void)
/* Index in TAB_LIST of next tab stop to examine. */
size_t tab_index = 0;
- /* If true, the first pending blank came just before a tab stop. */
- bool one_blank_before_tab_stop = false;
-
- /* If true, the previous input character was a blank. This is
- initially true, since initial strings of blanks are treated
- as if the line was preceded by a blank. */
- bool prev_blank = true;
-
/* Number of pending columns of blanks. */
size_t pending = 0;
-
- /* Convert a line of text. */
+ /* If true, the previous input character was not a blank. */
+ bool previous_non_blank = false;
do
{
while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
continue;
- if (convert)
+ if (c < 0)
+ {
+ free (pending_blank);
+ return;
+ }
+
+ /* Update the next tab column */
+ if (next_tab_column <= column)
{
- bool blank = !! isblank (c);
+ if (tab_size)
+ next_tab_column = (column + (tab_size - column % tab_size));
+ else
+ if (tab_index < first_free_tab)
+ next_tab_column = tab_list[tab_index++];
+ else
+ next_tab_column = -1;
+ }
- if (blank)
+ bool blank = !! isblank (c);
+ if (!blank)
+ {
+ /* If no -a, stop converting once a non-blank is reached. */
+ if (!convert_entire_line)
+ next_tab_column = -1;
+ if (fwrite (pending_blank, sizeof (char), pending, stdout)
+ != pending)
+ error (EXIT_FAILURE, errno, _("write error"));
+ pending = 0;
+ if (putchar (c) < 0)
+ error (EXIT_FAILURE, errno, _("write error"));
+ previous_non_blank = true;
+ }
+ else
+ {
+ pending_blank[pending] = c;
+ pending++;
+ /* POSIX says spaces should not precede tabs, so remove spaces
+ if a tab is found after spaces. */
+ if (pending_blank[0] != '\t' && c == '\t')
{
- if (next_tab_column <= column)
+ pending = 1;
+ pending_blank[0] = '\t';
+ }
+ if (column + 1 == next_tab_column)
+ {
+ /* POSIX says single trailing spaces should not be converted
+ to tabs if they are followed by a non-blank. */
+ if (c == ' ' && pending == 1 && previous_non_blank)
{
- if (tab_size)
- next_tab_column =
- column + (tab_size - column % tab_size);
+ previous_non_blank = false;
+ if ((c = getc (fp)) >= 0)
+ blank = !! isblank (c);
else
- while (true)
- if (tab_index == first_free_tab)
- {
- convert = false;
- break;
- }
- else
- {
- uintmax_t tab = tab_list[tab_index++];
- if (column < tab)
- {
- next_tab_column = tab;
- break;
- }
- }
- }
-
- if (convert)
- {
- if (next_tab_column < column)
- error (EXIT_FAILURE, 0, _("input line is too long"));
-
- if (c == '\t')
{
- column = next_tab_column;
-
- if (pending)
- pending_blank[0] = '\t';
+ /* End of file, do not convert to tab. */
+ if (putchar (' ') < 0)
+ error (EXIT_FAILURE, errno, _("write error"));
+ continue;
}
+ if (!blank)
+ c = ' ';
else
- {
- column++;
-
- if (! (prev_blank && column == next_tab_column))
- {
- /* It is not yet known whether the pending blanks
- will be replaced by tabs. */
- if (column == next_tab_column)
- one_blank_before_tab_stop = true;
- pending_blank[pending++] = c;
- prev_blank = true;
- continue;
- }
-
- /* Replace the pending blanks by a tab or two. */
- pending_blank[0] = c = '\t';
- }
-
- /* Discard pending blanks, unless it was a single
- blank just before the previous tab stop. */
- pending = one_blank_before_tab_stop;
+ c = '\t';
+ if (putchar (c) < 0)
+ error (EXIT_FAILURE, errno, _("write error"));
+ column += 1;
+ pending = 0;
+ /* Move the position in the file back and continue. */
+ fseek (fp, -1, SEEK_CUR);
+ continue;
}
- }
- else if (c == '\b')
- {
- /* Go back one column, and force recalculation of the
- next tab stop. */
- column -= !!column;
- next_tab_column = column;
- tab_index -= !!tab_index;
- }
- else
- {
- column++;
- if (!column)
- error (EXIT_FAILURE, 0, _("input line is too long"));
- }
-
- if (pending)
- {
- if (pending > 1 && one_blank_before_tab_stop)
- pending_blank[0] = '\t';
- if (fwrite (pending_blank, 1, pending, stdout) != pending)
- error (EXIT_FAILURE, errno, _("write error"));
+ previous_non_blank = false;
pending = 0;
- one_blank_before_tab_stop = false;
+ putchar ('\t');
}
-
- prev_blank = blank;
- convert &= convert_entire_line || blank;
- }
-
- if (c < 0)
- {
- free (pending_blank);
- return;
}
-
- if (putchar (c) < 0)
- error (EXIT_FAILURE, errno, _("write error"));
+ column++;
+ if (!column)
+ error (EXIT_FAILURE, 0, _("input line is too long"));
}
while (c != '\n');
}
--
2.8.0