bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#23335: [PATCH 2/2] unexpand: Reimplemented the unexpand algorithm to satisfy the standard


From: Jonathan Buchanan
Subject: bug#23335: [PATCH 2/2] unexpand: Reimplemented the unexpand algorithm to satisfy the standard
Date: Thu, 21 Apr 2016 20:33:36 -0400

* TODO: Removed the section detailing how unexpand did
not satisfy the standard.
* src/unexpand.c: Reimplemented the unexpand algorithm. The program
now satisfies the conditions specified in the old TODO.
---
 TODO           |   4 --
 src/unexpand.c | 176 ++++++++++++++++++++++-----------------------------------
 2 files changed, 69 insertions(+), 111 deletions(-)

diff --git a/TODO b/TODO
index de95e5a..dc1a9e2 100644
--- a/TODO
+++ b/TODO
@@ -67,10 +67,6 @@ lib/strftime.c: Since %N is the only format that we need but that
   would expand /%(-_)?\d*N/ to the desired string and then pass the
   resulting string to glibc's strftime.
 
-unexpand: [http://www.opengroup.org/onlinepubs/007908799/xcu/unexpand.html]
-  printf 'x\t \t y\n'|unexpand -t 8,9 should print its input, unmodified.
-  printf 'x\t \t y\n'|unexpand -t 5,8 should print "x\ty\n"
-
 sort: Investigate better sorting algorithms; see Knuth vol. 3.
 
   We tried list merge sort, but it was about 50% slower than the
diff --git a/src/unexpand.c b/src/unexpand.c
index a758756..dcd40de 100644
--- a/src/unexpand.c
+++ b/src/unexpand.c
@@ -303,13 +303,6 @@ unexpand (void)
       /* Input character, or EOF.  */
       int c;
 
-      /* If true, perform translations.  */
-      bool convert = true;
-
-
-      /* The following variables have valid values only when CONVERT
-         is true:  */
-
       /* Column of next input character.  */
       uintmax_t column = 0;
 
@@ -319,127 +312,96 @@ unexpand (void)
       /* Index in TAB_LIST of next tab stop to examine.  */
       size_t tab_index = 0;
 
-      /* If true, the first pending blank came just before a tab stop.  */
-      bool one_blank_before_tab_stop = false;
-
-      /* If true, the previous input character was a blank.  This is
-         initially true, since initial strings of blanks are treated
-         as if the line was preceded by a blank.  */
-      bool prev_blank = true;
-
       /* Number of pending columns of blanks.  */
       size_t pending = 0;
 
-
-      /* Convert a line of text.  */
+      /* If true, the previous input character was not a blank.  */
+      bool previous_non_blank = false;
 
       do
         {
           while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
             continue;
 
-          if (convert)
+          if (c < 0)
+            {
+              free (pending_blank);
+              return;
+            }
+
+          /* Update the next tab column */
+          if (next_tab_column <= column)
             {
-              bool blank = !! isblank (c);
+              if (tab_size)
+                next_tab_column = (column + (tab_size - column % tab_size));
+              else
+                if (tab_index < first_free_tab)
+                  next_tab_column = tab_list[tab_index++];
+                else
+                  next_tab_column = -1;
+            }
 
-              if (blank)
+          bool blank = !! isblank (c);
+          if (!blank)
+            {
+              /* If no -a, stop converting once a non-blank is reached.  */
+              if (!convert_entire_line)
+                next_tab_column = -1;
+              if (fwrite (pending_blank, sizeof (char), pending, stdout)
+                  != pending)
+                error (EXIT_FAILURE, errno, _("write error"));
+              pending = 0;
+              if (putchar (c) < 0)
+                error (EXIT_FAILURE, errno, _("write error"));
+              previous_non_blank = true;
+            }
+          else
+            {
+              pending_blank[pending] = c;
+              pending++;
+              /* POSIX says spaces should not precede tabs, so remove spaces
+                 if a tab is found after spaces.  */
+              if (pending_blank[0] != '\t' && c == '\t')
                 {
-                  if (next_tab_column <= column)
+                  pending = 1;
+                  pending_blank[0] = '\t';
+                }
+              if (column + 1 == next_tab_column)
+                {
+                  /* POSIX says single trailing spaces should not be converted
+                     to tabs if they are followed by a non-blank.  */
+                  if (c == ' ' && pending == 1 && previous_non_blank)
                     {
-                      if (tab_size)
-                        next_tab_column =
-                          column + (tab_size - column % tab_size);
+                      previous_non_blank = false;
+                      if ((c = getc (fp)) >= 0)
+                        blank = !! isblank (c);
                       else
-                        while (true)
-                          if (tab_index == first_free_tab)
-                            {
-                              convert = false;
-                              break;
-                            }
-                          else
-                            {
-                              uintmax_t tab = tab_list[tab_index++];
-                              if (column < tab)
-                                {
-                                  next_tab_column = tab;
-                                  break;
-                                }
-                            }
-                    }
-
-                  if (convert)
-                    {
-                      if (next_tab_column < column)
-                        error (EXIT_FAILURE, 0, _("input line is too long"));
-
-                      if (c == '\t')
                         {
-                          column = next_tab_column;
-
-                          if (pending)
-                            pending_blank[0] = '\t';
+                          /* End of file, do not convert to tab.  */
+                          if (putchar (' ') < 0)
+                            error (EXIT_FAILURE, errno, _("write error"));
+                          continue;
                         }
+                      if (!blank)
+                        c = ' ';
                       else
-                        {
-                          column++;
-
-                          if (! (prev_blank && column == next_tab_column))
-                            {
-                              /* It is not yet known whether the pending blanks
-                                 will be replaced by tabs.  */
-                              if (column == next_tab_column)
-                                one_blank_before_tab_stop = true;
-                              pending_blank[pending++] = c;
-                              prev_blank = true;
-                              continue;
-                            }
-
-                          /* Replace the pending blanks by a tab or two.  */
-                          pending_blank[0] = c = '\t';
-                        }
-
-                      /* Discard pending blanks, unless it was a single
-                         blank just before the previous tab stop.  */
-                      pending = one_blank_before_tab_stop;
+                        c = '\t';
+                      if (putchar (c) < 0)
+                        error (EXIT_FAILURE, errno, _("write error"));
+                      column += 1;
+                      pending = 0;
+                      /* Move the position in the file back and continue.  */
+                      fseek (fp, -1, SEEK_CUR);
+                      continue;
                     }
-                }
-              else if (c == '\b')
-                {
-                  /* Go back one column, and force recalculation of the
-                     next tab stop.  */
-                  column -= !!column;
-                  next_tab_column = column;
-                  tab_index -= !!tab_index;
-                }
-              else
-                {
-                  column++;
-                  if (!column)
-                    error (EXIT_FAILURE, 0, _("input line is too long"));
-                }
-
-              if (pending)
-                {
-                  if (pending > 1 && one_blank_before_tab_stop)
-                    pending_blank[0] = '\t';
-                  if (fwrite (pending_blank, 1, pending, stdout) != pending)
-                    error (EXIT_FAILURE, errno, _("write error"));
+                  previous_non_blank = false;
                   pending = 0;
-                  one_blank_before_tab_stop = false;
+                  putchar ('\t');
                 }
-
-              prev_blank = blank;
-              convert &= convert_entire_line || blank;
-            }
-
-          if (c < 0)
-            {
-              free (pending_blank);
-              return;
             }
-
-          if (putchar (c) < 0)
-            error (EXIT_FAILURE, errno, _("write error"));
+          column++;
+          if (!column)
+            error (EXIT_FAILURE, 0, _("input line is too long"));
         }
       while (c != '\n');
     }
-- 
2.8.0






reply via email to

[Prev in Thread] Current Thread [Next in Thread]