bug-gnu-utils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

tail inefficiencies


From: Bruno Haible
Subject: tail inefficiencies
Date: Tue, 15 May 2001 15:22:54 +0200 (CEST)

Here is a patch to fix a FIXME in the tail program: When reading from a
regular file, but not initially positioned at the file's start, "tail"
currently reads the complete contents of the file.

It also fixes a second inefficiency: When the file has fewer lines than
wanted, "tail" reads the first sector twice. Which is unnecessary, because
it also has it in the buffer:

$ echo adijasd > foo.in
$ ltrace tail -2 < foo.in
setlocale(6, "")                                  = "de_DE.UTF-8"
bindtextdomain("textutils", "/packages/gnu/share/locale") = 
"/packages/gnu/share/locale"
textdomain("textutils")                           = "textutils"
__cxa_atexit(0x0804b308, 0, 0, 0x0804c3ec, 0x401368e4) = 0
__errno_location()                                = 0x401375e0
__strtoul_internal(0xbffff6c0, 0xbffff444, 10, 0, 0xbffff6c1) = 2
malloc(52)                                        = 0x080633c8
__fxstat64(3, 0, 0xbffff32c, 0x40017510, 0x0804d0eb) = 0
lseek64(0, 0, 0, 1, 0x0804d0eb)                   = 0
lseek64(0, 0, 0, 2, 0x0804d0eb)                   = 8
lseek64(0, 0, 0, 0, 0)                            = 0
read(0, "adijasd\n", 8)                           = 8
lseek64(0, 0, 0, 0, 0)                            = 0   <---- UNNECESSARY
read(0, "adijasd\n", 8)                           = 8   <---- UNNECESSARY
fwrite("adijasd\n\344h\023@", 1, 8, 0x401346c0adijasd
)   = 8
close(0)                                          = 0
exit(0)                                           = <void>


2001-05-12  Bruno Haible  <address@hidden>

        * src/tail.c (xwrite): Change argument type to 'const char *'. Fix
        check of fwrite return value.
        (file_lines): Remove the file_length argument. Treat the case when
        the file descriptor is not positioned at the file start.
        (tail_lines): For regular files but when not positioned at the file
        start, call file_lines instead of pipe_lines.

*** textutils/src/tail.c        Sun Mar 18 08:53:29 2001
--- textutils-i18n/src/tail.c   Sat May 12 21:29:43 2001
***************
*** 305,315 ****
  }
  
  static void
! xwrite (int fd, char *const buffer, size_t n_bytes)
  {
    assert (fd == STDOUT_FILENO);
    assert (n_bytes >= 0);
!   if (n_bytes > 0 && fwrite (buffer, 1, n_bytes, stdout) == 0)
      error (EXIT_FAILURE, errno, _("write error"));
  }
  
--- 313,323 ----
  }
  
  static void
! xwrite (int fd, const char *const buffer, size_t n_bytes)
  {
    assert (fd == STDOUT_FILENO);
    assert (n_bytes >= 0);
!   if (n_bytes > 0 && fwrite (buffer, 1, n_bytes, stdout) < n_bytes)
      error (EXIT_FAILURE, errno, _("write error"));
  }
  
***************
*** 364,385 ****
     Go backward through the file, reading `BUFSIZ' bytes at a time (except
     probably the first), until we hit the start of the file or have
     read NUMBER newlines.
-    FILE_LENGTH is the length of the file (one more than the offset of the
-    last byte of the file).
     Return 0 if successful, 1 if an error occurred.  */
  
  static int
! file_lines (const char *pretty_filename, int fd, long int n_lines,
!           off_t file_length)
  {
    char buffer[BUFSIZ];
    int bytes_read;
    int i;                      /* Index into `buffer' for scanning.  */
!   off_t pos = file_length;
  
    if (n_lines == 0)
      return 0;
  
    /* Set `bytes_read' to the size of the last, probably partial, buffer;
       0 < `bytes_read' <= `BUFSIZ'.  */
    bytes_read = pos % BUFSIZ;
--- 372,399 ----
     Go backward through the file, reading `BUFSIZ' bytes at a time (except
     probably the first), until we hit the start of the file or have
     read NUMBER newlines.
     Return 0 if successful, 1 if an error occurred.  */
  
  static int
! file_lines (const char *pretty_filename, int fd, long int n_lines)
  {
    char buffer[BUFSIZ];
    int bytes_read;
    int i;                      /* Index into `buffer' for scanning.  */
!   off_t file_start;
!   off_t file_length;
!   off_t pos;
  
    if (n_lines == 0)
      return 0;
  
+   file_start = lseek (fd, (off_t) 0, SEEK_CUR);
+   file_length = lseek (fd, (off_t) 0, SEEK_END);
+   if (file_start >= file_length)
+     return 0;
+ 
+   pos = file_length;
+ 
    /* Set `bytes_read' to the size of the last, probably partial, buffer;
       0 < `bytes_read' <= `BUFSIZ'.  */
    bytes_read = pos % BUFSIZ;
***************
*** 388,393 ****
--- 402,414 ----
    /* Make `pos' a multiple of `BUFSIZ' (0 if the file is short), so that all
       reads will be on block boundaries, which might increase efficiency.  */
    pos -= bytes_read;
+   /* But don't position below file_start.  */
+   if (pos < file_start)
+     {
+       /* Note: pos + bytes_read = file_length > file_start.  */
+       bytes_read -= file_start - pos; /* > 0 */
+       pos = file_start;
+     }
    /* FIXME: check lseek return value  */
    lseek (fd, pos, SEEK_SET);
    bytes_read = safe_read (fd, buffer, bytes_read);
***************
*** 419,437 ****
            }
        }
        /* Not enough newlines in that bufferfull.  */
!       if (pos == 0)
        {
          /* Not enough lines in the file; print the entire file.  */
!         /* FIXME: check lseek return value  */
!         lseek (fd, (off_t) 0, SEEK_SET);
!         dump_remainder (pretty_filename, fd, file_length);
          return 0;
        }
!       pos -= BUFSIZ;
        /* FIXME: check lseek return value  */
        lseek (fd, pos, SEEK_SET);
      }
!   while ((bytes_read = safe_read (fd, buffer, BUFSIZ)) > 0);
  
    if (bytes_read == -1)
      {
--- 440,467 ----
            }
        }
        /* Not enough newlines in that bufferfull.  */
!       if (pos == file_start)
        {
          /* Not enough lines in the file; print the entire file.  */
!         xwrite (STDOUT_FILENO, buffer, bytes_read);
!         dump_remainder (pretty_filename, fd,
!                         file_length - (pos + bytes_read));
          return 0;
        }
!       /* Position file descriptor for next read.  */
!       bytes_read = BUFSIZ;
!       pos -= bytes_read;
!       /* But don't position below file_start.  */
!       if (pos < file_start)
!       {
!         /* Note: pos + bytes_read > file_start.  */
!         bytes_read -= file_start - pos;       /* > 0 */
!         pos = file_start;
!       }
        /* FIXME: check lseek return value  */
        lseek (fd, pos, SEEK_SET);
      }
!   while ((bytes_read = safe_read (fd, buffer, bytes_read)) > 0);
  
    if (bytes_read == -1)
      {
***************
*** 1059,1065 ****
  tail_lines (const char *pretty_filename, int fd, long int n_lines)
  {
    struct stat stats;
-   off_t length;
  
    /* We need binary input, since `tail' relies on `lseek' and byte counts,
       while binary output will preserve the style (Unix/DOS) of text file.  */
--- 1206,1211 ----
***************
*** 1079,1097 ****
      }
    else
      {
!       /* Use file_lines only if FD refers to a regular file with
!          its file pointer positioned at beginning of file.  */
!       /* FIXME: adding the lseek conjunct is a kludge.
!        Once there's a reasonable test suite, fix the true culprit:
!        file_lines.  file_lines shouldn't presume that the input
!        file pointer is initially positioned to beginning of file.  */
!       if (S_ISREG (stats.st_mode)
!         && lseek (fd, (off_t) 0, SEEK_CUR) == (off_t) 0)
!       {
!         length = lseek (fd, (off_t) 0, SEEK_END);
!         if (length != 0 && file_lines (pretty_filename, fd, n_lines, length))
!           return 1;
!       }
        else
        return pipe_lines (pretty_filename, fd, n_lines);
      }
--- 1225,1233 ----
      }
    else
      {
!       /* Use file_lines only if FD refers to a regular file.  */
!       if (S_ISREG (stats.st_mode))
!       return file_lines (pretty_filename, fd, n_lines);
        else
        return pipe_lines (pretty_filename, fd, n_lines);
      }



reply via email to

[Prev in Thread] Current Thread [Next in Thread]