--- Begin Message ---
Subject: |
[PATCH] tail: fix following /proc and /sys files when using a 64K page size |
Date: |
Mon, 27 Nov 2023 09:24:33 -0700 |
* src/tail.c (file_lines): Use fstat() to determine a file's block
size and dynamically allocate a buffer of that size for traversing
backwards.
---
src/tail.c | 36 ++++++++++++++++++++++++++----------
1 file changed, 26 insertions(+), 10 deletions(-)
diff --git a/src/tail.c b/src/tail.c
index c45f3b65a..437a38204 100644
--- a/src/tail.c
+++ b/src/tail.c
@@ -518,18 +518,30 @@ static bool
file_lines (char const *pretty_filename, int fd, uintmax_t n_lines,
off_t start_pos, off_t end_pos, uintmax_t *read_pos)
{
- char buffer[BUFSIZ];
+ char *buffer;
size_t bytes_read;
+ blksize_t bufsize;
off_t pos = end_pos;
+ bool ok = true;
+ struct stat stats;
if (n_lines == 0)
return true;
+ if (fstat (fd, &stats) != 0)
+ {
+ error (0, errno, _("cannot fstat %s"), quoteaf (pretty_filename));
+ return false;
+ }
+
+ bufsize = ST_BLKSIZE (stats);
+ buffer = xmalloc (bufsize);
+
/* Set 'bytes_read' to the size of the last, probably partial, buffer;
0 < 'bytes_read' <= 'BUFSIZ'. */
- bytes_read = (pos - start_pos) % BUFSIZ;
+ bytes_read = (pos - start_pos) % bufsize;
if (bytes_read == 0)
- bytes_read = BUFSIZ;
+ bytes_read = bufsize;
/* Make 'pos' a multiple of 'BUFSIZ' (0 if the file is short), so that all
reads will be on block boundaries, which might increase efficiency. */
pos -= bytes_read;
@@ -538,7 +550,8 @@ file_lines (char const *pretty_filename, int fd, uintmax_t
n_lines,
if (bytes_read == SAFE_READ_ERROR)
{
error (0, errno, _("error reading %s"), quoteaf (pretty_filename));
- return false;
+ ok = false;
+ goto free_buffer;
}
*read_pos = pos + bytes_read;
@@ -565,7 +578,7 @@ file_lines (char const *pretty_filename, int fd, uintmax_t
n_lines,
xwrite_stdout (nl + 1, bytes_read - (n + 1));
*read_pos += dump_remainder (false, pretty_filename, fd,
end_pos - (pos + bytes_read));
- return true;
+ goto free_buffer;
}
}
@@ -577,23 +590,26 @@ file_lines (char const *pretty_filename, int fd,
uintmax_t n_lines,
xlseek (fd, start_pos, SEEK_SET, pretty_filename);
*read_pos = start_pos + dump_remainder (false, pretty_filename, fd,
end_pos);
- return true;
+ goto free_buffer;
}
- pos -= BUFSIZ;
+ pos -= bufsize;
xlseek (fd, pos, SEEK_SET, pretty_filename);
- bytes_read = safe_read (fd, buffer, BUFSIZ);
+ bytes_read = safe_read (fd, buffer, bufsize);
if (bytes_read == SAFE_READ_ERROR)
{
error (0, errno, _("error reading %s"), quoteaf (pretty_filename));
- return false;
+ ok = false;
+ goto free_buffer;
}
*read_pos = pos + bytes_read;
}
while (bytes_read > 0);
- return true;
+free_buffer:
+ free (buffer);
+ return ok;
}
/* Print the last N_LINES lines from the end of the standard input,
--
2.42.0
--- End Message ---
--- Begin Message ---
Subject: |
Re: bug#67490: [PATCH v2] tail: fix tailing sysfs files on large page kernels |
Date: |
Fri, 1 Dec 2023 23:23:19 +0000 |
User-agent: |
Mozilla Thunderbird |
On 01/12/2023 01:54, Paul Eggert wrote:
On 11/30/23 12:11, Pádraig Brady wrote:
Though that will generally give 128K, which is good when processing all
of a file,
but perhaps overkill when processing just the last part of a file.
The 128 KiB number was computed as being better for apps like 'sed' that
typically read all or most of the file. 'tail' sometimes behaves that
way (e.g., 'tail -c +10') and so 'tail' should use 128 KiB in those
cases. The simplest way to do that is for 'tail' to use 128 KiB all the
time - that would cost little for uses like plain 'tail' and it could be
a significant win for uses like 'tail -c +10'.
Yes I agree we should use io_blksize() in other routines in tail
where we may dump lots of a file. However in this (most common) case
the routine is dealing with the end of a regular file,
so it's probably best to somewhat minimize the amount of data read,
and more directly check the page_size which is issue at hand.
I've pushed the fix at https://github.com/coreutils/coreutils/commit/73d119f4f
where the adjustment (which also corresponds to what we do in wc) is:
if (sb->st_size % page_size == 0)
bufsize = MAX (BUFSIZ, page_size);
I'll follow up with another patch to address the performance aspect,
which uses io_blksize() where appropriate.
(As an aside, the 128 KiB number was computed in 2014. These days 256
KiB might be better if someone could take the time to measure....)
I periodically check with the documented script,
but I should update the comment when I do that.
I'll update the date in the comment now at least.
I quickly tested a few systems here,
which suggests 256KiB _may_ be a more appropriate default now.
More testing required.
Marking this as done.
thanks,
Pádraig
--- End Message ---