--- lib/argv-iter.c 2013-12-04 16:53:33.000000000 +0200 +++ lib/argv-iter.c 2014-05-20 22:59:57.000000000 +0300 @@ -27,6 +27,7 @@ /* Test FP to determine whether in read-mode or argv-mode. */ /* file-mode: fp records position */ FILE *fp; + int delim; size_t item_idx; char *tok; size_t buf_len; @@ -43,6 +44,7 @@ if (!ai) return NULL; ai->fp = NULL; + ai->delim = 0; ai->arg_list = argv; ai->p = argv; return ai; @@ -53,10 +55,19 @@ struct argv_iterator * argv_iter_init_stream (FILE *fp) { + return argv_iter_init_stream_delim (fp, '\0'); +} + +/* Initialize to read from the stream, FP. + The input is expected to contain a list of DELIM-delimited tokens. */ +struct argv_iterator * +argv_iter_init_stream_delim (FILE *fp, int delim) +{ struct argv_iterator *ai = malloc (sizeof *ai); if (!ai) return NULL; ai->fp = fp; + ai->delim = delim; ai->tok = NULL; ai->buf_len = 0; @@ -70,12 +81,23 @@ { if (ai->fp) { - ssize_t len = getdelim (&ai->tok, &ai->buf_len, '\0', ai->fp); + ssize_t len = getdelim (&ai->tok, &ai->buf_len, ai->delim, ai->fp); if (len < 0) { *err = feof (ai->fp) ? 
AI_ERR_EOF : AI_ERR_READ; return NULL; } + else if (len > 0) + { + if (ai->delim) + { + char *p = &ai->tok[len - 1]; + if (*p == ai->delim) + *p = 0; + } + } + else + abort (); *err = AI_ERR_OK; ai->item_idx++; --- lib/argv-iter.h 2013-12-04 16:53:33.000000000 +0200 +++ lib/argv-iter.h 2014-05-20 21:17:42.000000000 +0300 @@ -34,6 +34,8 @@ _GL_ARG_NONNULL ((1)); struct argv_iterator *argv_iter_init_stream (FILE *fp) _GL_ARG_NONNULL ((1)); +struct argv_iterator *argv_iter_init_stream_delim (FILE *fp, int delim) + _GL_ARG_NONNULL ((1)); char *argv_iter (struct argv_iterator *, enum argv_iter_err *) _GL_ARG_NONNULL ((1, 2)); size_t argv_iter_n_args (struct argv_iterator const *) --- lib/readtokens0.c 2013-12-04 16:53:33.000000000 +0200 +++ lib/readtokens0.c 2014-05-20 23:22:41.000000000 +0300 @@ -65,7 +65,16 @@ bool readtokens0 (FILE *in, struct Tokens *t) { + return readtokens0_delim (in, t, '\0'); +} +/* Read DELIM-separated tokens from stream IN into T until EOF or error. + The final DELIM is optional. Always append a NULL pointer to the + resulting list of token pointers, but that pointer isn't counted + via t->n_tok. Return true if successful. 
*/ +bool +readtokens0_delim (FILE *in, struct Tokens *t, int delim) +{ while (1) { int c = fgetc (in); @@ -84,6 +93,8 @@ break; } + if (c == delim) + c = '\0'; obstack_1grow (&t->o_data, c); if (c == '\0') save_token (t); --- lib/readtokens0.h 2013-12-04 16:53:33.000000000 +0200 +++ lib/readtokens0.h 2014-05-20 23:23:02.000000000 +0300 @@ -38,5 +38,6 @@ void readtokens0_init (struct Tokens *t); void readtokens0_free (struct Tokens *t); bool readtokens0 (FILE *in, struct Tokens *t); +bool readtokens0_delim (FILE *in, struct Tokens *t, int delim); #endif --- src/stat.c 2013-12-13 16:12:46.000000000 +0200 +++ src/stat.c 2014-05-22 08:50:42.000000000 +0300 @@ -55,6 +55,7 @@ # include #endif #include +#include #include "system.h" @@ -72,6 +73,17 @@ #include "strftime.h" #include "find-mount-point.h" #include "xvasprintf.h" +#include "readtokens0.h" +#include "argv-iter.h" +#include "physmem.h" +#include "argmatch.h" +#include "quotearg.h" +#include "quote.h" +#include "md5.h" +#include "sha1.h" +#include "sha256.h" +#include "sha512.h" +#include "fadvise.h" #if USE_STATVFS # define STRUCT_STATVFS struct statvfs @@ -174,15 +186,21 @@ enum { - PRINTF_OPTION = CHAR_MAX + 1 + PRINTF_OPTION = CHAR_MAX + 1, + QUOTING_STYLE_OPTION, + FILES_FROM_OPTION, + DIGEST_TYPE_OPTION, }; static struct option const long_options[] = { {"dereference", no_argument, NULL, 'L'}, + {"digest-type", required_argument, NULL, DIGEST_TYPE_OPTION}, {"file-system", no_argument, NULL, 'f'}, + {"files-from", required_argument, NULL, FILES_FROM_OPTION}, {"format", required_argument, NULL, 'c'}, {"printf", required_argument, NULL, PRINTF_OPTION}, + {"quoting-style", required_argument, NULL, QUOTING_STYLE_OPTION}, {"terse", no_argument, NULL, 't'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, @@ -204,6 +222,118 @@ static char const *decimal_point; static size_t decimal_point_len; +/* Quoting style for file name output. 
*/
+static enum quoting_style filename_quoting_style = literal_quoting_style;
+
+enum digest_type  /* Index into digest_descs; keep both in the same order.  */
+  {
+    md5_digest_type,
+    sha1_digest_type,
+    sha224_digest_type,
+    sha256_digest_type,
+    sha384_digest_type,
+    sha512_digest_type,
+  };
+
+struct digest_desc
+{
+  const char *type;       /* Algorithm name, e.g. "MD5".  */
+  const char *reference;  /* Document defining it, e.g. "RFC 1321".  */
+  int (*stream) (FILE *, void *);  /* Digest a stream into a result buffer.  */
+  void *(*buffer) (const char *, size_t, void *);  /* Likewise for memory.  */
+  size_t bits;            /* Digest size, in bits.  */
+  size_t align;           /* Alignment required of the result buffer.  */
+};
+
+static const struct digest_desc digest_descs[] =
+{
+  /* md5 digest */
+  {
+    .type = "MD5",
+    .reference = "RFC 1321",
+    .stream = md5_stream,
+    .buffer = md5_buffer,
+    .bits = 128,
+    .align = 4
+  },
+
+  /* sha1 digest */
+  {
+    .type = "SHA1",
+    .reference = "FIPS-180-1",
+    .stream = sha1_stream,
+    .buffer = sha1_buffer,
+    .bits = 160,
+    .align = 4
+  },
+
+  /* sha224 digest */
+  {
+    .type = "SHA224",
+    .reference = "RFC 3874",
+    .stream = sha224_stream,
+    .buffer = sha224_buffer,
+    .bits = 224,
+    .align = 4
+  },
+
+  /* sha256 digest */
+  {
+    .type = "SHA256",
+    .reference = "FIPS-180-2",
+    .stream = sha256_stream,
+    .buffer = sha256_buffer,
+    .bits = 256,
+    .align = 4
+  },
+
+  /* sha384 digest */
+  {
+    .type = "SHA384",
+    .reference = "FIPS-180-2",
+    .stream = sha384_stream,
+    .buffer = sha384_buffer,
+    .bits = 384,
+    .align = 8
+  },
+
+  /* sha512 digest */
+  {
+    .type = "SHA512",
+    .reference = "FIPS-180-2",
+    .stream = sha512_stream,
+    .buffer = sha512_buffer,
+    .bits = 512,
+    .align = 8
+  },
+};
+
+#define MAX_DIGEST_ALIGN 8  /* Largest align value in digest_descs.  */
+#define MAX_DIGEST_BITS 512  /* Largest bits value in digest_descs.  */
+#define MAX_DIGEST_BYTES (MAX_DIGEST_BITS / CHAR_BIT)
+
+static char const *const digest_type_args[] = {
+  "md5",
+  "sha1",
+  "sha224",
+  "sha256",
+  "sha384",
+  "sha512",
+  NULL
+};
+static const enum digest_type digest_type_vals[] = {
+  md5_digest_type,
+  sha1_digest_type,
+  sha224_digest_type,
+  sha256_digest_type,
+  sha384_digest_type,
+  sha512_digest_type,
+};
+ARGMATCH_VERIFY (digest_type_args, digest_type_vals);
+
+/* Type of computed digest sums.  */
+static enum digest_type file_digest_type = sha1_digest_type;
+
 /* Return the type of the specified file system.
Some systems have statfvs.f_basetype[FSTYPSZ] (AIX, HP-UX, and Solaris).
    Others have statvfs.f_fstypename[_VFS_NAMELEN] (NetBSD 3.0).
@@ -592,6 +722,23 @@
   return printf (pformat, -0.25);
 }
 
+/* Output the ARG_SIZE bytes at ARG as one hexadecimal string, formatting
+   each byte with printf's %02x, via out_string with PFORMAT/PREFIX_LEN.  */
+static void
+out_hex_string (char *pformat, size_t prefix_len, char const *arg,
+                size_t arg_size)
+{
+  char const *end = arg + arg_size;
+  char *str = alloca (2 * arg_size + 1);  /* 2 hex digits per byte + NUL.  */
+  char *ptr = str;
+
+  for (; arg < end; arg++, ptr += 2)
+    sprintf (ptr, "%02x", (unsigned char) *arg);
+  *ptr = '\0';
+
+  out_string (pformat, prefix_len, str);
+}
+
 /* Output the number of seconds since the Epoch, using a format
    that acts like printf's %f format.  */
 static void
@@ -909,6 +1056,112 @@
   return z;
 }
 
+/* Compute the digest of regular file FILENAME by opening it and passing
+   the stream to STREAM.
+
+   Put the checksum in *BIN_RESULT, which must be properly aligned.
+   Return true if successful; otherwise diagnose the error, return false.  */
+static bool
+digest_regular_file (int (*stream) (FILE *, void *), const char *filename,
+                     unsigned char *bin_result)
+{
+  FILE *fp = fopen (filename, "rb");
+  if (fp == NULL)
+    {
+      /* Quote the name here too, as the other diagnostics below do.  */
+      error (0, errno, "%s", quote (filename));
+      return false;
+    }
+
+  /* Hint that FP is about to be read sequentially.  */
+  fadvise (fp, FADVISE_SEQUENTIAL);
+
+  int err = stream (fp, bin_result);
+  if (err)
+    {
+      /* Diagnose before fclose, which could overwrite errno.  */
+      error (0, errno, "%s", quote (filename));
+      fclose (fp);
+      return false;
+    }
+
+  if (fclose (fp) != 0)
+    {
+      error (0, errno, "%s", quote (filename));
+      return false;
+    }
+
+  return true;
+}
+
+/* An interface to the function BUFFER.
+   Operate on symlink file FILENAME of size LINK_SIZE.
+
+   Put the checksum in *BIN_RESULT, which must be properly aligned.
+   Return true if successful.
*/
+static bool
+digest_symlink_file (void *(*buffer) (const char *, size_t, void *),
+                     const char *filename, size_t link_size,
+                     unsigned char *bin_result)
+{
+  char *link_name = areadlink_with_size (filename, link_size);
+  if (link_name == NULL)
+    {
+      error (0, errno, _("cannot read symbolic link %s"), quote (filename));
+      return false;
+    }
+
+  /* Hash what was actually read: LINK_SIZE is only the caller's st_size
+     and need not match the length of the string returned above.  */
+  (void) buffer (link_name, strlen (link_name), bin_result);
+  free (link_name);
+
+  return true;
+}
+
+/* Print the digest of FILENAME in hexadecimal.  Print "-" instead when
+   STATBUF describes neither a regular file nor a symbolic link, and "?"
+   when computing the digest failed.  Return true upon failure.  */
+static bool ATTRIBUTE_WARN_UNUSED_RESULT
+out_file_digest (char *pformat, size_t prefix_len, char const *filename,
+                 struct stat *statbuf)
+{
+  static unsigned char raw_buffer[MAX_DIGEST_BYTES + MAX_DIGEST_ALIGN];
+  const struct digest_desc *desc = &digest_descs[file_digest_type];
+  bool digest = false;
+  bool fail = false;
+
+  assert (desc->bits % CHAR_BIT == 0);
+  assert (desc->bits / CHAR_BIT <= MAX_DIGEST_BYTES);
+  assert (desc->align <= MAX_DIGEST_ALIGN);
+
+  /* The number of bytes required by the digest sum.  */
+  size_t n_bin_buffer = desc->bits / CHAR_BIT;
+
+  /* Make sure bin_buffer is properly aligned.  */
+  unsigned char *bin_buffer = ptr_align (raw_buffer, desc->align);
+
+  assert (bin_buffer >= raw_buffer);
+  assert (bin_buffer + n_bin_buffer <= raw_buffer + sizeof (raw_buffer));
+
+  /* FIXME: consider caching the last digest so that it is not recomputed
+     when consecutive calls name the same file.  */
+  if ((digest = S_ISREG (statbuf->st_mode)))
+    fail = !digest_regular_file (desc->stream, filename, bin_buffer);
+  else if ((digest = S_ISLNK (statbuf->st_mode)))
+    fail = !digest_symlink_file (desc->buffer, filename, statbuf->st_size,
+                                 bin_buffer);
+
+  /* The digest is raw bytes; out_hex_string takes them as plain chars.  */
+  if (digest && !fail)
+    out_hex_string (pformat, prefix_len, (char const *) bin_buffer,
+                    n_bin_buffer);
+  else
+    out_string (pformat, prefix_len, fail ? "?" : "-");
+
+  return fail;
+}
+
 /* Print stat info.  Return zero upon success, nonzero upon failure.
*/ static bool print_stat (char *pformat, size_t prefix_len, unsigned int m, @@ -922,7 +1175,8 @@ switch (m) { case 'n': - out_string (pformat, prefix_len, filename); + out_string (pformat, prefix_len, quotearg_style (filename_quoting_style, + filename)); break; case 'N': out_string (pformat, prefix_len, quote (filename)); @@ -992,6 +1246,9 @@ case 's': out_int (pformat, prefix_len, statbuf->st_size); break; + case 'S': + fail |= out_file_digest (pformat, prefix_len, filename, statbuf); + break; case 'B': out_uint (pformat, prefix_len, ST_NBLOCKSIZE); break; @@ -1357,7 +1614,11 @@ emit_try_help (); else { - printf (_("Usage: %s [OPTION]... FILE...\n"), program_name); + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ + or: %s [OPTION]... --files-from=F\n\ +"), program_name, program_name); + fputs (_("\ Display file or file system status.\n\ "), stdout); @@ -1366,14 +1627,28 @@ fputs (_("\ -L, --dereference follow links\n\ + --digest-type=WORD\n\ + when computing file content sums use specified\n\ + message digest algorithm: md5, sha1, sha224, sha256,\n\ + sha384 or sha512; when the option is not specified\n\ + compute sha1 digests\n\ -f, --file-system display file system status instead of file status\n\ "), stdout); + fputs (_("\ + --files-from=F display status of files specified by names in file F;\n\ + If F is - then read names from standard input\n\ +"), stdout); fputs (_("\ -c --format=FORMAT use the specified FORMAT instead of the default;\n\ output a newline after each use of FORMAT\n\ --printf=FORMAT like --format, but interpret backslash escapes,\n\ and do not output a mandatory trailing newline;\n\ if you want a newline, include \\n in FORMAT\n\ + --quoting-style=WORD\n\ + use quoting style WORD for file names:\n\ + literal, locale, shell, shell-always, c, escape\n\ + c-maybe, clocale;\n\ + when no option is given, use literal style\n\ -t, --terse print the information in terse form\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); @@ -1404,6 
+1679,7 @@ %N quoted file name with dereference if symbolic link\n\ %o optimal I/O transfer size hint\n\ %s total size, in bytes\n\ + %S file content digest sum\n\ %t major device type in hex, for character/block device special files\n\ %T minor device type in hex, for character/block device special files\n\ "), stdout); @@ -1455,6 +1731,9 @@ char *format = NULL; char *format2; bool ok = true; + FILE *stream = NULL; + char *files_from = NULL; + struct Tokens tok; initialize_main (&argc, &argv); set_program_name (argv[0]); @@ -1496,6 +1775,22 @@ terse = true; break; + case DIGEST_TYPE_OPTION: + file_digest_type = XARGMATCH ("--digest-type", optarg, + digest_type_args, + digest_type_vals); + break; + + case FILES_FROM_OPTION: + files_from = optarg; + break; + + case QUOTING_STYLE_OPTION: + filename_quoting_style = XARGMATCH ("--quoting-style", optarg, + quoting_style_args, + quoting_style_vals); + break; + case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); @@ -1505,12 +1800,6 @@ } } - if (argc == optind) - { - error (0, 0, _("missing operand")); - usage (EXIT_FAILURE); - } - if (format) format2 = format; else @@ -1519,10 +1808,113 @@ format2 = default_format (fs, terse, true); } - for (i = optind; i < argc; i++) - ok &= (fs - ? do_statfs (argv[i], format) - : do_stat (argv[i], format, format2)); + bool read_tokens = false; + struct argv_iterator *ai; + if (files_from) + { + /* When using --files-from=F, you may not specify any files + on the command-line. 
*/ + if (optind < argc) + { + error (0, 0, _("extra operand %s"), quote (argv[optind])); + fprintf (stderr, "%s\n", + _("file operands cannot be combined with --files-from")); + usage (EXIT_FAILURE); + } + + if (STREQ (files_from, "-")) + stream = stdin; + else + { + stream = fopen (files_from, "r"); + if (stream == NULL) + error (EXIT_FAILURE, errno, _("cannot open %s for reading"), + quote (files_from)); + } + + /* Read the file list into RAM if we can detect its size and that + size is reasonable. Otherwise, we'll read a name at a time. */ + struct stat st; + if (fstat (fileno (stream), &st) == 0 + && S_ISREG (st.st_mode) + && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2)) + { + read_tokens = true; + readtokens0_init (&tok); + if (! readtokens0_delim (stream, &tok, '\n')) + error (EXIT_FAILURE, 0, _("cannot read file names from %s"), + quote (files_from)); + ai = argv_iter_init_argv (tok.tok); + } + else + { + ai = argv_iter_init_stream_delim (stream, '\n'); + } + } + else if (argc > optind) + { + ai = argv_iter_init_argv (argv + optind); + } + else + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + + if (!ai) + xalloc_die (); + + while (true) + { + enum argv_iter_err ai_err; + char *file_name = argv_iter (ai, &ai_err); + if (!file_name) + { + switch (ai_err) + { + case AI_ERR_EOF: + goto argv_iter_done; + case AI_ERR_READ: + error (0, errno, _("%s: read error"), + quotearg_colon (files_from)); + ok = false; + goto argv_iter_done; + case AI_ERR_MEM: + xalloc_die (); + default: + assert (!"unexpected error code from argv_iter"); + } + } + + /* Silently ignore input empty lines when + given --files-from=FILE. 
*/ + if (files_from && !file_name[0]) + continue; + + if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-")) + { + /* Give a better diagnostic in an unusual case: + printf - | stat --files-from=- */ + error (0, 0, _("when reading file names from stdin, " + "no file name of %s allowed"), + quote (file_name)); + ok = false; + continue; + } + + ok &= (fs + ? do_statfs (file_name, format) + : do_stat (file_name, format, format2)); + } + argv_iter_done: + + if (read_tokens) + readtokens0_free (&tok); + + argv_iter_free (ai); + + if (files_from && (ferror (stream) || fclose (stream) != 0)) + error (EXIT_FAILURE, 0, _("error reading %s"), quote (files_from)); exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); }