bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[patch] New option for du: --only-here


From: Vebjorn Ljosa
Subject: [patch] New option for du: --only-here
Date: Sun, 1 Oct 2006 12:32:08 -0700
User-agent: Mutt/1.5.9i

Hi

I have implemented a new option "--only-here" in du:

$ ./du --help|grep only-here
      --only-here       only include files for which all hard links are found

When this option is given, du only includes a file if it finds all
links to it.  This is useful when many of the files in a directory
structure also have hard links in other places, and you would like to
know how much space will be freed up if you delete the directory
structure.  (This is a common problem for users of disk-based backup
systems, where files that have not been modified since yesterday are
hard linked into today's backup directory.)

The unified diff against coreutils-6.3 is included below.  (It can
also be applied to coreutils-6.2.)  I have tried to adhere to the
existing style conventions.  I hope this can make its way into the
next version of coreutils.  Please let me know if you have questions.

Thanks,
Vebjorn


--- du.c~       2006-09-02 23:38:53.000000000 -0700
+++ du.c        2006-10-01 12:13:28.833673545 -0700
@@ -71,6 +71,8 @@
 {
   ino_t st_ino;
   dev_t st_dev;
+  nlink_t st_nlink;
+  nlink_t links_seen;
 };
 
 /* A set of dev/ino pairs.  */
@@ -157,6 +159,9 @@
 /* If true, print most recently modified date, using the specified format.  */
 static bool opt_time = false;
 
+/* If non-zero, only count files for which we find all the links. */
+static int opt_only_here;
+
 /* Type of time to display. controlled by --time.  */
 
 enum time_type
@@ -204,6 +209,7 @@
   /* FIXME: --megabytes is deprecated (but not -m); remove in late 2006 */
   MEGABYTES_LONG_OPTION,
 
+  ONLY_HERE_OPTION,
   TIME_OPTION,
   TIME_STYLE_OPTION
 };
@@ -228,6 +234,7 @@
   {"megabytes", no_argument, NULL, MEGABYTES_LONG_OPTION},
   {"no-dereference", no_argument, NULL, 'P'},
   {"one-file-system", no_argument, NULL, 'x'},
+  {"only-here", no_argument, NULL, ONLY_HERE_OPTION},
   {"separate-dirs", no_argument, NULL, 'S'},
   {"summarize", no_argument, NULL, 's'},
   {"total", no_argument, NULL, 'c'},
@@ -333,6 +340,7 @@
       --time-style=STYLE show times using style STYLE:\n\
                           full-iso, long-iso, iso, +FORMAT\n\
                           FORMAT is interpreted like `date'\n\
+      --only-here       only include files for which all hard links are found\n\
 "), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
       fputs (VERSION_OPTION_DESCRIPTION, stdout);
@@ -366,10 +374,13 @@
 }
 
 /* Try to insert the INO/DEV pair into the global table, HTAB.
-   Return true if the pair is successfully inserted,
-   false if the pair is already in the table.  */
-static bool
-hash_ins (ino_t ino, dev_t dev)
+   If the pair is successfully inserted, return zero.
+   Upon failed memory allocation, die.
+   If the pair is already in the table, return the number of links
+   that have been seen so far.  (This count is one after the insert,
+   and is increased by one each time hash_ins is called.)  */
+static int
+hash_ins (ino_t ino, dev_t dev, nlink_t nlink)
 {
   struct entry *ent;
   struct entry *ent_from_table;
@@ -377,6 +388,8 @@
   ent = xmalloc (sizeof *ent);
   ent->st_ino = ino;
   ent->st_dev = dev;
+  ent->st_nlink = nlink;
+  ent->links_seen = 1;
 
   ent_from_table = hash_insert (htab, ent);
   if (ent_from_table == NULL)
@@ -388,13 +401,13 @@
   if (ent_from_table == ent)
     {
       /* Insertion succeeded.  */
-      return true;
+      return 0;
     }
 
   /* That pair is already in the table, so ENT was not inserted.  Free it.  */
   free (ent);
 
-  return false;
+  return ent_from_table->links_seen++;
 }
 
 /* Initialize the hash table.  */
@@ -517,13 +530,19 @@
   if (ent->fts_info == FTS_D || skip)
     return ok;
 
-  /* If the file is being excluded or if it has already been counted
-     via a hard link, then don't let it contribute to the sums.  */
+  /* Don't let the file contribute to the sums if (1) it is being
+     excluded, (2) it has already been counted via a hard link, or
+     (3) --only-here is specified and this is a non-directory for
+     which we have not yet seen all the links.  */
   if (skip
       || (!opt_count_all
+          && !opt_only_here
          && ! S_ISDIR (sb->st_mode)
          && 1 < sb->st_nlink
-         && ! hash_ins (sb->st_ino, sb->st_dev)))
+         && hash_ins (sb->st_ino, sb->st_dev, sb->st_nlink))
+      || (opt_only_here
+          && ! S_ISDIR (sb->st_mode)
+          && hash_ins (sb->st_ino, sb->st_dev, sb->st_nlink) + 1 < sb->st_nlink))
     {
       /* Note that we must not simply return here.
         We still have to update prev_level and maybe propagate
@@ -533,10 +552,13 @@
     }
   else
     {
+      /* If both --count-all and --only-here are specified, count the size
+         sb->st_nlink times because we skipped previous links. */
       duinfo_set (&dui,
                  (apparent_size
                   ? sb->st_size
-                  : (uintmax_t) ST_NBLOCKS (*sb) * ST_NBLOCKSIZE),
+                  : (uintmax_t) ST_NBLOCKS (*sb) * ST_NBLOCKSIZE) *
+                  (opt_count_all && opt_only_here ? sb->st_nlink : 1),
                  (time_type == time_mtime ? get_stat_mtime (sb)
                   : time_type == time_atime ? get_stat_atime (sb)
                   : get_stat_ctime (sb)));
@@ -838,6 +860,10 @@
          add_exclude (exclude, optarg, EXCLUDE_WILDCARDS);
          break;
 
+        case ONLY_HERE_OPTION:
+          opt_only_here = 1;
+          break;
+
        case TIME_OPTION:
          opt_time = true;
          time_type =




reply via email to

[Prev in Thread] Current Thread [Next in Thread]