[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: Du feature request - group reporting
From: |
Daniel Gall |
Subject: |
Re: Du feature request - group reporting |
Date: |
Fri, 2 Feb 2018 22:27:51 -0500 |
Sorry for the delay; life intervened. In addition to the feature add,
I found a place where du was calling xcalloc and did not check that
the returned pointer was not NULL. I added a check. I didn't add a
test because (1) I'm lazy, and (2) the most relevant test would be
very hard to write portably: it would have to create files in
different groups and then delete them during the build, without
knowing whether the builder even has membership in more than one
group. I can probably make a less
useful test for how it works with files in a single group. However,
let's give this a shot and hopefully get some feedback before I invest
the time into a test that isn't needed if the feature is rejected:
From 544c581654cd0dcfb363215801245a7c2dd3fcd3 Mon Sep 17 00:00:00 2001
From: Daniel Gall <address@hidden>
Date: Fri, 2 Feb 2018 17:18:44 -0500
Subject: [PATCH] added du group reporting feature and fixed a bug where du
allocated memory and did not check that the target pointer was not NULL after
the allocation call.
---
NEWS | 4 ++
doc/coreutils.texi | 5 ++
src/du.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 196 insertions(+), 9 deletions(-)
diff --git a/NEWS b/NEWS
index 8a9e09e..877c594 100644
--- a/NEWS
+++ b/NEWS
@@ -32,6 +32,8 @@ GNU coreutils NEWS
-*- outline -*-
df no longer hangs when given a fifo argument.
[bug introduced in coreutils-7.3]
+ du no longer allocates memory without checking whether the allocation call succeeded.
+
ptx -S no longer infloops for a pattern which returns zero-length matches.
[the bug dates back to the initial implementation]
@@ -56,6 +58,8 @@ GNU coreutils NEWS
-*- outline -*-
timeout now supports the --verbose option to diagnose forced termination.
+ du now supports the -g option for group reporting.
+
** Improvements
dd now supports iflag=direct with arbitrary sized files on all file systems.
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index cdde136..d220012 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -11910,6 +11910,11 @@ is at level 0, so @code{du --max-depth=0} is
equivalent to @code{du -s}.
@c --files0-from=FILE
@filesZeroFromOption{du,, with the @option{--total} (@option{-c}) option}
+@item -g
+@opindex -g
+@cindex group reporting
+Show group subtotals for each item reported on.
+
@item -H
@opindex -H
Equivalent to @option{--dereference-args} (@option{-D}).
diff --git a/src/du.c b/src/du.c
index ac4489f..ab61032 100644
--- a/src/du.c
+++ b/src/du.c
@@ -35,6 +35,7 @@
#include "error.h"
#include "exclude.h"
#include "fprintftime.h"
+#include "grp.h"
#include "human.h"
#include "mountlist.h"
#include "quote.h"
@@ -61,6 +62,9 @@ extern bool fts_debug;
# define FTS_CROSS_CHECK(Fts)
#endif
+/* If true, display group size info. */
+bool opt_group_sizes = false;
+
/* A set of dev/ino pairs to help identify files and directories
whose sizes have already been counted. */
static struct di_set *di_files;
@@ -80,7 +84,7 @@ struct duinfo
/* Number of inodes in directory. */
uintmax_t inodes;
-
+ uintmax_t *group_size;
/* Latest timestamp found. If tmax.tv_sec == TYPE_MINIMUM (time_t)
&& tmax.tv_nsec < 0, no timestamp has been found. */
struct timespec tmax;
@@ -90,28 +94,62 @@ struct duinfo
static inline void
duinfo_init (struct duinfo *a)
{
+ uintmax_t i = 0;
a->size = 0;
a->inodes = 0;
a->tmax.tv_sec = TYPE_MINIMUM (time_t);
a->tmax.tv_nsec = -1;
+ if (opt_group_sizes)
+ {
+ for (i=0; i<65535; i++)
+ {
+ a->group_size[i] = 0;
+ }
+ }
}
/* Set directory data. */
static inline void
-duinfo_set (struct duinfo *a, uintmax_t size, struct timespec tmax)
+duinfo_set (struct duinfo *a, uintmax_t size, struct timespec tmax,
uintmax_t gid)
{
+ uintmax_t gid_u = (uintmax_t)gid;
+ uintmax_t gid_s;
a->size = size;
a->inodes = 1;
a->tmax = tmax;
+ if (opt_group_sizes)
+ {
+ if (gid_u > 65534)
+ {
+ gid_s = 65534;
+ }
+ else
+ {
+ gid_s = gid_u;
+ }
+ if (gid_s >=0 && gid_s <=65534)
+ {
+ a->group_size[gid_s] = size;
+ }
+ }
}
/* Accumulate directory data. */
static inline void
duinfo_add (struct duinfo *a, struct duinfo const *b)
{
+ uintmax_t i = 0;
uintmax_t sum = a->size + b->size;
a->size = a->size <= sum ? sum : UINTMAX_MAX;
a->inodes = a->inodes + b->inodes;
+ if (opt_group_sizes)
+ {
+ for (i=0; i<65535; i++)
+ {
+ sum = a->group_size[i] + b->group_size[i];
+ a->group_size[i] = a->group_size[i] <= sum ? sum : UINTMAX_MAX;
+ }
+ }
if (timespec_cmp (a->tmax, b->tmax) < 0)
a->tmax = b->tmax;
}
@@ -226,6 +264,7 @@ static struct option const long_options[] =
{"exclude", required_argument, NULL, EXCLUDE_OPTION},
{"exclude-from", required_argument, NULL, 'X'},
{"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
+ {"group-reporting", no_argument, NULL, 'g'},
{"human-readable", no_argument, NULL, 'h'},
{"inodes", no_argument, NULL, INODES_OPTION},
{"si", no_argument, NULL, HUMAN_SI_OPTION},
@@ -317,6 +356,7 @@ Summarize disk usage of the set of FILEs,
recursively for directories.\n\
--files0-from=F summarize disk usage of the\n\
NUL-terminated file names specified in file F;\n\
if F is -, then read names from standard input\n\
+ -g, --group-reporting also print group subtotals\n\
-H equivalent to --dereference-args (-D)\n\
-h, --human-readable print sizes in human readable format (e.g.,
1K 234M 2G)\
\n\
@@ -411,7 +451,24 @@ print_size (const struct duinfo *pdui, const char *string)
print_only_size (opt_inodes
? pdui->inodes
: pdui->size);
-
+ if (opt_group_sizes)
+ {
+ uintmax_t i=0;
+ struct group *g;
+ for (i=0; i<65535; i++){
+ if (pdui->group_size[i] > 0)
+ {
+ g = getgrgid(i);
+ printf (",");
+ if (g == NULL){
+ printf (" %Ld:", (long long unsigned int)i);
+ }else{
+ printf(" %s:", g->gr_name);
+ }
+ print_only_size(pdui->group_size[i]);
+ }
+ }
+ }
if (opt_time)
{
putchar ('\t');
@@ -506,6 +563,16 @@ process_file (FTS *fts, FTSENT *ent)
const struct stat *sb = ent->fts_statp;
int info = ent->fts_info;
+ if(opt_group_sizes)
+ {
+ dui.group_size = xcalloc (65536, sizeof (uintmax_t));
+ dui_to_print.group_size = xcalloc (65536, sizeof (uintmax_t));
+ if (dui.group_size == NULL || dui_to_print.group_size == NULL)
+ {
+ return false;
+ }
+ }
+
if (info == FTS_DNR)
{
/* An error occurred, but the size is known, so count it. */
@@ -530,7 +597,18 @@ process_file (FTS *fts, FTSENT *ent)
if (info == FTS_NS || info == FTS_SLNONE)
{
error (0, ent->fts_errno, _("cannot access %s"), quoteaf (file));
- return false;
+ if(opt_group_sizes)
+ {
+ if (dui.group_size != NULL)
+ {
+ free(dui.group_size);
+ }
+ if (dui_to_print.group_size != NULL)
+ {
+ free(dui_to_print.group_size);
+ }
+ }
+ return false;
}
/* The --one-file-system (-x) option cannot exclude anything
@@ -558,13 +636,34 @@ process_file (FTS *fts, FTSENT *ent)
FTSENT const *e = fts_read (fts);
assert (e == ent);
}
-
+ if(opt_group_sizes)
+ {
+ if (dui.group_size != NULL)
+ {
+ free(dui.group_size);
+ }
+ if (dui_to_print.group_size != NULL)
+ {
+ free(dui_to_print.group_size);
+ }
+ }
return true;
}
switch (info)
{
case FTS_D:
+ if(opt_group_sizes)
+ {
+ if (dui.group_size != NULL)
+ {
+ free(dui.group_size);
+ }
+ if (dui_to_print.group_size != NULL)
+ {
+ free(dui_to_print.group_size);
+ }
+ }
return true;
case FTS_ERR:
@@ -574,6 +673,17 @@ process_file (FTS *fts, FTSENT *ent)
break;
case FTS_DC:
+ if(opt_group_sizes)
+ {
+ if (dui.group_size != NULL)
+ {
+ free(dui.group_size);
+ }
+ if (dui_to_print.group_size != NULL)
+ {
+ free(dui_to_print.group_size);
+ }
+ }
/* If not following symlinks and not a (bind) mount point. */
if (cycle_warning_required (fts, ent)
&& ! mount_point_in_fts_cycle (ent))
@@ -591,15 +701,48 @@ process_file (FTS *fts, FTSENT *ent)
: (uintmax_t) ST_NBLOCKS (*sb) * ST_NBLOCKSIZE),
(time_type == time_mtime ? get_stat_mtime (sb)
: time_type == time_atime ? get_stat_atime (sb)
- : get_stat_ctime (sb)));
+ : get_stat_ctime (sb)),
+ sb->st_gid);
level = ent->fts_level;
- dui_to_print = dui;
+
+ if (opt_group_sizes)
+ {
+ duinfo_set (&dui_to_print,
+ (apparent_size
+ ? MAX (0, sb->st_size)
+ : (uintmax_t) ST_NBLOCKS (*sb) * ST_NBLOCKSIZE),
+ (time_type == time_mtime ? get_stat_mtime (sb)
+ : time_type == time_atime ? get_stat_atime (sb)
+ : get_stat_ctime (sb)),
+ sb->st_gid);
+ }
+ else
+ {
+ dui_to_print = dui;
+ }
if (n_alloc == 0)
{
+ size_t i;
n_alloc = level + 10;
dulvl = xcalloc (n_alloc, sizeof *dulvl);
+ if (dulvl == NULL)
+ {
+ return false;
+ }
+ if(opt_group_sizes)
+ {
+ for (i=0; i<n_alloc; i++)
+ {
+ dulvl[i].ent.group_size = xcalloc (65536, sizeof (uintmax_t));
+ dulvl[i].subdir.group_size = xcalloc (65536, sizeof (uintmax_t));
+ if (dulvl[i].ent.group_size == NULL ||
dulvl[i].subdir.group_size == NULL)
+ {
+ return false;
+ }
+ }
+ }
}
else
{
@@ -613,14 +756,28 @@ process_file (FTS *fts, FTSENT *ent)
Clear the accumulators for *all* levels between prev_level
and the current one. The depth may change dramatically,
e.g., from 1 to 10. */
+ size_t i;
if (n_alloc <= level)
{
dulvl = xnrealloc (dulvl, level, 2 * sizeof *dulvl);
+ if(opt_group_sizes)
+ {
+ for (i=n_alloc; i<level*2; i++)
+ {
+ dulvl[i].ent.group_size = xcalloc (65536,
sizeof (uintmax_t));
+ dulvl[i].subdir.group_size = xcalloc (65536,
sizeof (uintmax_t));
+ if (dulvl[i].ent.group_size == NULL ||
dulvl[i].subdir.group_size == NULL)
+ {
+ return false;
+ }
+
+ }
+ }
n_alloc = level * 2;
}
- for (size_t i = prev_level + 1; i <= level; i++)
+ for (i = prev_level + 1; i <= level; i++)
{
duinfo_init (&dulvl[i].ent);
duinfo_init (&dulvl[i].subdir);
@@ -666,6 +823,18 @@ process_file (FTS *fts, FTSENT *ent)
print_size (&dui_to_print, file);
}
+ if(opt_group_sizes)
+ {
+ if (dui.group_size != NULL)
+ {
+ free(dui.group_size);
+ }
+ if (dui_to_print.group_size != NULL)
+ {
+ free(dui_to_print.group_size);
+ }
+ }
+
return ok;
}
@@ -755,7 +924,7 @@ main (int argc, char **argv)
while (true)
{
int oi = -1;
- int c = getopt_long (argc, argv, "0abd:chHklmst:xB:DLPSX:",
+ int c = getopt_long (argc, argv, "0abgd:chHklmst:xB:DLPSX:",
long_options, &oi);
if (c == -1)
break;
@@ -800,6 +969,15 @@ main (int argc, char **argv)
output_block_size = 1;
break;
+ case 'g':
+ tot_dui.group_size = xcalloc (65536, sizeof (uintmax_t));
+ if (tot_dui.group_size == NULL)
+ {
+ ok = false;
+ }
+ opt_group_sizes = true;
+ break;
+
case 'k':
human_output_opts = 0;
output_block_size = 1024;
--
2.10.2
On Thu, Jan 25, 2018 at 5:22 PM, Daniel Gall <address@hidden> wrote:
> Wow, those are pretty neat invocations of find and awk. They also, as you
> allude to, add an extra stat of each file. My code/idea changes simply pick
> up the group information du gets for free when stating for file size and
> currently throws in the bit bucket. Adding a user option seems useful too as
> that info is also in the stat record. Efficiency is important, especially as
> storage density continues to outscale io throughput, iops, and compute.
>
> Sent from my iPhone
>
>> On Jan 25, 2018, at 4:18 PM, Assaf Gordon <address@hidden> wrote:
>>
>> Hello Dan,
>>
>> Expanding on Eric's comments:
>>
>>> On Thu, Jan 25, 2018 at 02:42:32PM -0600, Eric Blake wrote:
>>>> On 01/25/2018 12:11 PM, Daniel Gall wrote:
>>>> coreutils-8.26> !diff
>>>
>>> We prefer 'git diff' output against the latest coreutils.git,
>>> but any program which can produce unified diffs (diff -u) is better than
>>> an ed script diff.
>>
>> Good starting points are here:
>> https://git.savannah.gnu.org/cgit/coreutils.git/tree/README-hacking
>> https://git.savannah.gnu.org/cgit/coreutils.git/tree/HACKING
>> https://git.savannah.gnu.org/cgit/coreutils.git/tree/.github/PULL_REQUEST_TEMPLATE.txt
>>
>>> A feature addition requires documentation, NEWS update, and preferably
>>> testsuite additions to be complete
>>
>> A typical example of these required changes is here:
>> https://git.savannah.gnu.org/cgit/coreutils.git/commit/?id=57dea5ed07471b2192cc5edf08993e663a3f6802
>>
>>
>>
>> Additionally, a work-around would be to combine several existing programs
>> to get approximately similar information:
>>
>> First, use `find` to print the size (%s) and group (%g) of each
>> file/directory:
>>
>> $ find /home -printf "%g %s\n"
>> root 4096 /home
>> gordon 4096 /home/gordon
>> gordon 59 /home/gordon/.Xauthority
>> gordon 4096 /home/gordon/.cache
>> gordon 4096 /home/gordon/.cache/RStudio
>> ...
>>
>> Then, use `awk` to sum up the sizes per group:
>>
>> $ find /home -printf "%g %s\n" \
>> | awk '{a[$1] += $2} END {for(i in a) { print a[i],i }}'
>> 1044086087 gordon
>> 542342 mike
>> 4123 root
>>
>> And optionally, use `numfmt` to print human sizes:
>>
>> $ find /home -printf "%g %s\n" \
>> | awk '{a[$1] += $2} END {for(i in a) { print a[i],i }}' \
>> | numfmt --to=iec
>> 997M gordon
>> 530K mike
>> 4.1K root
>>
>>
>> The above commands are rather naive, counting hard-links as many times
>> as they appear (similar to 'du -l'), and showing the apparent size
>> instead of allocated blocks (similar to 'du --apparent-size').
>>
>> To show allocated blocks, replace '%s' with '%k'.
>>
>> To count hardlinked files just once, print the device(%D) and inode number
>> (%i) of
>> each file, then use 'sort -u' to keep only one of each:
>>
>> find /home -printf "%g %s %D %i\n" \
>> | sort -k3n,3 -k4n,4 -u \
>> | awk '{a[$1] += $2} END {for(i in a) { print a[i],i }}' \
>> | numfmt --to=iec
>>
>> This isn't as efficient as 'du', but could be used with existing programs
>> without code modifications (and using find's many predicates allows
>> fine-tuning
>> of the summaries, e.g. per-user, per-user-and-group, etc.).
>>
>> regards,
>> - assaf
>>
>>
0001-added-du-group-reporting-feature-and-fixed-a-bug-whe.patch
Description: Text Data
- Re: Du feature request - group reporting,
Daniel Gall <=
- Re: Du feature request - group reporting, Eric Blake, 2018/02/05
- Re: Du feature request - group reporting, Daniel Gall, 2018/02/05
- Re: Du feature request - group reporting, Daniel Gall, 2018/02/05
- Re: Du feature request - group reporting, Daniel Gall, 2018/02/06
- Re: Du feature request - group reporting, Daniel Gall, 2018/02/07
- Re: Du feature request - group reporting, Daniel Gall, 2018/02/28
- Re: Du feature request - group reporting, Daniel Gall, 2018/02/28