bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

coreutils 'ls' fix for multibyte user and group names


From: Paul Eggert
Subject: coreutils 'ls' fix for multibyte user and group names
Date: Mon, 21 Jun 2004 00:21:20 -0700
User-agent: Gnus/5.1006 (Gnus v5.10.6) Emacs/21.3 (gnu/linux)

coreutils 'ls' mishandles user or group names that contain multibyte
characters where the number of columns is not equal to the number of
bytes.  For example, if a user name is "castaneda" (except that the
"n" has a tilde over it), and if the current locale is en_US.utf8,
then the n-with-a-tilde consumes two bytes but only one print column.
Current GNU "ls -l" outputs something like this:

-rw-r--r--  1 eggert     eggert 4127 2004-06-12 23:18 file1
-rw-r--r--  1 castaneda eggert  753 2004-06-20 23:58 file2

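(again, where the "n" in "castaneda" has a tilde over it), so the
columns don't line up: 'ls' pads each name to a common number of
bytes, and the 10-byte name occupies only 9 print columns.  With the
patch proposed below, the output looks like this instead: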

-rw-r--r--  1 eggert    eggert 4127 2004-06-12 23:18 file1
-rw-r--r--  1 castaneda eggert  753 2004-06-20 23:58 file2

2004-06-21  Paul Eggert  <address@hidden>

        Fix bug: GNU 'ls' didn't count columns correctly if user or group
        names contained multibyte characters where the column count
        differed from the byte count.  This patch also corrects
        some comments.

        * src/ls.c (format_user_or_group): New function, which counts
        columns correctly.
        (format_user, format_group): Use it.
        (format_user_or_group_width): New function, which counts columns
        correctly.
        (format_user_width, format_group_width): Use it.
        
Index: src/ls.c
===================================================================
RCS file: /home/meyering/coreutils/cu/src/ls.c,v
retrieving revision 1.357
diff -p -u -r1.357 ls.c
--- src/ls.c    15 Jun 2004 18:00:03 -0000      1.357
+++ src/ls.c    21 Jun 2004 07:05:18 -0000
@@ -330,7 +330,7 @@ static struct pending *pending_dirs;
 static time_t current_time = TYPE_MINIMUM (time_t);
 static int current_time_ns = -1;
 
-/* The number of bytes to use for columns containing inode numbers,
+/* The number of columns to use for columns containing inode numbers,
    block sizes, link counts, owners, groups, authors, major device
    numbers, minor device numbers, and file sizes, respectively.  */
 
@@ -804,14 +804,14 @@ static struct column_info *column_info;
 /* Maximum number of columns ever possible for this display.  */
 static size_t max_idx;
 
-/* The minimum width of a colum is 3: 1 character for the name and 2
+/* The minimum width of a column is 3: 1 character for the name and 2
    for the separating white space.  */
 #define MIN_COLUMN_WIDTH       3
 
 
 /* This zero-based index is used solely with the --dired option.
    When that option is in effect, this counter is incremented for each
-   character of output generated by this program so that the beginning
+   byte of output generated by this program so that the beginning
    and ending indices (in that output) of every file name can be recorded
    and later output themselves.  */
 static size_t dired_pos;
@@ -3055,19 +3055,44 @@ get_current_time (void)
   current_time_ns = 999999999;
 }
 
+/* Print the user or group name NAME, with numeric id ID, using a
+   print width of WIDTH columns.  */
+
+static void
+format_user_or_group (char const *name, unsigned long int id, int width)
+{
+  size_t len;
+
+  if (name)
+    {
+      /* The output column count may differ from the byte count.
+        Adjust for this, but don't output garbage if integer overflow
+        occurs during adjustment.  */
+      len = strlen (name);
+      width -= mbswidth (name, 0);
+      width += len;
+      if (width < 0)
+       width = 0;
+      printf ("%-*s ", width, name);
+      if (len < width)
+       len = width;
+    }
+  else
+    {
+      printf ("%*lu ", width, id);
+      len = width;
+    }
+
+  dired_pos += len + 1;
+}
+
 /* Print the name or id of the user with id U, using a print width of
    WIDTH.  */
 
 static void
 format_user (uid_t u, int width)
 {
-  char const *name = (numeric_ids ? NULL : getuser (u));
-  if (name)
-    printf ("%-*s ", width, name);
-  else
-    printf ("%*lu ", width, (unsigned long int) u);
-  dired_pos += width;
-  dired_pos++;
+  format_user_or_group (numeric_ids ? NULL : getuser (u), u, width);
 }
 
 /* Likewise, for groups.  */
@@ -3075,34 +3100,33 @@ format_user (uid_t u, int width)
 static void
 format_group (gid_t g, int width)
 {
-  char const *name = (numeric_ids ? NULL : getgroup (g));
-  if (name)
-    printf ("%-*s ", width, name);
-  else
-    printf ("%*lu ", width, (unsigned long int) g);
-  dired_pos += width;
-  dired_pos++;
+  format_user_or_group (numeric_ids ? NULL : getgroup (g), g, width);
 }
 
-/* Return the number of bytes that format_user will print.  */
+/* Return the number of columns that format_user_or_group will print.  */
 
 static int
-format_user_width (uid_t u)
+format_user_or_group_width (char const *name, unsigned long int id)
 {
-  char const *name = (numeric_ids ? NULL : getuser (u));
-  char buf[INT_BUFSIZE_BOUND (unsigned long int)];
-  size_t len;
-
-  if (! name)
+  if (name)
     {
-      sprintf (buf, "%lu", (unsigned long int) u);
-      name = buf;
+      int len = mbswidth (name, 0);
+      return MAX (0, len);
     }
+  else
+    {
+      char buf[INT_BUFSIZE_BOUND (unsigned long int)];
+      sprintf (buf, "%lu", id);
+      return strlen (buf);
+    }
+}
 
-  len = strlen (name);
-  if (INT_MAX < len)
-    error (EXIT_FAILURE, 0, _("User name too long"));
-  return len;
+/* Return the number of columns that format_user will print.  */
+
+static int
+format_user_width (uid_t u)
+{
+  return format_user_or_group_width (numeric_ids ? NULL : getuser (u), u);
 }
 
 /* Likewise, for groups.  */
@@ -3110,20 +3134,7 @@ format_user_width (uid_t u)
 static int
 format_group_width (gid_t g)
 {
-  char const *name = (numeric_ids ? NULL : getgroup (g));
-  char buf[INT_BUFSIZE_BOUND (unsigned long int)];
-  size_t len;
-
-  if (! name)
-    {
-      sprintf (buf, "%lu", (unsigned long int) g);
-      name = buf;
-    }
-
-  len = strlen (name);
-  if (INT_MAX < len)
-    error (EXIT_FAILURE, 0, _("Group name too long"));
-  return len;
+  return format_user_or_group_width (numeric_ids ? NULL : getgroup (g), g);
 }
 
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]