[INSTALLED] basenc: prefer signed to unsigned integers

coreutils
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[INSTALLED] basenc: prefer signed to unsigned integers

From:	Paul Eggert
Subject:	[INSTALLED] basenc: prefer signed to unsigned integers
Date:	Fri, 27 Aug 2021 17:14:20 -0700
This patch modifies basenc to prefer signed integers to
unsigned, as signed are less error-prone.
This patch also updates Gnulib to to latest, which updates Gnulib’s
base32 and base64 modules to prefer signed to unsigned integers.
* src/basenc.c: Include idx.h.
(struct base2_decode_context): Use unsigned char, not unsigned
for an octet that must fit in an unsigned char.
(base_encode, struct base_decode_context)
(base64_decode_ctx_wrapper, prepare_inbuf, base64url_encode)
(base64url_decode_ctx_wrapper, base32_decode_ctx_wrapper)
(base32hex_encode, base32hex_decode_ctx_wrapper, base16_encode)
(base16_decode_ctx, z85_encode, Z85_HI_CTX_TO_32BIT_VAL)
(z85_decoding, z85_decode_ctx, base2msbf_encode)
(base2lsbf_encode, base2lsbf_decode_ctx, base2msbf_decode_ctx)
(wrap_write, do_encode, do_decode, main):
Prefer signed integers to unsigned.
(main): Treat extremely large wrap columns as if they were
infinite; that’s good enough.  Since we’re now using xstrtoimax,
this allows ‘-w -0’ (same as ‘-w 0’).
* tests/misc/base64.pl (gen_tests): -w-0 is no longer an error.
---
 gnulib               |   2 +-
 src/basenc.c         | 188 ++++++++++++++++++++-----------------------
 tests/misc/base64.pl |   3 -
 3 files changed, 88 insertions(+), 105 deletions(-)

diff --git a/gnulib b/gnulib
index 4ea0e64a8..452e8a8f7 160000
--- a/gnulib
+++ b/gnulib
@@ -1 +1 @@
-Subproject commit 4ea0e64a8db7064427f6aa5624a4efd4b41db132
+Subproject commit 452e8a8f7b07a794b1de3813062a992509f5c0c9
diff --git a/src/basenc.c b/src/basenc.c
index 5c97a3652..19a7b0aad 100644
--- a/src/basenc.c
+++ b/src/basenc.c
@@ -27,6 +27,7 @@
 #include "die.h"
 #include "error.h"
 #include "fadvise.h"
+#include "idx.h"
 #include "quote.h"
 #include "xstrtol.h"
 #include "xdectoint.h"
@@ -217,8 +218,8 @@ verify (DEC_BLOCKSIZE % 12 == 0);  /* So complete encoded 
blocks are used.  */
 
 static int (*base_length) (int i);
 static bool (*isbase) (char ch);
-static void (*base_encode) (char const *restrict in, size_t inlen,
-                            char *restrict out, size_t outlen);
+static void (*base_encode) (char const *restrict in, idx_t inlen,
+                            char *restrict out, idx_t outlen);
 
 struct base16_decode_context
 {
@@ -234,7 +235,7 @@ struct z85_decode_context
 
 struct base2_decode_context
 {
-  unsigned octet;
+  unsigned char octet;
 };
 
 struct base_decode_context
@@ -248,12 +249,12 @@ struct base_decode_context
     struct z85_decode_context z85;
   } ctx;
   char *inbuf;
-  size_t bufsize;
+  idx_t bufsize;
 };
 static void (*base_decode_ctx_init) (struct base_decode_context *ctx);
 static bool (*base_decode_ctx) (struct base_decode_context *ctx,
-char const *restrict in, size_t inlen,
-char *restrict out, size_t *outlen);
+                                char const *restrict in, idx_t inlen,
+                                char *restrict out, idx_t *outlen);
 #endif
 
 
@@ -275,8 +276,8 @@ base64_decode_ctx_init_wrapper (struct base_decode_context 
*ctx)
 
 static bool
 base64_decode_ctx_wrapper (struct base_decode_context *ctx,
-                           char const *restrict in, size_t inlen,
-                           char *restrict out, size_t *outlen)
+                           char const *restrict in, idx_t inlen,
+                           char *restrict out, idx_t *outlen)
 {
   bool b = base64_decode_ctx (&ctx->ctx.base64, in, inlen, out, outlen);
   ctx->i = ctx->ctx.base64.i;
@@ -291,7 +292,7 @@ init_inbuf (struct base_decode_context *ctx)
 }
 
 static void
-prepare_inbuf (struct base_decode_context *ctx, size_t inlen)
+prepare_inbuf (struct base_decode_context *ctx, idx_t inlen)
 {
   if (ctx->bufsize < inlen)
     {
@@ -302,8 +303,8 @@ prepare_inbuf (struct base_decode_context *ctx, size_t 
inlen)
 
 
 static void
-base64url_encode (char const *restrict in, size_t inlen,
-                  char *restrict out, size_t outlen)
+base64url_encode (char const *restrict in, idx_t inlen,
+                  char *restrict out, idx_t outlen)
 {
   base64_encode (in, inlen, out, outlen);
   /* translate 62nd and 63rd characters */
@@ -335,14 +336,14 @@ base64url_decode_ctx_init_wrapper (struct 
base_decode_context *ctx)
 
 static bool
 base64url_decode_ctx_wrapper (struct base_decode_context *ctx,
-                              char const *restrict in, size_t inlen,
-                              char *restrict out, size_t *outlen)
+                              char const *restrict in, idx_t inlen,
+                              char *restrict out, idx_t *outlen)
 {
   prepare_inbuf (ctx, inlen);
   memcpy (ctx->inbuf, in, inlen);
 
   /* translate 62nd and 63rd characters */
-  size_t i = inlen;
+  idx_t i = inlen;
   char* p = ctx->inbuf;
   while (i--)
     {
@@ -381,8 +382,8 @@ base32_decode_ctx_init_wrapper (struct base_decode_context 
*ctx)
 
 static bool
 base32_decode_ctx_wrapper (struct base_decode_context *ctx,
-                           char const *restrict in, size_t inlen,
-                           char *restrict out, size_t *outlen)
+                           char const *restrict in, idx_t inlen,
+                           char *restrict out, idx_t *outlen)
 {
   bool b = base32_decode_ctx (&ctx->ctx.base32, in, inlen, out, outlen);
   ctx->i = ctx->ctx.base32.i;
@@ -439,8 +440,8 @@ isbase32hex (char ch)
 
 
 static void
-base32hex_encode (char const *restrict in, size_t inlen,
-                  char *restrict out, size_t outlen)
+base32hex_encode (char const *restrict in, idx_t inlen,
+                  char *restrict out, idx_t outlen)
 {
   base32_encode (in, inlen, out, outlen);
 
@@ -462,12 +463,12 @@ base32hex_decode_ctx_init_wrapper (struct 
base_decode_context *ctx)
 
 static bool
 base32hex_decode_ctx_wrapper (struct base_decode_context *ctx,
-                              char const *restrict in, size_t inlen,
-                              char *restrict out, size_t *outlen)
+                              char const *restrict in, idx_t inlen,
+                              char *restrict out, idx_t *outlen)
 {
   prepare_inbuf (ctx, inlen);
 
-  size_t i = inlen;
+  idx_t i = inlen;
   char *p = ctx->inbuf;
   while (i--)
     {
@@ -502,8 +503,8 @@ base16_length (int len)
 static const char base16[16] = "0123456789ABCDEF";
 
 static void
-base16_encode (char const *restrict in, size_t inlen,
-               char *restrict out, size_t outlen)
+base16_encode (char const *restrict in, idx_t inlen,
+               char *restrict out, idx_t outlen)
 {
   while (inlen--)
     {
@@ -526,11 +527,10 @@ base16_decode_ctx_init (struct base_decode_context *ctx)
 
 static bool
 base16_decode_ctx (struct base_decode_context *ctx,
-                   char const *restrict in, size_t inlen,
-                   char *restrict out, size_t *outlen)
+                   char const *restrict in, idx_t inlen,
+                   char *restrict out, idx_t *outlen)
 {
   bool ignore_lines = true;  /* for now, always ignore them */
-  unsigned int nib;
 
   *outlen = 0;
 
@@ -548,15 +548,14 @@ base16_decode_ctx (struct base_decode_context *ctx,
           continue;
         }
 
-      if (*in >= 'A' && *in <= 'F')
-        nib = (*in-'A'+10);
-      else if (*in >= '0' && *in <= '9')
-        nib = (*in-'0');
+      int nib = *in++;
+      if ('0' <= nib && nib <= '9')
+        nib -= '0';
+      else if ('A' <= nib && nib <= 'F')
+        nib -= 'A' - 10;
       else
         return false; /* garbage - return false */
 
-      ++in;
-
       if (ctx->ctx.base16.have_nibble)
         {
           /* have both nibbles, write octet */
@@ -597,13 +596,12 @@ static char const z85_encoding[85] =
   ".-:+=^!/*?&<>()[]{}@%$#";
 
 static void
-z85_encode (char const *restrict in, size_t inlen,
-            char *restrict out, size_t outlen)
+z85_encode (char const *restrict in, idx_t inlen,
+            char *restrict out, idx_t outlen)
 {
   int i = 0;
   unsigned char quad[4];
-  unsigned int val;
-  size_t outidx = 0;
+  idx_t outidx = 0;
 
   while (true)
     {
@@ -626,14 +624,12 @@ z85_encode (char const *restrict in, size_t inlen,
       /* Got a quad, encode it */
       if (i == 4)
         {
-          val = ((uint32_t) quad[0] << 24)
-                + ((uint32_t) quad[1] << 16)
-                + ((uint32_t) quad[2] << 8)
-                + quad[3];
+          int_fast64_t val = quad[0];
+          val = (val << 24) + (quad[1] << 16) + (quad[2] << 8) + quad[3];
 
-          for (int j = 4; j>=0; --j)
+          for (int j = 4; j >= 0; --j)
             {
-              unsigned char c = val%85;
+              int c = val % 85;
               val /= 85;
 
               /* NOTE: if there is padding (which is trimmed by z85
@@ -667,7 +663,7 @@ z85_decode_ctx_init (struct base_decode_context *ctx)
 
 
 # define Z85_HI_CTX_TO_32BIT_VAL(ctx) \
-  ((uint32_t)(ctx)->ctx.z85.octets[0] * 85 * 85 * 85 * 85 )
+  ((int_fast64_t) (ctx)->ctx.z85.octets[0] * 85 * 85 * 85 * 85 )
 
 /*
  0 -  9:  0 1 2 3 4 5 6 7 8 9
@@ -680,25 +676,25 @@ z85_decode_ctx_init (struct base_decode_context *ctx)
  70 - 79:  * ? & < > ( ) [ ] {
  80 - 84:  } @ % $ #
 */
-static unsigned char z85_decoding[93] = {
-  68, 255, 84,  83, 82,  72, 255,              /* ! " # $ % & ' */
-  75, 76,  70,  65, 255, 63, 62,  69,          /* ( ) * + , - . / */
+static signed char const z85_decoding[93] = {
+  68, -1,  84,  83, 82,  72, -1,               /* ! " # $ % & ' */
+  75, 76,  70,  65, -1,  63, 62, 69,           /* ( ) * + , - . / */
   0,  1,   2,   3,  4,   5,  6,   7,  8,  9,   /* '0' to '9' */
-  64, 255, 73,  66, 74,  71, 81,               /* : ; < =  > ? @ */
+  64, -1,  73,  66, 74,  71, 81,               /* : ; < =  > ? @ */
   36, 37,  38,  39, 40,  41, 42,  43, 44, 45,  /* 'A' to 'J' */
   46, 47,  48,  49, 50,  51, 52,  53, 54, 55,  /* 'K' to 'T' */
   56, 57,  58,  59, 60,  61,                   /* 'U' to 'Z' */
-  77, 255, 78,  67, 255, 255,                  /* [ \ ] ^ _ ` */
+  77,  -1, 78,  67,  -1,  -1,                  /* [ \ ] ^ _ ` */
   10, 11,  12,  13, 14,  15, 16,  17, 18, 19,  /* 'a' to 'j' */
   20, 21,  22,  23, 24,  25, 26,  27, 28, 29,  /* 'k' to 't' */
   30, 31,  32,  33, 34,  35,                   /* 'u' to 'z' */
-  79, 255, 80                                  /* { | } */
+  79, -1,  80                                  /* { | } */
 };
 
 static bool
 z85_decode_ctx (struct base_decode_context *ctx,
-                char const *restrict in, size_t inlen,
-                char *restrict out, size_t *outlen)
+                char const *restrict in, idx_t inlen,
+                char *restrict out, idx_t *outlen)
 {
   bool ignore_lines = true;  /* for now, always ignore them */
 
@@ -731,9 +727,10 @@ z85_decode_ctx (struct base_decode_context *ctx,
 
       if (c >= 33 && c <= 125)
         {
-          c = z85_decoding[c-33];
-          if (c == 255)
+          signed char ch = z85_decoding[c - 33];
+          if (ch < 0)
             return false; /* garbage - return false */
+          c = ch;
         }
       else
         return false; /* garbage - return false */
@@ -744,28 +741,16 @@ z85_decode_ctx (struct base_decode_context *ctx,
       if (ctx->ctx.z85.i == 5)
         {
           /* decode the lowest 4 octets, then check for overflows.  */
-          unsigned int val = Z85_LO_CTX_TO_32BIT_VAL (ctx);
+          int_fast64_t val = Z85_LO_CTX_TO_32BIT_VAL (ctx);
 
           /* The Z85 spec and the reference implementation say nothing
-             about overflows. To be on the safe side, reject them.
-
-             '$' (decoded to 83) in the highest octet
-             would result in value of 83*85^4 = 4332651875 , which is larger
-             than 2^32-1 and will overflow an unsigned int (similarly
-             for '$' decoded to 84).
-
-             '%' (decoded to 82) in the highest octet can fit in unsigned int
-             if the other 4 octets decode to a small enough value.
-          */
-          if (ctx->ctx.z85.octets[0] == 84 || ctx->ctx.z85.octets[0] == 83
-              || (ctx->ctx.z85.octets[0] == 82
-                  && val > 0xFFFFFFFF - 82*85*85*85*85U))
-            return false;
+             about overflows. To be on the safe side, reject them.  */
 
-          /* no overflow, add the high octet value */
           val += Z85_HI_CTX_TO_32BIT_VAL (ctx);
+          if ((val >> 24) & ~0xFF)
+            return false;
 
-          *out++ = (val >> 24) & 0xFF;
+          *out++ = val >> 24;
           *out++ = (val >> 16) & 0xFF;
           *out++ = (val >> 8) & 0xFF;
           *out++ = val & 0xFF;
@@ -794,8 +779,8 @@ base2_length (int len)
 
 
 inline static void
-base2msbf_encode (char const *restrict in, size_t inlen,
-                  char *restrict out, size_t outlen)
+base2msbf_encode (char const *restrict in, idx_t inlen,
+                  char *restrict out, idx_t outlen)
 {
   while (inlen--)
     {
@@ -811,8 +796,8 @@ base2msbf_encode (char const *restrict in, size_t inlen,
 }
 
 inline static void
-base2lsbf_encode (char const *restrict in, size_t inlen,
-                  char *restrict out, size_t outlen)
+base2lsbf_encode (char const *restrict in, idx_t inlen,
+                  char *restrict out, idx_t outlen)
 {
   while (inlen--)
     {
@@ -839,8 +824,8 @@ base2_decode_ctx_init (struct base_decode_context *ctx)
 
 static bool
 base2lsbf_decode_ctx (struct base_decode_context *ctx,
-                      char const *restrict in, size_t inlen,
-                      char *restrict out, size_t *outlen)
+                      char const *restrict in, idx_t inlen,
+                      char *restrict out, idx_t *outlen)
 {
   bool ignore_lines = true;  /* for now, always ignore them */
 
@@ -883,8 +868,8 @@ base2lsbf_decode_ctx (struct base_decode_context *ctx,
 
 static bool
 base2msbf_decode_ctx (struct base_decode_context *ctx,
-                      char const *restrict in, size_t inlen,
-                      char *restrict out, size_t *outlen)
+                      char const *restrict in, idx_t inlen,
+                      char *restrict out, idx_t *outlen)
 {
   bool ignore_lines = true;  /* for now, always ignore them */
 
@@ -932,11 +917,9 @@ base2msbf_decode_ctx (struct base_decode_context *ctx,
 
 
 static void
-wrap_write (char const *buffer, size_t len,
-            uintmax_t wrap_column, size_t *current_column, FILE *out)
+wrap_write (char const *buffer, idx_t len,
+            idx_t wrap_column, idx_t *current_column, FILE *out)
 {
-  size_t written;
-
   if (wrap_column == 0)
     {
       /* Simple write. */
@@ -944,11 +927,9 @@ wrap_write (char const *buffer, size_t len,
         die (EXIT_FAILURE, errno, _("write error"));
     }
   else
-    for (written = 0; written < len;)
+    for (idx_t written = 0; written < len; )
       {
-        uintmax_t cols_remaining = wrap_column - *current_column;
-        size_t to_write = MIN (cols_remaining, SIZE_MAX);
-        to_write = MIN (to_write, len - written);
+        idx_t to_write = MIN (wrap_column - *current_column, len - written);
 
         if (to_write == 0)
           {
@@ -967,18 +948,18 @@ wrap_write (char const *buffer, size_t len,
 }
 
 static void
-do_encode (FILE *in, FILE *out, uintmax_t wrap_column)
+do_encode (FILE *in, FILE *out, idx_t wrap_column)
 {
-  size_t current_column = 0;
+  idx_t current_column = 0;
   char *inbuf, *outbuf;
-  size_t sum;
+  idx_t sum;
 
   inbuf = xmalloc (ENC_BLOCKSIZE);
   outbuf = xmalloc (BASE_LENGTH (ENC_BLOCKSIZE));
 
   do
     {
-      size_t n;
+      idx_t n;
 
       sum = 0;
       do
@@ -1015,7 +996,7 @@ static void
 do_decode (FILE *in, FILE *out, bool ignore_garbage)
 {
   char *inbuf, *outbuf;
-  size_t sum;
+  idx_t sum;
   struct base_decode_context ctx;
 
   inbuf = xmalloc (BASE_LENGTH (DEC_BLOCKSIZE));
@@ -1029,17 +1010,16 @@ do_decode (FILE *in, FILE *out, bool ignore_garbage)
   do
     {
       bool ok;
-      size_t n;
-      unsigned int k;
 
       sum = 0;
       do
         {
-          n = fread (inbuf + sum, 1, BASE_LENGTH (DEC_BLOCKSIZE) - sum, in);
+          idx_t n = fread (inbuf + sum,
+                           1, BASE_LENGTH (DEC_BLOCKSIZE) - sum, in);
 
           if (ignore_garbage)
             {
-              for (size_t i = 0; n > 0 && i < n;)
+              for (idx_t i = 0; n > 0 && i < n;)
                 {
                   if (isbase (inbuf[sum + i]) || inbuf[sum + i] == '=')
                     i++;
@@ -1059,11 +1039,11 @@ do_decode (FILE *in, FILE *out, bool ignore_garbage)
          However, when it processes the final input buffer, we want
          to iterate it one additional time, but with an indicator
          telling it to flush what is in CTX.  */
-      for (k = 0; k < 1 + !!feof (in); k++)
+      for (int k = 0; k < 1 + !!feof (in); k++)
         {
           if (k == 1 && ctx.i == 0)
             break;
-          n = DEC_BLOCKSIZE;
+          idx_t n = DEC_BLOCKSIZE;
           ok = base_decode_ctx (&ctx, inbuf, (k == 0 ? sum : 0), outbuf, &n);
 
           if (fwrite (outbuf, 1, n, out) < n)
@@ -1093,8 +1073,8 @@ main (int argc, char **argv)
   bool decode = false;
   /* True if we should ignore non-base-alphabetic characters. */
   bool ignore_garbage = false;
-  /* Wrap encoded data around the 76:th column, by default. */
-  uintmax_t wrap_column = 76;
+  /* Wrap encoded data around the 76th column, by default. */
+  idx_t wrap_column = 76;
 
 #if BASE_TYPE == 42
   int base_type = 0;
@@ -1116,8 +1096,14 @@ main (int argc, char **argv)
         break;
 
       case 'w':
-        wrap_column = xdectoumax (optarg, 0, UINTMAX_MAX, "",
-                                  _("invalid wrap size"), 0);
+        {
+          intmax_t w;
+          strtol_error s_err = xstrtoimax (optarg, NULL, 10, &w, "");
+          if (LONGINT_OVERFLOW < s_err || w < 0)
+            die (EXIT_FAILURE, 0, "%s: %s",
+                 _("invalid wrap size"), quote (optarg));
+          wrap_column = s_err == LONGINT_OVERFLOW || IDX_MAX < w ? 0 : w;
+        }
         break;
 
       case 'i':
diff --git a/tests/misc/base64.pl b/tests/misc/base64.pl
index 3daeb0573..59fdb4a14 100755
--- a/tests/misc/base64.pl
+++ b/tests/misc/base64.pl
@@ -91,9 +91,6 @@ sub gen_tests($)
      ['wrap-bad-3', '-w-1', {IN=>''}, {OUT=>""},
       {ERR_SUBST => 's/base..:/base..:/'},
       {ERR => "base..: invalid wrap size: '-1'\n"}, {EXIT => 1}],
-     ['wrap-bad-4', '-w-0', {IN=>''}, {OUT=>""},
-      {ERR_SUBST => 's/base..:/base..:/'},
-      {ERR => "base..: invalid wrap size: '-0'\n"}, {EXIT => 1}],
 
      ['buf-1',   '--decode', {IN=>&$enc(1)}, {OUT=>'a' x 1}],
      ['buf-2',   '--decode', {IN=>&$enc(2)}, {OUT=>'a' x 2}],
-- 
2.31.1
[Prev in Thread]
Current Thread
[Next in Thread]
[INSTALLED] basenc: prefer signed to unsigned integers, Paul Eggert <=
Prev by Date: Re: how to speed up sort for partially sorted input?
Next by Date: [PATCH] sum: always output a file name if one passed
Previous by thread: [PATCH] tests: avoid reflinks when testing SEEK_DATA logic
Next by thread: [PATCH] sum: always output a file name if one passed
Index(es):
- Date
- Thread