From 2fd319ae0e4047c5579809802b0851187c040e2a Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Wed, 10 Aug 2022 13:59:03 -0700 Subject: [PATCH 3/3] Improve integer overflow checking for formats * awk.h: Include intprops.h here instead of builtin.c. (UINTMAX_MAX, SIZE_MAX): Move here from builtin.c. Simplify UINTMAX_MAX definiens. (WCHAR_MAX, UINTMAX_WIDTH): New macros. (get_number_ui): Remove; unused. (get_number_si, get_number_uj): Replace these macros with functions that check for integer overflow. * builtin.c (TYPE_SIGNED, TYPE_MINIMUM, TYPE_MAXIMUM): Remove; now defined by intprops.h. (INTMAX_MIN): Remove; unused. (UINTMAX_MAX, SIZE_MAX): Move to awk.h. (format_tree): Last arg is int, not long. All uses changed. Check for int overflow when computing field width, precision, arg number, or a wchar_t value. Remove no-longer-needed casts to int. * mpfr.c (GMP_NUMB_BITS, mpz_to_uintmax) [HAVE_MPFR]: New constant and function, copied from GNU Emacs. (integer_overflow): New function. (get_number_si, get_number_uj): Now functions, which check for integer overflow. --- ChangeLog | 22 +++++++++ README_d/README.hacking | 8 ++-- awk.h | 30 ++++++++----- builtin.c | 54 ++++++++--------------- mpfr.c | 98 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 162 insertions(+), 50 deletions(-) diff --git a/ChangeLog b/ChangeLog index eb27aae3..a5248ec9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,27 @@ 2022-08-10 Paul Eggert + Improve integer overflow checking for formats + * awk.h: Include intprops.h here instead of builtin.c. + (UINTMAX_MAX, SIZE_MAX): Move here from builtin.c. + Simplify UINTMAX_MAX definiens. + (WCHAR_MAX, UINTMAX_WIDTH): New macros. + (get_number_ui): Remove; unused. + (get_number_si, get_number_uj): + Replace these macros with functions that check for integer overflow. + * builtin.c (TYPE_SIGNED, TYPE_MINIMUM, TYPE_MAXIMUM): + Remove; now defined by intprops.h. + (INTMAX_MIN): Remove; unused. + (UINTMAX_MAX, SIZE_MAX): Move to awk.h. 
+ (format_tree): Last arg is int, not long. All uses changed. + Check for int overflow when computing field width, + precision, arg number, or a wchar_t value. + Remove no-longer-needed casts to int. + * mpfr.c (GMP_NUMB_BITS, mpz_to_uintmax) [HAVE_MPFR]: + New constant and function, copied from GNU Emacs. + (integer_overflow): New function. + (get_number_si, get_number_uj): Now functions, which check + for integer overflow. + Use _Noreturn a bit more This helps avoid some warnings with GCC 12 in developer mode, in future patches. diff --git a/README_d/README.hacking b/README_d/README.hacking index d179488c..63583b89 100644 --- a/README_d/README.hacking +++ b/README_d/README.hacking @@ -1,9 +1,9 @@ -* Use one of the following macros to access the value of a numeric NODE: - Macro Returned C type +* Use one of the following to access the value of a numeric NODE: + Call Returned C type --------------------------------------- - get_number_ui(n) unsigned long get_number_si(n) long - get_number_d(n) double + get_number_sj(n) intmax_t get_number_uj(n) uintmax_t + get_number_d(n) double * Use iszero(n) to test if a numeric NODE is zero. diff --git a/awk.h b/awk.h index 50f6abe9..3a5c5af1 100644 --- a/awk.h +++ b/awk.h @@ -102,6 +102,22 @@ extern int errno; # include #endif +#include "intprops.h" + +#ifndef UINTMAX_MAX +# define UINTMAX_MAX ((uintmax_t) -1) +#endif +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif +#ifndef WCHAR_MAX +# define WCHAR_MAX TYPE_MAXIMUM (wchar_t) +#endif + +#ifndef UINTMAX_WIDTH +# define UINTMAX_WIDTH TYPE_WIDTH (uintmax_t) +#endif + /* ----------------- System dependencies (with more includes) -----------*/ /* This section is the messiest one in the file, not a lot that can be done */ @@ -1321,14 +1337,8 @@ DEREF(NODE *r) (!((n)->flags & (MPFN|MPZN)) ? (dblval) : (((n)->flags & MPFN) ? 
(mpfrval) : (mpzval))) /* conversion to C types */ -#define get_number_ui(n) numtype_choose((n), mpfr_get_ui((n)->mpg_numbr, ROUND_MODE), mpz_get_ui((n)->mpg_i), (unsigned long) (n)->numbr) - -#define get_number_si(n) numtype_choose((n), mpfr_get_si((n)->mpg_numbr, ROUND_MODE), mpz_get_si((n)->mpg_i), (long) (n)->numbr) - #define get_number_d(n) numtype_choose((n), mpfr_get_d((n)->mpg_numbr, ROUND_MODE), mpz_get_d((n)->mpg_i), (double) (n)->numbr) -#define get_number_uj(n) numtype_choose((n), mpfr_get_uj((n)->mpg_numbr, ROUND_MODE), (uintmax_t) mpz_get_d((n)->mpg_i), (uintmax_t) (n)->numbr) - #define is_zero(n) numtype_choose((n), mpfr_zero_p((n)->mpg_numbr), (mpz_sgn((n)->mpg_i) == 0), ((n)->numbr == 0.0)) #define IEEE_FMT(r, t) (void) (do_ieee_fmt && format_ieee(r, t)) @@ -1339,10 +1349,7 @@ DEREF(NODE *r) #define is_mpg_integer(n) (((n)->flags & MPZN) != 0) #define is_mpg_number(n) (((n)->flags & (MPZN|MPFN)) != 0) #else -#define get_number_ui(n) (unsigned long) (n)->numbr -#define get_number_si(n) (long) (n)->numbr #define get_number_d(n) (double) (n)->numbr -#define get_number_uj(n) (uintmax_t) (n)->numbr #define is_mpg_number(n) 0 #define is_mpg_float(n) 0 @@ -1506,7 +1513,7 @@ extern NODE *do_sub(int nargs, unsigned int flags); extern NODE *call_sub(const char *name, int nargs); extern NODE *call_match(int nargs); extern NODE *call_split_func(const char *name, int nargs); -extern NODE *format_tree(const char *, size_t, NODE **, long); +extern NODE *format_tree(const char *, size_t, NODE **, int); extern NODE *do_lshift(int nargs); extern NODE *do_rshift(int nargs); extern NODE *do_and(int nargs); @@ -1715,6 +1722,9 @@ extern void *mpfr_mem_alloc(size_t alloc_size); extern void *mpfr_mem_realloc(void *ptr, size_t old_size, size_t new_size); extern void mpfr_mem_free(void *ptr, size_t size); #endif +extern _Noreturn void integer_overflow(void); +extern uintmax_t get_number_uj(NODE *); +extern long get_number_si(NODE *); /* msg.c */ extern _Noreturn void 
gawk_exit(int status); extern _Noreturn void final_exit(int status) ATTRIBUTE_NORETURN; diff --git a/builtin.c b/builtin.c index 04fac763..b2fc6a3a 100644 --- a/builtin.c +++ b/builtin.c @@ -39,28 +39,6 @@ # define CHAR_BIT 8 #endif -/* The extra casts work around common compiler bugs. */ -#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1)) -/* Note: these assume that negative integers are represented internally - via 2's complement, which is not mandated by C. They also ignore the - fact that signed integer arithmetic overflow can trigger exceptions, - unlike unsigned which is guaranteed not to do so. */ -#define TYPE_MINIMUM(t) ((t) (TYPE_SIGNED (t) \ - ? ~ (uintmax_t) 0 << (sizeof (t) * CHAR_BIT - 1) \ - : 0)) -#define TYPE_MAXIMUM(t) ((t) (~ (t) 0 - TYPE_MINIMUM (t))) - -#ifndef INTMAX_MIN -# define INTMAX_MIN TYPE_MINIMUM (intmax_t) -#endif -#ifndef UINTMAX_MAX -# define UINTMAX_MAX TYPE_MAXIMUM (uintmax_t) -#endif - -#ifndef SIZE_MAX /* C99 constant, can't rely on it everywhere */ -#define SIZE_MAX ((size_t) -1) -#endif - #define DEFAULT_G_PRECISION 6 static size_t mbc_byte_count(const char *ptr, size_t numchars); @@ -707,7 +685,7 @@ format_tree( const char *fmt_string, size_t n0, NODE **the_args, - long num_args) + int num_args) { /* copy 'l' bytes from 's' to 'obufout' checking for space in the process */ /* difference of pointers should be of ptrdiff_t type, but let us be kind */ @@ -747,7 +725,7 @@ format_tree( osiz += delta; \ } - size_t cur_arg = 0; + int cur_arg = 0; NODE *r = NULL; int i, nc; bool toofew = false; @@ -757,10 +735,11 @@ format_tree( const char *s0, *s1; int cs1; NODE *arg; - long fw, prec, argnum; + int fw, prec, argnum; bool used_dollar; bool lj, alt, have_prec, need_format; - long *cur = NULL; + int *cur = NULL; + long lval; uintmax_t uval; bool sgn; int base; @@ -1000,7 +979,7 @@ check_pos: goto out; } if (argnum >= num_args) { - msg(_("fatal: argument index %ld greater than total number of supplied arguments"), argnum); + msg(_("fatal: 
argument index %d greater than total number of supplied arguments"), argnum); goto out; } } else { @@ -1041,9 +1020,12 @@ check_pos: parse_next_arg(); } (void) force_number(arg); - *cur = get_number_si(arg); + lval = get_number_si(arg); + if (INT_ADD_WRAPV(lval, 0, cur)) + integer_overflow(); if (*cur < 0 && cur == &fw) { - *cur = -*cur; + if (INT_SUBTRACT_WRAPV(0, *cur, cur)) + integer_overflow(); lj = true; } if (cur == &prec) { @@ -1133,7 +1115,7 @@ check_pos: memset(& mbs, 0, sizeof(mbs)); /* handle systems with too small wchar_t */ - if (sizeof(wchar_t) < 4 && uval > 0xffff) { + if (WCHAR_MAX < uval) { if (do_lint) lintwarn( _("[s]printf: value %g is too big for %%c format"), @@ -1629,21 +1611,21 @@ mpf1: case MP_INT_WITH_PREC: sprintf(cp, "*.*Z%c", cs1); while ((nc = mpfr_snprintf(obufout, ofre, cpbuf, - (int) fw, (int) prec, zi)) >= (int) ofre) + fw, prec, zi)) >= (int) ofre) chksize(nc) need_to_add_thousands = true; break; case MP_INT_WITHOUT_PREC: sprintf(cp, "*Z%c", cs1); while ((nc = mpfr_snprintf(obufout, ofre, cpbuf, - (int) fw, zi)) >= (int) ofre) + fw, zi)) >= (int) ofre) chksize(nc) need_to_add_thousands = true; break; case MP_FLOAT: sprintf(cp, "*.*R*%c", cs1); while ((nc = mpfr_snprintf(obufout, ofre, cpbuf, - (int) fw, (int) prec, ROUND_MODE, mf)) >= (int) ofre) + fw, prec, ROUND_MODE, mf)) >= (int) ofre) chksize(nc) break; #endif @@ -1651,7 +1633,7 @@ mpf1: if (have_prec || tolower(cs1) != 'a') { sprintf(cp, "*.*%c", cs1); while ((nc = snprintf(obufout, ofre, cpbuf, - (int) fw, (int) prec, + fw, prec, (double) tmpval)) >= (int) ofre) chksize(nc) } else { @@ -1659,7 +1641,7 @@ mpf1: // wasn't supplied by the user. 
sprintf(cp, "*%c", cs1); while ((nc = snprintf(obufout, ofre, cpbuf, - (int) fw, + fw, (double) tmpval)) >= (int) ofre) chksize(nc) } @@ -2776,7 +2758,7 @@ do_match(int nargs) dest = POP_PARAM(); if (dest->type != Node_var_array) fatal(_("match: third argument is not an array")); - check_symtab_functab(dest, "match", + check_symtab_functab(dest, "match", _("%s: cannot use %s as third argument")); assoc_clear(dest); } diff --git a/mpfr.c b/mpfr.c index 21bac6cd..3fde64e3 100644 --- a/mpfr.c +++ b/mpfr.c @@ -2015,3 +2015,101 @@ mpfr_unset(NODE *n) /* dummy function */ } #endif + + +#ifdef HAVE_MPFR + +/* The following is taken from GNU Emacs. */ + +/* Number of data bits in a limb. */ +# ifndef GMP_NUMB_BITS +enum { GMP_NUMB_BITS = TYPE_WIDTH (mp_limb_t) }; +# endif + +/* If Z fits into *PI, store its value there and return true. + Return false otherwise. */ +static bool +mpz_to_uintmax (mpz_t const z, uintmax_t *pi) +{ + if (mpz_sgn (z) < 0) + return false; + ptrdiff_t bits = mpz_sizeinbase (z, 2); + if (UINTMAX_WIDTH < bits) + return false; + + uintmax_t v = 0; + int i = 0, shift = 0; + + do + { + uintmax_t limb = mpz_getlimbn (z, i++); + v += limb << shift; + shift += GMP_NUMB_BITS; + } + while (shift < bits); + + *pi = v; + return true; +} +#endif + +/* integer_overflow --- report a fatal integer overflow */ + +void +integer_overflow(void) +{ + fatal(_("integer overflow")); +} + +/* get_number_si --- get a long */ + +long +get_number_si(NODE *n) +{ + double lo = LONG_MIN, hi = LONG_MAX; +#ifdef HAVE_MPFR + if (n->flags & (MPFN | MPZN)) { + if (n->flags & MPFN) { + long r; + mpfr_clear_erangeflag (); + r = mpfr_get_si(n->mpg_numbr, ROUND_MODE); + if (mpfr_erangeflag_p()) + integer_overflow(); + return r; + } else { + if (! mpz_fits_slong_p(n->mpg_i)) + integer_overflow(); + return mpz_get_si(n->mpg_i); + } + } +#endif + if (! 
(lo - 1 < n->numbr && n->numbr < hi + 1)) + integer_overflow(); + return n->numbr; +} + +/* get_number_uj --- get an uintmax_t */ + +uintmax_t +get_number_uj(NODE *n) +{ + double hi = UINTMAX_MAX; +#ifdef HAVE_MPFR + if (n->flags & (MPFN | MPZN)) { + uintmax_t r; + if (n->flags & MPFN) { + mpfr_clear_erangeflag (); + r = mpfr_get_uj(n->mpg_numbr, ROUND_MODE); + if (mpfr_erangeflag_p()) + integer_overflow(); + } else { + if (! mpz_to_uintmax(n->mpg_i, &r)) + integer_overflow(); + } + return r; + } +#endif + if (! (-1 < n->numbr && n->numbr < hi + 1)) + integer_overflow(); + return n->numbr; +} -- 2.37.1