gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] gawk branch, stable/printf-rework, updated. gawk-4.1.0-5490-g8d181


From: Arnold Robbins
Subject: [SCM] gawk branch, stable/printf-rework, updated. gawk-4.1.0-5490-g8d18169d
Date: Tue, 2 Jul 2024 14:50:35 -0400 (EDT)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, stable/printf-rework has been updated
       via  8d18169d7124cee926d1755e64c6eb5ae3edef20 (commit)
      from  95fc5822f3f0951df7371000e6cf255df0138643 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=8d18169d7124cee926d1755e64c6eb5ae3edef20

commit 8d18169d7124cee926d1755e64c6eb5ae3edef20
Author: Arnold D. Robbins <arnold@skeeve.com>
Date:   Tue Jul 2 21:50:10 2024 +0300

    Start refactoring format_tree.

diff --git a/ChangeLog b/ChangeLog
index 6847f98d..9108e64e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2024-07-02         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * awk.h (format_tree_new, format_tree_old): Add declarations.
+       (format_tree): Temporarily make into a function pointer.
+       * main.c (format_tree): Declare it.
+       (main): Check PRINTF_NEW environment variable and if so,
+       use the new version. Otherwise use the old one.
+       * printf.c (struct flags): Declare.
+       (format_tree_old): The original version.
+       (format_tree_new): The new one, being refactored.
+       (add_thousands): Use the global `loc' variable, not a parameter.
+       (format_integer): New function.
+       (format_mpg_integer, format_float, format_out_of_range,
+       compute_zero_flag): New functions.
+
 2024-07-02         Arnold D. Robbins     <arnold@skeeve.com>
 
        * re.c (make_regexp): \u escapes also now treated literally
diff --git a/awk.h b/awk.h
index 490fbbc8..fb1a74a7 100644
--- a/awk.h
+++ b/awk.h
@@ -1516,7 +1516,9 @@ extern NODE *do_sub(int nargs, unsigned int flags);
 extern NODE *call_sub(const char *name, int nargs);
 extern NODE *call_match(int nargs);
 extern NODE *call_split_func(const char *name, int nargs);
-extern NODE *format_tree(const char *, size_t, NODE **, long);
+extern NODE *format_tree_old(const char *, size_t, NODE **, long);
+extern NODE *format_tree_new(const char *, size_t, NODE **, long);
+extern NODE *(*format_tree)(const char *, size_t, NODE **, long);
 extern NODE *do_lshift(int nargs);
 extern NODE *do_rshift(int nargs);
 extern NODE *do_and(int nargs);
diff --git a/main.c b/main.c
index 4ef0208d..f71d3584 100644
--- a/main.c
+++ b/main.c
@@ -161,6 +161,8 @@ GETGROUPS_T *groupset;              /* current group set */
 int ngroups;                   /* size of said set */
 #endif
 
+NODE *(*format_tree)(const char *, size_t, NODE **, long);
+
 void (*lintfunc)(const char *mesg, ...) = r_warning;
 
 /* Sorted by long option name! */
@@ -218,6 +220,11 @@ main(int argc, char **argv)
        const char *initial_locale;
 #endif
 
+       if (getenv("PRINTF_NEW") != NULL)
+               format_tree = format_tree_new;
+       else
+               format_tree = format_tree_old;
+
        myname = gawk_name(argv[0]);
 
        check_pma_security(persist_file);
diff --git a/printf.c b/printf.c
index 31910f35..dadca64e 100644
--- a/printf.c
+++ b/printf.c
@@ -27,6 +27,21 @@
 
 #include "awk.h"
 
+struct flags {  /* NOTE(review): fields look like format_tree_old's per-conversion locals; confirm against format_tree_new */
+       bool left_just;  /* cf. lj (`-' flag) in format_tree_old */
+       bool alt;  /* cf. alt (`#' flag) */
+       bool zero;  /* cf. zero_flag (`0' flag) */
+       bool space;  /* cf. signchar == ' ' */
+       bool plus;  /* cf. signchar == '+' */
+       bool quote;  /* cf. quote_flag (`'' flag) */
+       bool have_prec;  /* cf. have_prec (set when `.' is seen) */
+       bool magic_posix_flag;  /* cf. magic_posix_flag (`P' modifier) */
+       char format;  /* presumably the conversion character, cf. cs1 -- confirm */
+       int base;  /* cf. base (8, 10 or 16 for o/u/x/X) */
+       int field_width;  /* cf. fw */
+       int precision;  /* cf. prec */
+};
+
 extern int max_args;
 extern NODE **args_array;
 extern FILE *output_fp;
@@ -36,7 +51,12 @@ extern FILE *output_fp;
 static size_t mbc_byte_count(const char *ptr, size_t numchars);
 static size_t mbc_char_count(const char *ptr, size_t numbytes);
 static void reverse(char *str);
-static const char *add_thousands(const char *original, struct lconv *loc);
+static const char *add_thousands(const char *original);
+static const char *format_integer(NODE *arg, struct flags *flags);
+static const char *format_mpg_integer(NODE *arg, struct flags *flags);
+static const char *format_float(NODE *arg, struct flags *flags);
+static const char *format_out_of_range(NODE *arg, struct flags *flags);
+static bool compute_zero_flag(struct flags *flags);
 
 #ifdef HAVE_MPFR
 
@@ -47,45 +67,1074 @@ static const char *add_thousands(const char *original, 
struct lconv *loc);
  */
 
 
-static mpfr_ptr
-mpz2mpfr(mpz_ptr zi)
-{
-       size_t prec;
-       static mpfr_t mpfrval;
-       static bool inited = false;
-       int tval;
+static mpfr_ptr
+mpz2mpfr(mpz_ptr zi)  /* convert the GMP integer zi into a function-static MPFR value and return it */
+{
+       size_t prec;
+       static mpfr_t mpfrval;  /* result storage, reused and overwritten on every call */
+       static bool inited = false;  /* mpfrval is initialized only on the first call */
+       int tval;  /* value returned by mpfr_set_z, handed to IEEE_FMT below */
+
+       /* estimate minimum precision for exact conversion */
+       prec = mpz_sizeinbase(zi, 2);   /* most significant 1 bit position starting at 1 */
+       prec -= (size_t) mpz_scan1(zi, 0);      /* least significant 1 bit index starting at 0 */
+       if (prec < MPFR_PREC_MIN)  /* clamp prec into [MPFR_PREC_MIN, MPFR_PREC_MAX] */
+               prec = MPFR_PREC_MIN;
+       else if (prec > MPFR_PREC_MAX)
+               prec = MPFR_PREC_MAX;
+
+       if (! inited) {
+               mpfr_init2(mpfrval, prec);  /* first call: initialize with the computed precision */
+               inited = true;
+       } else
+               mpfr_set_prec(mpfrval, prec);  /* later calls: only change the precision of the existing value */
+       tval = mpfr_set_z(mpfrval, zi, ROUND_MODE);
+       IEEE_FMT(mpfrval, tval);  /* NOTE(review): macro defined elsewhere; its effect is not visible here */
+       return mpfrval;  /* points at the static value; the next call overwrites it */
+}
+#endif
+
+/*
+ * format_tree() formats the arguments of sprintf
+ * according to fmt_string, which provides a format like those of the
+ * printf family in the C library.  Returns a string node whose value
+ * is the formatted string.  Called by the sprintf function.
+ *
+ * It is one of the uglier parts of gawk.  Thanks to Michal Jaegermann
+ * for taming this beast and making it compatible with ANSI C.
+ */
+
+NODE *
+format_tree_old(
+       const char *fmt_string,
+       size_t n0,
+       NODE **the_args,
+       long num_args)
+{
+/* copy 'l' bytes from 's' to 'obufout' checking for space in the process */
+/* difference of pointers should be of ptrdiff_t type, but let us be kind */
+#define bchunk(s, l) if (l) { \
+       while ((l) > ofre) { \
+               size_t olen = obufout - obuf; \
+               erealloc(obuf, char *, osiz * 2, "format_tree"); \
+               ofre += osiz; \
+               osiz *= 2; \
+               obufout = obuf + olen; \
+       } \
+       memcpy(obufout, s, (size_t) (l)); \
+       obufout += (l); \
+       ofre -= (l); \
+}
+
+/* copy one byte from 's' to 'obufout' checking for space in the process */
+#define bchunk_one(s) { \
+       if (ofre < 1) { \
+               size_t olen = obufout - obuf; \
+               erealloc(obuf, char *, osiz * 2, "format_tree"); \
+               ofre += osiz; \
+               osiz *= 2; \
+               obufout = obuf + olen; \
+       } \
+       *obufout++ = *s; \
+       --ofre; \
+}
+
+/* Is there space for something L big in the buffer? */
+#define chksize(l)  if ((l) >= ofre) { \
+       size_t olen = obufout - obuf; \
+       size_t delta = osiz+l-ofre; \
+       erealloc(obuf, char *, osiz + delta, "format_tree"); \
+       obufout = obuf + olen; \
+       ofre += delta; \
+       osiz += delta; \
+}
+
+       size_t cur_arg = 0;
+       NODE *r = NULL;
+       int i, nc;
+       bool toofew = false;
+       char *obuf, *obufout;
+       size_t osiz, ofre, olen_final;
+       const char *chbuf;
+       const char *s0, *s1;
+       int cs1;
+       NODE *arg;
+       long fw, prec, argnum;
+       bool used_dollar;
+       bool lj, alt, have_prec, need_format;
+       long *cur = NULL;
+       uintmax_t uval;
+       bool sgn;
+       int base;
+       /*
+        * Although this is an array, the elements serve two different
+        * purposes. The first element is the general buffer meant
+        * to hold the entire result string.  The second one is a
+        * temporary buffer for large floating point values. They
+        * could just as easily be separate variables, and the
+        * code might arguably be clearer.
+        */
+       struct {
+               char *buf;
+               size_t bufsize;
+               char stackbuf[30];
+       } cpbufs[2];
+#define cpbuf  cpbufs[0].buf
+       char *cend = &cpbufs[0].stackbuf[sizeof(cpbufs[0].stackbuf)];
+       char *cp;
+       const char *fill;
+       AWKNUM tmpval = 0.0;
+       char signchar = '\0';
+       size_t len;
+       bool zero_flag = false;
+       bool quote_flag = false;
+       int ii, jj;
+       char *chp;
+       size_t copy_count, char_count;
+       char *nan_inf_val;
+       bool magic_posix_flag;
+#ifdef HAVE_MPFR
+       mpz_ptr zi;
+       mpfr_ptr mf;
+#endif
+       enum { MP_NONE = 0, MP_INT_WITH_PREC = 1, MP_INT_WITHOUT_PREC, MP_FLOAT 
} fmt_type;
+
+       static const char sp[] = " ";
+       static const char zero_string[] = "0";
+       static const char lchbuf[] = "0123456789abcdef";
+       static const char Uchbuf[] = "0123456789ABCDEF";
+       static const char bad_modifiers[] = "hjlLtz";
+       static bool warned[sizeof(bad_modifiers)-1];    // auto-init to zero
+
+       bool modifier_seen[sizeof(bad_modifiers)-1];
+#define modifier_index(c)  (strchr(bad_modifiers, c) - bad_modifiers)
+
+#define INITIAL_OUT_SIZE       64
+       emalloc(obuf, char *, INITIAL_OUT_SIZE, "format_tree");
+       obufout = obuf;
+       osiz = INITIAL_OUT_SIZE;
+       ofre = osiz - 1;
+
+       cur_arg = 1;
+
+       {
+               size_t k;
+               for (k = 0; k < sizeof(cpbufs)/sizeof(cpbufs[0]); k++) {
+                       cpbufs[k].bufsize = sizeof(cpbufs[k].stackbuf);
+                       cpbufs[k].buf = cpbufs[k].stackbuf;
+               }
+       }
+
+       /*
+        * The point of this goop is to grow the buffer
+        * holding the converted number, so that large
+        * values don't overflow a fixed length buffer.
+        */
+#define PREPEND(CH) do {       \
+       if (cp == cpbufs[0].buf) {      \
+               char *prev = cpbufs[0].buf;     \
+               emalloc(cpbufs[0].buf, char *, 2*cpbufs[0].bufsize, \
+                       "format_tree"); \
+               memcpy((cp = cpbufs[0].buf+cpbufs[0].bufsize), prev,    \
+                      cpbufs[0].bufsize);      \
+               cpbufs[0].bufsize *= 2; \
+               if (prev != cpbufs[0].stackbuf) \
+                       efree(prev);    \
+               cend = cpbufs[0].buf+cpbufs[0].bufsize; \
+       }       \
+       *--cp = (CH);   \
+} while(0)
+
+       /*
+        * Check first for use of `count$'.
+        * If plain argument retrieval was used earlier, choke.
+        *      Otherwise, return the requested argument.
+        * If not `count$' now, but it was used earlier, choke.
+        * If this format is more than total number of args, choke.
+        * Otherwise, return the current argument.
+        */
+#define parse_next_arg() { \
+       if (argnum > 0) { \
+               if (cur_arg > 1) { \
+                       msg(_("fatal: must use `count$' on all formats or 
none")); \
+                       goto out; \
+               } \
+               arg = the_args[argnum]; \
+       } else if (used_dollar) { \
+               msg(_("fatal: must use `count$' on all formats or none")); \
+               arg = 0; /* shutup the compiler */ \
+               goto out; \
+       } else if (cur_arg >= num_args) { \
+               arg = 0; /* shutup the compiler */ \
+               toofew = true; \
+               break; \
+       } else { \
+               arg = the_args[cur_arg]; \
+               cur_arg++; \
+       } \
+}
+
+       need_format = false;
+       used_dollar = false;
+
+       s0 = s1 = fmt_string;
+       while (n0-- > 0) {
+               if (*s1 != '%') {
+                       s1++;
+                       continue;
+               }
+               need_format = true;
+               bchunk(s0, s1 - s0);
+               s0 = s1;
+               cur = &fw;
+               fw = 0;
+               prec = 0;
+               base = 0;
+               argnum = 0;
+               base = 0;
+               have_prec = false;
+               signchar = '\0';
+               zero_flag = false;
+               quote_flag = false;
+               nan_inf_val = NULL;
+#ifdef HAVE_MPFR
+               mf = NULL;
+               zi = NULL;
+#endif
+               fmt_type = MP_NONE;
+
+               lj = alt = false;
+               memset(modifier_seen, 0, sizeof(modifier_seen));
+               magic_posix_flag = false;
+               fill = sp;
+               cp = cend;
+               chbuf = lchbuf;
+               s1++;
+
+retry:
+               if (n0-- == 0)  /* ran out early! */
+                       break;
+
+               switch (cs1 = *s1++) {
+               case (-1):      /* dummy case to allow for checking */
+check_pos:
+                       if (cur != &fw)
+                               break;          /* reject as a valid format */
+                       goto retry;
+               case '%':
+                       need_format = false;
+                       /*
+                        * 29 Oct. 2002:
+                        * The C99 standard pages 274 and 279 seem to imply that
+                        * since there's no arg converted, the field width 
doesn't
+                        * apply.  The code already was that way, but this
+                        * comment documents it, at least in the code.
+                        */
+                       if (do_lint) {
+                               const char *msg = NULL;
+
+                               if (fw && ! have_prec)
+                                       msg = _("field width is ignored for 
`%%' specifier");
+                               else if (fw == 0 && have_prec)
+                                       msg = _("precision is ignored for `%%' 
specifier");
+                               else if (fw && have_prec)
+                                       msg = _("field width and precision are 
ignored for `%%' specifier");
+
+                               if (msg != NULL)
+                                       lintwarn("%s", msg);
+                       }
+                       bchunk_one("%");
+                       s0 = s1;
+                       break;
+
+               case '0':
+                       /*
+                        * Only turn on zero_flag if we haven't seen
+                        * the field width or precision yet.  Otherwise,
+                        * screws up floating point formatting.
+                        */
+                       if (cur == & fw)
+                               zero_flag = true;
+                       if (lj)
+                               goto retry;
+                       /* fall through */
+               case '1':
+               case '2':
+               case '3':
+               case '4':
+               case '5':
+               case '6':
+               case '7':
+               case '8':
+               case '9':
+                       if (cur == NULL)
+                               break;
+                       if (prec >= 0)
+                               *cur = cs1 - '0';
+                       /*
+                        * with a negative precision *cur is already set
+                        * to -1, so it will remain negative, but we have
+                        * to "eat" precision digits in any case
+                        */
+                       while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
+                               --n0;
+                               *cur = *cur * 10 + *s1++ - '0';
+                       }
+                       if (prec < 0)   /* negative precision is discarded */
+                               have_prec = false;
+                       if (cur == &prec)
+                               cur = NULL;
+                       if (n0 == 0)    /* badly formatted control string */
+                               continue;
+                       goto retry;
+               case '$':
+                       if (do_traditional) {
+                               msg(_("fatal: `$' is not permitted in awk 
formats"));
+                               goto out;
+                       }
+
+                       if (cur == &fw) {
+                               argnum = fw;
+                               fw = 0;
+                               used_dollar = true;
+                               if (argnum <= 0) {
+                                       msg(_("fatal: argument index with `$' 
must be > 0"));
+                                       goto out;
+                               }
+                               if (argnum >= num_args) {
+                                       msg(_("fatal: argument index %ld 
greater than total number of supplied arguments"), argnum);
+                                       goto out;
+                               }
+                       } else {
+                               msg(_("fatal: `$' not permitted after period in 
format"));
+                               goto out;
+                       }
+
+                       goto retry;
+               case '*':
+                       if (cur == NULL)
+                               break;
+                       if (! do_traditional && used_dollar && ! 
isdigit((unsigned char) *s1)) {
+                               fatal(_("fatal: must use `count$' on all 
formats or none"));
+                               break;  /* silence warnings */
+                       } else if (! do_traditional && isdigit((unsigned char) 
*s1)) {
+                               int val = 0;
+
+                               for (; n0 > 0 && *s1 && isdigit((unsigned char) 
*s1); s1++, n0--) {
+                                       val *= 10;
+                                       val += *s1 - '0';
+                               }
+                               if (*s1 != '$') {
+                                       msg(_("fatal: no `$' supplied for 
positional field width or precision"));
+                                       goto out;
+                               } else {
+                                       s1++;
+                                       n0--;
+                               }
+                               // val could be less than zero if someone 
provides a field width
+                               // so large that it causes integer overflow. 
Mainly fuzzers do this,
+                               // but let's try to be good anyway.
+                               if (val < 0 || val >= num_args) {
+                                       toofew = true;
+                                       break;
+                               }
+                               arg = the_args[val];
+                       } else {
+                               parse_next_arg();
+                       }
+                       (void) force_number(arg);
+                       *cur = get_number_si(arg);
+                       if (*cur < 0 && cur == &fw) {
+                               *cur = -*cur;
+                               lj = true;
+                       }
+                       if (cur == &prec) {
+                               if (*cur >= 0)
+                                       have_prec = true;
+                               else
+                                       have_prec = false;
+                               cur = NULL;
+                       }
+                       goto retry;
+               case ' ':               /* print ' ' or '-' */
+                                       /* 'space' flag is ignored */
+                                       /* if '+' already present  */
+                       if (signchar != false)
+                               goto check_pos;
+                       /* FALL THROUGH */
+               case '+':               /* print '+' or '-' */
+                       signchar = cs1;
+                       goto check_pos;
+               case '-':
+                       if (prec < 0)
+                               break;
+                       if (cur == &prec) {
+                               prec = -1;
+                               goto retry;
+                       }
+                       fill = sp;      /* if left justified then other */
+                       lj = true;      /* filling is ignored */
+                       goto check_pos;
+               case '.':
+                       if (cur != &fw)
+                               break;
+                       cur = &prec;
+                       have_prec = true;
+                       goto retry;
+               case '#':
+                       alt = true;
+                       goto check_pos;
+               case '\'':
+#if defined(HAVE_LOCALE_H)
+                       quote_flag = true;
+                       goto check_pos;
+#else
+                       goto retry;
+#endif
+               case 'h':
+               case 'j':
+               case 'l':
+               case 'L':
+               case 't':
+               case 'z':
+                       if (modifier_seen[modifier_index(cs1)])
+                               break;
+                       else {
+                               int ind = modifier_index(cs1);
+
+                               if (do_lint && ! warned[ind]) {
+                                       lintwarn(_("`%c' is meaningless in awk 
formats; ignored"), cs1);
+                                       warned[ind] = true;
+                               }
+                               if (do_posix) {
+                                       msg(_("fatal: `%c' is not permitted in 
POSIX awk formats"), cs1);
+                                       goto out;
+                               }
+                       }
+                       modifier_seen[modifier_index(cs1)] = true;
+                       goto retry;
+
+               case 'P':
+                       if (magic_posix_flag)
+                               break;
+                       magic_posix_flag = true;
+                       goto retry;
+               case 'c':
+                       need_format = false;
+                       parse_next_arg();
+                       /* user input that looks numeric is numeric */
+                       fixtype(arg);
+                       if ((arg->flags & NUMBER) != 0) {
+                               uval = get_number_uj(arg);
+                               if (gawk_mb_cur_max > 1) {
+                                       char buf[100];
+                                       wchar_t wc;
+                                       mbstate_t mbs;
+                                       size_t count;
+
+                                       memset(& mbs, 0, sizeof(mbs));
+
+                                       /* handle systems with too small 
wchar_t */
+                                       if (sizeof(wchar_t) < 4 && uval > 
0xffff) {
+                                               if (do_lint)
+                                                       lintwarn(
+                                               _("[s]printf: value %g is too 
big for %%c format"),
+                                                                       
arg->numbr);
+
+                                               goto out0;
+                                       }
+
+                                       wc = uval;
+
+                                       count = wcrtomb(buf, wc, & mbs);
+                                       if (count == 0
+                                           || count == (size_t) -1) {
+                                               if (do_lint)
+                                                       lintwarn(
+                                               _("[s]printf: value %g is not a 
valid wide character"),
+                                                                       
arg->numbr);
+
+                                               goto out0;
+                                       }
+
+                                       memcpy(cpbuf, buf, count);
+                                       prec = count;
+                                       cp = cpbuf;
+                                       goto pr_tail;
+                               }
+out0:
+                               ;
+                               /* else,
+                                       fall through */
+
+                               cpbuf[0] = uval;
+                               prec = 1;
+                               cp = cpbuf;
+                               goto pr_tail;
+                       }
+                       /*
+                        * As per POSIX, only output first character of a
+                        * string value.  Thus, we ignore any provided
+                        * precision, forcing it to 1.  (Didn't this
+                        * used to work? 6/2003.)
+                        */
+                       cp = arg->stptr;
+                       prec = 1;
+                       /*
+                        * First character can be multiple bytes if
+                        * it's a multibyte character. Grr.
+                        */
+                       if (gawk_mb_cur_max > 1) {
+                               mbstate_t state;
+                               size_t count;
+
+                               memset(& state, 0, sizeof(state));
+                               count = mbrlen(cp, arg->stlen, & state);
+                               if (count != (size_t) -1 && count != (size_t) 
-2 && count > 0) {
+                                       prec = count;
+                                       /* may need to increase fw so that 
padding happens, see pr_tail code */
+                                       if (fw > 0)
+                                               fw += count - 1;
+                               }
+                       }
+                       goto pr_tail;
+               case 's':
+                       need_format = false;
+                       parse_next_arg();
+                       arg = force_string(arg);
+                       if (fw == 0 && ! have_prec)
+                               prec = arg->stlen;
+                       else {
+                               char_count = mbc_char_count(arg->stptr, 
arg->stlen);
+                               if (! have_prec || prec > char_count)
+                                       prec = char_count;
+                       }
+                       cp = arg->stptr;
+                       goto pr_tail;
+               case 'd':
+               case 'i':
+                       need_format = false;
+                       parse_next_arg();
+                       (void) force_number(arg);
+
+                       /*
+                        * Check for Nan or Inf.
+                        */
+                       if (out_of_range(arg))
+                               goto out_of_range;
+#ifdef HAVE_MPFR
+                       if (is_mpg_float(arg))
+                               goto mpf0;
+                       else if (is_mpg_integer(arg))
+                               goto mpz0;
+                       else
+#endif
+                       tmpval = double_to_int(arg->numbr);
+
+                       /*
+                        * ``The result of converting a zero value with a
+                        * precision of zero is no characters.''
+                        */
+                       if (have_prec && prec == 0 && tmpval == 0)
+                               goto pr_tail;
+
+                       if (tmpval < 0) {
+                               tmpval = -tmpval;
+                               sgn = true;
+                       } else {
+                               if (tmpval == -0.0)
+                                       /* avoid printing -0 */
+                                       tmpval = 0.0;
+                               sgn = false;
+                       }
+                       /*
+                        * Use snprintf return value to tell if there
+                        * is enough room in the buffer or not.
+                        */
+                       while ((i = snprintf(cpbufs[1].buf,
+                                            cpbufs[1].bufsize, "%.0f",
+                                            tmpval)) >=
+                              cpbufs[1].bufsize) {
+                               if (cpbufs[1].buf == cpbufs[1].stackbuf)
+                                       cpbufs[1].buf = NULL;
+                               if (i > 0) {
+                                       cpbufs[1].bufsize += ((i > 
cpbufs[1].bufsize) ?
+                                                             i : 
cpbufs[1].bufsize);
+                               }
+                               else
+                                       cpbufs[1].bufsize *= 2;
+                               assert(cpbufs[1].bufsize > 0);
+                               erealloc(cpbufs[1].buf, char *,
+                                        cpbufs[1].bufsize, "format_tree");
+                       }
+                       if (i < 1)
+                               goto out_of_range;
+#if defined(HAVE_LOCALE_H)
+                       quote_flag = (quote_flag && loc.thousands_sep[0] != 0);
+#endif
+                       chp = &cpbufs[1].buf[i-1];
+                       ii = jj = 0;
+                       do {
+                               PREPEND(*chp);
+                               chp--; i--;
+#if defined(HAVE_LOCALE_H)
+                               if (quote_flag && loc.grouping[ii] && ++jj == 
loc.grouping[ii]) {
+                                       if (i) {        /* only add if more 
digits coming */
+                                               int k;
+                                               const char *ts = 
loc.thousands_sep;
+
+                                               for (k = strlen(ts) - 1; k >= 
0; k--) {
+                                                       PREPEND(ts[k]);
+                                               }
+                                       }
+                                       if (loc.grouping[ii+1] == 0)
+                                               jj = 0;         /* keep using 
current val in loc.grouping[ii] */
+                                       else if (loc.grouping[ii+1] == CHAR_MAX)
+                                               quote_flag = false;
+                                       else {
+                                               ii++;
+                                               jj = 0;
+                                       }
+                               }
+#endif
+                       } while (i > 0);
+
+                       /* add more output digits to match the precision */
+                       if (have_prec) {
+                               while (cend - cp < prec)
+                                       PREPEND('0');
+                       }
+
+                       if (sgn)
+                               PREPEND('-');
+                       else if (signchar)
+                               PREPEND(signchar);
+                       /*
+                        * When to fill with zeroes is of course not simple.
+                        * First: No zero fill if left-justifying.
+                        * Next: There seem to be two cases:
+                        *      A '0' without a precision, e.g. %06d
+                        *      A precision with no field width, e.g. %.10d
+                        * Any other case, we don't want to fill with zeroes.
+                        */
+                       if (! lj
+                           && ((zero_flag && ! have_prec)
+                                || (fw == 0 && have_prec)))
+                               fill = zero_string;
+                       if (prec > fw)
+                               fw = prec;
+                       prec = cend - cp;
+                       if (fw > prec && ! lj && fill != sp
+                           && (*cp == '-' || signchar)) {
+                               bchunk_one(cp);
+                               cp++;
+                               prec--;
+                               fw--;
+                       }
+                       goto pr_tail;
+               case 'X':
+                       chbuf = Uchbuf; /* FALL THROUGH */
+               case 'x':
+                       base += 6;      /* FALL THROUGH */
+               case 'u':
+                       base += 2;      /* FALL THROUGH */
+               case 'o':
+                       base += 8;
+                       need_format = false;
+                       parse_next_arg();
+                       (void) force_number(arg);
+
+                       if (out_of_range(arg))
+                               goto out_of_range;
+#ifdef HAVE_MPFR
+                       if (is_mpg_integer(arg)) {
+mpz0:
+                               zi = arg->mpg_i;
+
+                               if (cs1 != 'd' && cs1 != 'i') {
+                                       if (mpz_sgn(zi) <= 0) {
+                                               /*
+                                                * Negative value or 0 requires 
special handling.
+                                                * Unlike MPFR, GMP does not 
allow conversion
+                                                * to (u)intmax_t. So we first 
convert GMP type to
+                                                * a MPFR type.
+                                                */
+                                               mf = mpz2mpfr(zi);
+                                               goto mpf1;
+                                       }
+                                       signchar = '\0';        /* Don't print 
'+' */
+                               }
+
+                               /* See comments above about when to fill with 
zeros */
+                               zero_flag = (! lj
+                                                   && ((zero_flag && ! 
have_prec)
+                                                        || (fw == 0 && 
have_prec)));
+
+                               fmt_type = have_prec ? MP_INT_WITH_PREC : 
MP_INT_WITHOUT_PREC;
+                               goto fmt0;
+
+                       } else if (is_mpg_float(arg)) {
+mpf0:
+                               mf = arg->mpg_numbr;
+                               if (! mpfr_number_p(mf)) {
+                                       /* inf or NaN */
+                                       cs1 = 'g';
+                                       fmt_type = MP_FLOAT;
+                                       goto fmt1;
+                               }
+
+                               if (cs1 != 'd' && cs1 != 'i') {
+mpf1:
+                                       /*
+                                        * The output of printf("%#.0x", 0) is 
0 instead of 0x, hence <= in
+                                        * the comparison below.
+                                        */
+                                       if (mpfr_sgn(mf) <= 0) {
+                                               if (! mpfr_fits_intmax_p(mf, 
ROUND_MODE)) {
+                                                       /* -ve number is too 
large */
+                                                       cs1 = 'g';
+                                                       fmt_type = MP_FLOAT;
+                                                       goto fmt1;
+                                               }
+
+                                               tmpval = uval = (uintmax_t) 
mpfr_get_sj(mf, ROUND_MODE);
+                                               if (! alt && have_prec && prec 
== 0 && tmpval == 0)
+                                                       goto pr_tail;   /* 
printf("%.0x", 0) is no characters */
+                                               goto int0;
+                                       }
+                                       signchar = '\0';        /* Don't print 
'+' */
+                               }
+
+                               /* See comments above about when to fill with 
zeros */
+                               zero_flag = (! lj
+                                                   && ((zero_flag && ! 
have_prec)
+                                                        || (fw == 0 && 
have_prec)));
+
+                               (void) mpfr_get_z(mpzval, mf, MPFR_RNDZ);       
/* convert to GMP integer */
+                               fmt_type = have_prec ? MP_INT_WITH_PREC : 
MP_INT_WITHOUT_PREC;
+                               zi = mpzval;
+                               goto fmt0;
+                       } else
+#endif
+                               tmpval = arg->numbr;
+
+                       /*
+                        * ``The result of converting a zero value with a
+                        * precision of zero is no characters.''
+                        *
+                        * If I remember the ANSI C standard, though,
+                        * it says that for octal conversions
+                        * the precision is artificially increased
+                        * to add an extra 0 if # is supplied.
+                        * Indeed, in C,
+                        *      printf("%#.0o\n", 0);
+                        * prints a single 0.
+                        */
+                       if (! alt && have_prec && prec == 0 && tmpval == 0)
+                               goto pr_tail;
+
+                       if (tmpval < 0) {
+                               uval = (uintmax_t) (intmax_t) tmpval;
+                               if ((AWKNUM)(intmax_t)uval != 
double_to_int(tmpval))
+                                       goto out_of_range;
+                       } else {
+                               uval = (uintmax_t) tmpval;
+                               if ((AWKNUM)uval != double_to_int(tmpval))
+                                       goto out_of_range;
+                       }
+#ifdef HAVE_MPFR
+       int0:
+#endif
+#if defined(HAVE_LOCALE_H)
+                       quote_flag = (quote_flag && loc.thousands_sep[0] != 0);
+#endif
+                       /*
+                        * When to fill with zeroes is of course not simple.
+                        * First: No zero fill if left-justifying.
+                        * Next: There seem to be two cases:
+                        *      A '0' without a precision, e.g. %06d
+                        *      A precision with no field width, e.g. %.10d
+                        * Any other case, we don't want to fill with zeroes.
+                        */
+                       if (! lj
+                           && ((zero_flag && ! have_prec)
+                                || (fw == 0 && have_prec)))
+                               fill = zero_string;
+                       ii = jj = 0;
+                       do {
+                               PREPEND(chbuf[uval % base]);
+                               uval /= base;
+#if defined(HAVE_LOCALE_H)
+                               if (base == 10 && quote_flag && 
loc.grouping[ii] && ++jj == loc.grouping[ii]) {
+                                       if (uval) {     /* only add if more 
digits coming */
+                                               int k;
+                                               const char *ts = 
loc.thousands_sep;
+
+                                               for (k = strlen(ts) - 1; k >= 
0; k--) {
+                                                       PREPEND(ts[k]);
+                                               }
+                                       }
+                                       if (loc.grouping[ii+1] == 0)
+                                               jj = 0;     /* keep using 
current val in loc.grouping[ii] */
+                                       else if (loc.grouping[ii+1] == CHAR_MAX)
+                                               quote_flag = false;
+                                       else {
+                                               ii++;
+                                               jj = 0;
+                                       }
+                               }
+#endif
+                       } while (uval > 0);
+
+                       /* add more output digits to match the precision */
+                       if (have_prec) {
+                               while (cend - cp < prec)
+                                       PREPEND('0');
+                       }
+
+                       if (alt && tmpval != 0) {
+                               if (base == 16) {
+                                       PREPEND(cs1);
+                                       PREPEND('0');
+                                       if (fill != sp) {
+                                               bchunk(cp, 2);
+                                               cp += 2;
+                                               fw -= 2;
+                                       }
+                               } else if (base == 8)
+                                       PREPEND('0');
+                       }
+                       base = 0;
+                       if (prec > fw)
+                               fw = prec;
+                       prec = cend - cp;
+       pr_tail:
+                       if (! lj) {
+                               while (fw > prec) {
+                                       bchunk_one(fill);
+                                       fw--;
+                               }
+                       }
+                       copy_count = prec;
+                       if (fw == 0 && ! have_prec)
+                               ;
+                       else if (gawk_mb_cur_max > 1) {
+                               if (cs1 == 's') {
+                                       assert(cp == arg->stptr || cp == cpbuf);
+                                       copy_count = mbc_byte_count(arg->stptr, 
prec);
+                               }
+                               /* prec was set by code for %c */
+                               /* else
+                                       copy_count = prec; */
+                       }
+                       bchunk(cp, copy_count);
+                       while (fw > prec) {
+                               bchunk_one(fill);
+                               fw--;
+                       }
+                       s0 = s1;
+                       break;
+
+     out_of_range:
+                       /*
+                        * out of range - emergency use of %g format,
+                        * or format NaN and INF values.
+                        */
+                       nan_inf_val = format_nan_inf(arg, cs1);
+                       if (do_posix || magic_posix_flag || nan_inf_val == 
NULL) {
+                               if (do_lint && ! do_posix && ! magic_posix_flag)
+                                       lintwarn(_("[s]printf: value %g is out 
of range for `%%%c' format"),
+                                                               (double) 
tmpval, cs1);
+                               tmpval = arg->numbr;
+                               if (strchr("aAeEfFgG", cs1) == NULL)
+                                       cs1 = 'g';
+                               goto fmt1;
+                       } else {
+                               if (do_lint)
+                                       lintwarn(_("[s]printf: value %s is out 
of range for `%%%c' format"),
+                                                               nan_inf_val, 
cs1);
+                               bchunk(nan_inf_val, strlen(nan_inf_val));
+                               s0 = s1;
+                               break;
+                       }
+
+               case 'F':
+#if ! defined(PRINTF_HAS_F_FORMAT) || PRINTF_HAS_F_FORMAT != 1
+                       cs1 = 'f';
+                       /* FALL THROUGH */
+#endif
+               case 'g':
+               case 'G':
+               case 'e':
+               case 'f':
+               case 'E':
+#if defined(PRINTF_HAS_A_FORMAT) && PRINTF_HAS_A_FORMAT == 1
+               case 'A':
+               case 'a':
+               {
+                       static bool warned = false;
+
+                       if (do_lint && tolower(cs1) == 'a' && ! warned) {
+                               warned = true;
+                               lintwarn(_("%%%c format is POSIX standard but 
not portable to other awks"), cs1);
+                       }
+               }
+#endif
+                       need_format = false;
+                       parse_next_arg();
+                       (void) force_number(arg);
+
+                       if (! is_mpg_number(arg))
+                               tmpval = arg->numbr;
+#ifdef HAVE_MPFR
+                       else if (is_mpg_float(arg)) {
+                               mf = arg->mpg_numbr;
+                               fmt_type = MP_FLOAT;
+                       } else {
+                               /* arbitrary-precision integer, convert to MPFR 
float */
+                               assert(mf == NULL);
+                               mf = mpz2mpfr(arg->mpg_i);
+                               fmt_type = MP_FLOAT;
+                       }
+#endif
+                       if (out_of_range(arg))
+                               goto out_of_range;
+
+     fmt1:
+                       if (! have_prec)
+                               prec = DEFAULT_G_PRECISION;
+#ifdef HAVE_MPFR
+     fmt0:
+#endif
+                       chksize(fw + prec + 11);        /* 11 == slop */
+                       cp = cpbuf;
+                       *cp++ = '%';
+                       if (lj)
+                               *cp++ = '-';
+                       if (signchar)
+                               *cp++ = signchar;
+                       if (alt)
+                               *cp++ = '#';
+                       if (zero_flag)
+                               *cp++ = '0';
+                       if (quote_flag)
+                               *cp++ = '\'';
+
+#if defined(LC_NUMERIC)
+                       if (quote_flag && ! use_lc_numeric)
+                               setlocale(LC_NUMERIC, "");
+#endif
+
+                       bool need_to_add_thousands = false;
+                       switch (fmt_type) {
+#ifdef HAVE_MPFR
+                       case MP_INT_WITH_PREC:
+                               sprintf(cp, "*.*Z%c", cs1);
+                               while ((nc = mpfr_snprintf(obufout, ofre, cpbuf,
+                                            (int) fw, (int) prec, zi)) >= 
(int) ofre)
+                                       chksize(nc)
+                               need_to_add_thousands = true;
+                               break;
+                       case MP_INT_WITHOUT_PREC:
+                               sprintf(cp, "*Z%c", cs1);
+                               while ((nc = mpfr_snprintf(obufout, ofre, cpbuf,
+                                            (int) fw, zi)) >= (int) ofre)
+                                       chksize(nc)
+                               need_to_add_thousands = true;
+                               break;
+                       case MP_FLOAT:
+                               sprintf(cp, "*.*R*%c", cs1);
+                               while ((nc = mpfr_snprintf(obufout, ofre, cpbuf,
+                                            (int) fw, (int) prec, ROUND_MODE, 
mf)) >= (int) ofre)
+                                       chksize(nc)
+                               break;
+#endif
+                       default:
+                               if (have_prec || tolower(cs1) != 'a') {
+                                       sprintf(cp, "*.*%c", cs1);
+                                       while ((nc = snprintf(obufout, ofre, 
cpbuf,
+                                                    (int) fw, (int) prec,
+                                                    (double) tmpval)) >= (int) 
ofre)
+                                               chksize(nc)
+                               } else {
+                                       // For %a and %A, use the default 
precision if it
+                                       // wasn't supplied by the user.
+                                       sprintf(cp, "*%c", cs1);
+                                       while ((nc = snprintf(obufout, ofre, 
cpbuf,
+                                                    (int) fw,
+                                                    (double) tmpval)) >= (int) 
ofre)
+                                               chksize(nc)
+                               }
+                       }
+
+#if defined(LC_NUMERIC)
+                       if (quote_flag && ! use_lc_numeric)
+                               setlocale(LC_NUMERIC, "C");
+#endif
+                       len = strlen(obufout);
+                       if (quote_flag && need_to_add_thousands) {
+                               const char *new_text = add_thousands(obufout/*, 
& loc*/);
+
+                               len = strlen(new_text);
+                               chksize(len)
+                               strcpy(obufout, new_text);
+                               free((void *) new_text);
+                       }
 
-       /* estimate minimum precision for exact conversion */
-       prec = mpz_sizeinbase(zi, 2);   /* most significant 1 bit position 
starting at 1 */
-       prec -= (size_t) mpz_scan1(zi, 0);      /* least significant 1 bit 
index starting at 0 */
-       if (prec < MPFR_PREC_MIN)
-               prec = MPFR_PREC_MIN;
-       else if (prec > MPFR_PREC_MAX)
-               prec = MPFR_PREC_MAX;
+                       ofre -= len;
+                       obufout += len;
+                       s0 = s1;
+                       break;
+               default:
+                       if (do_lint && is_alpha(cs1))
+                               lintwarn(_("ignoring unknown format specifier 
character `%c': no argument converted"), cs1);
+                       break;
+               }
+               if (toofew) {
+                       msg("%s\n\t`%s'\n\t%*s%s",
+                             _("fatal: not enough arguments to satisfy format 
string"),
+                             fmt_string, (int) (s1 - fmt_string - 1), "",
+                             _("^ ran out for this one"));
+                       goto out;
+               }
+       }
+       if (do_lint) {
+               if (need_format)
+                       lintwarn(
+                       _("[s]printf: format specifier does not have control 
letter"));
+               if (cur_arg < num_args)
+                       lintwarn(
+                       _("too many arguments supplied for format string"));
+       }
+       bchunk(s0, s1 - s0);
+       olen_final = obufout - obuf;
+#define GIVE_BACK_SIZE (INITIAL_OUT_SIZE * 2)
+       if (ofre > GIVE_BACK_SIZE)
+               erealloc(obuf, char *, olen_final + 1, "format_tree");
+       r = make_str_node(obuf, olen_final, ALREADY_MALLOCED);
+       obuf = NULL;
+out:
+       {
+               size_t k;
+               size_t count = sizeof(cpbufs)/sizeof(cpbufs[0]);
+               for (k = 0; k < count; k++) {
+                       if (cpbufs[k].buf != cpbufs[k].stackbuf)
+                               efree(cpbufs[k].buf);
+               }
+               if (obuf != NULL)
+                       efree(obuf);
+       }
 
-       if (! inited) {
-               mpfr_init2(mpfrval, prec);
-               inited = true;
-       } else
-               mpfr_set_prec(mpfrval, prec);
-       tval = mpfr_set_z(mpfrval, zi, ROUND_MODE);
-       IEEE_FMT(mpfrval, tval);
-       return mpfrval;
+       if (r == NULL)
+               gawk_exit(EXIT_FATAL);
+       return r;
 }
-#endif
-
-/*
- * format_tree() formats arguments of sprintf,
- * and accordingly to a fmt_string providing a format like in
- * printf family from C library.  Returns a string node which value
- * is a formatted string.  Called by  sprintf function.
- *
- * It is one of the uglier parts of gawk.  Thanks to Michal Jaegermann
- * for taming this beast and making it compatible with ANSI C.
- */
+#undef bchunk
+#undef bchunk_one
+#undef chksize
 
 NODE *
-format_tree(
+format_tree_new(
        const char *fmt_string,
        size_t n0,
        NODE **the_args,
@@ -146,6 +1195,9 @@ format_tree(
        uintmax_t uval;
        bool sgn;
        int base;
+       bool space_flag;
+       bool plus_flag;
+
        /*
         * Although this is an array, the elements serve two different
         * purposes. The first element is the general buffer meant
@@ -178,6 +1230,7 @@ format_tree(
        mpfr_ptr mf;
 #endif
        enum { MP_NONE = 0, MP_INT_WITH_PREC = 1, MP_INT_WITHOUT_PREC, MP_FLOAT 
} fmt_type;
+       struct flags flags;
 
        static const char sp[] = " ";
        static const char zero_string[] = "0";
@@ -277,6 +1330,8 @@ format_tree(
                zero_flag = false;
                quote_flag = false;
                nan_inf_val = NULL;
+               space_flag = false;
+               plus_flag = false;
 #ifdef HAVE_MPFR
                mf = NULL;
                zi = NULL;
@@ -290,6 +1345,7 @@ format_tree(
                cp = cend;
                chbuf = lchbuf;
                s1++;
+               memset(& flags, 0, sizeof(flags));
 
 retry:
                if (n0-- == 0)  /* ran out early! */
@@ -309,11 +1365,15 @@ check_pos:
                         * since there's no arg converted, the field width 
doesn't
                         * apply.  The code already was that way, but this
                         * comment documents it, at least in the code.
+                        *
+                        * 27 June 2024:
+                        * This is still the case. The 2023 standard says you 
shouldn't
+                        * have anything between the percents.
                         */
                        if (do_lint) {
                                const char *msg = NULL;
 
-                               if (fw && ! have_prec)
+                               if (fw != 0 && ! have_prec)
                                        msg = _("field width is ignored for 
`%%' specifier");
                                else if (fw == 0 && have_prec)
                                        msg = _("precision is ignored for `%%' 
specifier");
@@ -386,6 +1446,7 @@ check_pos:
                                        goto out;
                                }
                        } else {
+                               // FIXME
                                msg(_("fatal: `$' not permitted after period in 
format"));
                                goto out;
                        }
@@ -439,10 +1500,13 @@ check_pos:
                case ' ':               /* print ' ' or '-' */
                                        /* 'space' flag is ignored */
                                        /* if '+' already present  */
+                       space_flag = true;
                        if (signchar != false)
                                goto check_pos;
-                       /* FALL THROUGH */
+                       signchar = cs1;
+                       goto check_pos;
                case '+':               /* print '+' or '-' */
+                       plus_flag = true;
                        signchar = cs1;
                        goto check_pos;
                case '-':
@@ -499,6 +1563,9 @@ check_pos:
                                break;
                        magic_posix_flag = true;
                        goto retry;
+               case 'C':       // POSIX 2024
+                       cs1 = 'c';
+                       // FALL THROUGH
                case 'c':
                        need_format = false;
                        parse_next_arg();
@@ -578,6 +1645,9 @@ out0:
                                }
                        }
                        goto pr_tail;
+               case 'S':       // POSIX 2024
+                       cs1 = 's';
+                       // FALL THROUGH
                case 's':
                        need_format = false;
                        parse_next_arg();
@@ -724,6 +1794,22 @@ out0:
                        parse_next_arg();
                        (void) force_number(arg);
 
+#define set_flags() \
+       flags.left_just = lj; \
+       flags.alt = alt; \
+       flags.zero = zero_flag; \
+       flags.space = space_flag; \
+       flags.plus = plus_flag; \
+       flags.quote = quote_flag; \
+       flags.have_prec = have_prec; \
+       flags.format = cs1; \
+       flags.base = base; \
+       flags.field_width = fw; \
+       flags.precision = prec
+
+                       set_flags();
+
+
                        if (out_of_range(arg))
                                goto out_of_range;
 #ifdef HAVE_MPFR
@@ -964,6 +2050,20 @@ mpf1:
                        parse_next_arg();
                        (void) force_number(arg);
 
+                       set_flags();
+                       {
+                       const char *formatted = format_float(arg, & flags);
+                       len = strlen(formatted);
+                       chksize(len)
+                       strcpy(obufout, formatted);
+                       free((void *) formatted);
+
+                       ofre -= len;
+                       obufout += len;
+                       s0 = s1;
+                       break;
+                       }
+
                        if (! is_mpg_number(arg))
                                tmpval = arg->numbr;
 #ifdef HAVE_MPFR
@@ -1053,7 +2153,7 @@ mpf1:
 #endif
                        len = strlen(obufout);
                        if (quote_flag && need_to_add_thousands) {
-                               const char *new_text = add_thousands(obufout, & 
loc);
+                               const char *new_text = add_thousands(obufout);
 
                                len = strlen(new_text);
                                chksize(len)
@@ -1298,7 +2398,7 @@ out_of_range(NODE *n)
                return (isnan(n->numbr) || isinf(n->numbr));
 }
 
-/* format_nan_inf --- format NaN and INF values */
+/* format_nan_inf --- format NaN and INF values. return value points to a static 
buffer */
 
 char *
 format_nan_inf(NODE *n, char format)
@@ -1349,7 +2449,6 @@ fmt:
 }
 
 
-
 /* reverse --- reverse the contents of a string in place */
 
 static void
@@ -1365,7 +2464,7 @@ reverse(char *str)
        }
 }
 
-/* add_thousands --- add the thousands separator. Needed for MPFR %d format */
+/* add_thousands --- add the thousands separator. caller frees the return value 
*/
 
 /*
  * Copy the source string into the destination string, backwards,
@@ -1375,25 +2474,26 @@ reverse(char *str)
  */
 
 static const char *
-add_thousands(const char *original, struct lconv *loc)
+add_thousands(const char *original)
 {
        size_t orig_len = strlen(original);
-       size_t new_len = orig_len + (orig_len * strlen(loc->thousands_sep)) + 
1;        // worst case
+       size_t new_len = orig_len + 1;
        char *newbuf;
-       char decimal_point = '\0';
-       const char *dec = NULL;
        const char *src;
        char *dest;
 
        emalloc(newbuf, char *, new_len, "add_thousands");
        memset(newbuf, '\0', new_len);
 
+#if defined(HAVE_LOCALE_H)
+       new_len = orig_len + (orig_len * strlen(loc.thousands_sep)) + 1;        
// worst case
        src = original + strlen(original) - 1;
        dest = newbuf;
 
-       if (loc->decimal_point[0] != '\0') {
-               decimal_point = loc->decimal_point[0];
-               if ((dec = strchr(original, decimal_point)) != NULL) {
+       if (loc.decimal_point[0] != '\0') {
+               const char *dec = NULL;
+
+               if ((dec = strchr(original, loc.decimal_point[0])) != NULL) {
                        while (src >= dec)
                                *dest++ = *src--;
                }
@@ -1404,17 +2504,17 @@ add_thousands(const char *original, struct lconv *loc)
        int jj = 0;
        do {
                *dest++ = *src--;
-               if (loc->grouping[ii] && ++jj == loc->grouping[ii]) {
+               if (loc.grouping[ii] && ++jj == loc.grouping[ii]) {
                        if (src >= original) {  /* only add if more digits 
coming */
-                               const char *ts = loc->thousands_sep;
+                               const char *ts = loc.thousands_sep;
                                int k;
 
                                for (k = strlen(ts) - 1; k >= 0; k--)
                                        *dest++ = ts[k];
                        }
-                       if (loc->grouping[ii+1] == 0)
+                       if (loc.grouping[ii+1] == 0)
                                jj = 0;         /* keep using current val in 
loc.grouping[ii] */
-                       else if (loc->grouping[ii+1] == CHAR_MAX) {
+                       else if (loc.grouping[ii+1] == CHAR_MAX) {
                                // copy in the rest and be done
                                while (src >= original)
                                        *dest++ = *src--;
@@ -1428,6 +2528,491 @@ add_thousands(const char *original, struct lconv *loc)
 
        *dest++ = '\0';
        reverse(newbuf);
+#else
+       strcpy(newbuf, original);
+#endif
 
        return newbuf;
 }
+
+/*
+ * format_integer_value --- format just the actual value of an integer.
+ *
+ * Generates the digits of arg's value in flags->base (upper case digits
+ * for the 'X' format). For base 10 a negative value, including negative
+ * zero, gets a leading '-', and thousands separators are added when the
+ * quote flag is set and the locale supplies a separator. Field width,
+ * precision and the remaining flags are left to the caller.
+ *
+ * arg->numbr is unchanged on return. caller frees return value.
+ */
+
+static const char *
+format_integer_value(NODE *arg, struct flags *flags)
+{
+#define VALUE_SIZE 40
+       char *buf = NULL;
+       size_t buflen;
+       static const char lchbuf[] = "0123456789abcdef";
+       static const char Uchbuf[] = "0123456789ABCDEF";
+       static const AWKNUM negative_zero = -0.0;
+       const char *chbuf;
+       char *cp;
+       bool quote_flag = false;
+       bool negative = false;
+       uintmax_t uval;
+       AWKNUM saved_numbr = arg->numbr;
+
+       // growbuffer may only be used when cp == buf + buflen (buffer exactly full).
+#define growbuffer(buf, buflen, cp) { \
+               erealloc(buf, char *, buflen * 2, "format_integer"); \
+               cp = buf + buflen; \
+               buflen *= 2; \
+       }
+
+#if defined(HAVE_LOCALE_H)
+       quote_flag = (flags->quote && loc.thousands_sep[0] != '\0');
+#endif
+
+       chbuf = (flags->format == 'X' ? Uchbuf : lchbuf);
+       emalloc(buf, char *, VALUE_SIZE, "format_integer_value");
+       buflen = VALUE_SIZE;
+       cp = buf;
+
+       /*
+        * C 2023 says negative zeros get a minus sign.
+        *
+        * A comparison such as `numbr == -0' cannot detect that: the
+        * constant is the integer 0, and -0.0 == 0.0 is true anyway, so
+        * every zero would be treated as negative. Compare the object
+        * representation against a real negative zero instead.
+        */
+       if (flags->base == 10
+           && (arg->numbr < 0
+               || memcmp(& arg->numbr, & negative_zero, sizeof(AWKNUM)) == 0)) {
+               negative = true;
+               arg->numbr = -arg->numbr;       // get_number_uj() wants the magnitude
+       }
+       uval = get_number_uj(arg);
+       arg->numbr = saved_numbr;       // don't leave the caller's value negated
+
+       // generate the digits backwards.
+       do {
+               if (cp >= buf + buflen)
+                       growbuffer(buf, buflen, cp);
+
+               *cp++ = chbuf[uval % flags->base];
+               uval /= flags->base;
+       } while (uval > 0);
+
+       // make sure there is room for the sign and the terminating '\0'
+       if ((size_t) (cp - buf) + 2 > buflen) {
+               size_t used = cp - buf;
+
+               erealloc(buf, char *, buflen + 2, "format_integer_value");
+               cp = buf + used;
+               buflen += 2;
+       }
+       if (negative)
+               *cp++ = '-';
+       *cp = '\0';
+
+       // turn it back around
+       reverse(buf);
+
+       if (flags->base == 10 && quote_flag) {
+               const char *with_commas = add_thousands(buf);
+
+               free((void *) buf);
+               buf = (char *) with_commas;
+       }
+
+       return buf;
+}
+
+/*
+ * format_integer --- format a signed or unsigned integer value. caller frees
+ * return value
+ *
+ * Out of range values are handed to format_out_of_range() and MPFR/GMP
+ * values to format_mpg_integer(). Anything else gets its digits from
+ * format_integer_value() and is then adjusted here for precision, the
+ * alternate form, sign flags and field width.
+ *
+ * NOTE(review): the code from pr_tail onward looks unfinished; see the
+ * notes there before relying on the returned string.
+ */
+
+static const char *
+format_integer(NODE *arg, struct flags *flags)
+{
+       const char *number_value;
+       double tmpval;
+       char *buf1 = NULL;
+       size_t buflen;
+       char *buf2 = NULL;
+       uintmax_t uval;
+       char fill[] = " ";
+
+       if (out_of_range(arg))
+               return format_out_of_range(arg, flags);
+
+       if (is_mpg_integer(arg) || is_mpg_float(arg))
+               return format_mpg_integer(arg, flags);
+
+       // uval is only used to check that the value survives a round trip
+       // through an integer type; if it doesn't, fall back to out of range.
+       tmpval = arg->numbr;
+       if (tmpval < 0) {
+               uval = (uintmax_t) (intmax_t) tmpval;
+               if ((AWKNUM)(intmax_t)uval != double_to_int(tmpval))
+                       return format_out_of_range(arg, flags);
+       } else {
+               uval = (intmax_t) tmpval;
+               if ((AWKNUM)uval != double_to_int(tmpval))
+                       return format_out_of_range(arg, flags);
+       }
+
+       // we now have an integer we can format, do so
+       number_value = format_integer_value(arg, flags);
+
+       // We now have the initial *integer* decimal, octal, or hex value in 
hand.
+       // If it's decimal, we've added commas if appropriate. If it's negative
+       // and decimal, it has a minus sign.
+       
+       // The next step is deal with the rest of the printf flags.
+
+       if (tmpval == 0 && flags->field_width == 0
+           && ! flags->have_prec) {
+               // relatively simple case
+               if (flags->base == 16 && flags->alt) {
+                       // room for the two prefix characters and the '\0'
+                       size_t len = strlen(number_value) + 2 + 1;
+
+                       emalloc(buf1, char *, len, "format_integer");
+                       sprintf(buf1, "0%c%s", flags->format, number_value);
+                       free((void *) number_value);
+
+                       return buf1;
+               } else if (flags->base == 10 && (flags->plus || flags->space)
+                       && number_value[0] != '-') {
+                       // room for the sign character and the '\0'
+                       size_t len = strlen(number_value) + 1 + 1;
+
+                       emalloc(buf1, char *, len, "format_integer");
+                       if (flags->plus)
+                               sprintf(buf1, "+%s", number_value);
+                       else
+                               sprintf(buf1, " %s", number_value);
+                       free((void *) number_value);
+
+                       return buf1;
+               } else
+                       return number_value;
+       }
+
+       // Now it gets messy...
+
+
+       /*
+        * ``The result of converting a zero value with a
+        * precision of zero is no characters.''
+        *
+        * If I remember the ANSI C standard, though,
+        * it says that for octal conversions
+        * the precision is artificially increased
+        * to add an extra 0 if # is supplied.
+        * Indeed, in C,
+        *      printf("%#.0o\n", 0);
+        * prints a single 0.
+        */
+       // NOTE(review): the test below is `! flags->alt', while the comment
+       // above describes the case where # IS supplied --- confirm which is meant.
+       if (! flags->alt && flags->have_prec && flags->precision == 0 && tmpval 
== 0) {
+               if (flags->base == 8)
+                       flags->precision = 1;
+               else {
+                       buf1 = (char *) number_value;
+                       goto pr_tail;
+               }
+       }
+
+       if (compute_zero_flag(flags))
+               fill[0] = '0';  // fill with zeros
+
+       /* add more output digits to match the precision */
+       if (flags->have_prec && flags->precision > 0) {
+               bool negative = (number_value[0] == '-');
+               size_t len = strlen(number_value);
+               char *cp1;
+
+               len += flags->precision + 1;
+               emalloc(buf1, char *, len, "format_integer");
+               strcpy(buf1, number_value);
+
+               // work on the reversed string so zeros can be appended
+               reverse(buf1);
+               // NOTE(review): len already includes precision + 1 at this
+               // point, so cp1 starts at the end of the allocation rather
+               // than at the end of the copied digits --- confirm intent.
+               // number_value is also not freed on this path.
+               cp1 = buf1 + len;
+               if (negative)
+                       cp1--;  // overwrite the '-', we'll add it back in a 
minute
+               while (cp1 - buf1 < flags->precision) {
+                       if (cp1 - buf1 >= len)
+                               growbuffer(buf1, len, cp1);
+
+                       *cp1++ = '0';
+               }
+               if (negative)
+                       *cp1++ = '-';
+               *cp1 = '\0';
+               reverse(buf1);
+       } else
+               buf1 = (char *) number_value;
+
+       if (flags->alt) {
+               if (flags->base == 16 && tmpval != 0) {
+                       // the hex prefix is only added here when zero filling
+                       if (fill[0] == '0') {
+                               emalloc(buf2, char *, strlen(buf1) + 3, 
"format_integer");
+                               sprintf(buf2, "0%c%s", flags->format, buf1);
+                               flags->field_width -= 2;
+                               free((void *) buf1);
+                               buf1 = buf2;
+                       }
+               } else if (flags->base == 8 && tmpval == 0) {
+                       // NOTE(review): format_integer_value() generates its
+                       // digits in a do/while loop, so it appears to always
+                       // produce at least one digit; this assertion would
+                       // then fail --- confirm.
+                       assert(number_value[0] == '\0');
+                       emalloc(buf2, char *, 2, "format_integer");
+                       strcpy(buf2, "0");
+                       flags->field_width--;
+                       free((void *) buf1);
+                       buf1 = buf2;
+               }
+       }
+       // deal with space or plus flags
+
+       // from here on flags->precision holds the length of the number text
+       if (flags->precision > flags->field_width)
+               flags->field_width = flags->precision;
+       flags->precision = strlen(buf1);
+       // NOTE(review): a label directly followed by a declaration is only
+       // valid C from C23 on; older compilers need an empty statement here.
+pr_tail:
+       int st_len = strlen(buf1);
+       buflen = flags->field_width;
+       if (buflen < st_len)
+               buflen = st_len;
+       if (flags->plus || flags->space)
+               buflen++;
+       buflen++;       // for '\0'
+
+       emalloc(buf2, char *, buflen, "format_integer");
+       char *cp = buf2;
+
+       /*
+        * Order:
+        * 1. Create the number. This is what's in buf1.
+        * 2. If zero flag, pad number to field width.
+        * 3. If plus flag, first leading zero converts to +.
+        * 4. Else if space flag, first leading zero converts to space.
+        * 5. If not padded, just, insert plus or space if need be.
+        * 
+        * If we have not padded, and the field width is > length of
+        * number, two cases:
+        * 1. Left justified, stick plus or space in front of number,
+        *    then the number, then the spaces.
+        * 2. Right justified, do spaces, then plus, then number.
+        *
+        * Plus sign is only added if not a negative number and if signed;
+        * in practice this means base == 10.
+        */
+       bool padded = false;
+       // NOTE(review): buflen includes the byte for '\0', so padlen zeros
+       // plus the strcpy() below write buflen + 1 bytes into buf2 --- confirm.
+       int padlen = buflen - st_len;
+
+       if (flags->zero) {
+               for (; padlen > 0; padlen--)
+                       *cp++ = '0';
+               strcpy(cp, buf1);
+               padded = true;
+       }
+
+       if (padded) {
+               buf2[0] = (flags->plus ? '+' :
+                               flags->space ? ' ' : '0');
+       } else if (flags->plus || flags->space) {
+               sprintf(buf2, "%c%s",
+                       flags->plus ? '+' :  ' ', buf1);
+       }
+
+
+       // NOTE(review): the left-justify branch is empty, and the writes
+       // through cp below land on top of whatever was stored in buf2 above.
+       if (flags->left_just) {
+       } else {
+               // normal - spaces or zeros first.
+               if (flags->plus && fill[0] == '0') {
+                       *cp++ = '+';
+                       flags->field_width--;
+               }
+               while (flags->field_width > flags->precision) {
+                       *cp++ = fill[0];
+                       flags->field_width--;
+               }
+               if (flags->plus && fill[0] == ' ')
+                       *--cp = '+';
+       }
+       // bchunk(cp, buf1);
+       free((void *) buf1);
+       while (flags->field_width > flags->precision) {
+               // bchunk_one(fill);
+               flags->field_width--;
+       }
+
+       // NOTE(review): buf1 was freed a few lines up and is returned here,
+       // while buf2 is neither returned nor freed. Presumably buf2 is the
+       // intended result once the tail is finished --- confirm.
+       return buf1;
+}
+
+/*
+ * format_out_of_range --- format an out of range value as %g, or a NaN or
+ * Inf value as text padded to the field width. caller frees return value
+ *
+ * If format_nan_inf() returns NULL, or POSIX behavior was requested, the
+ * value is passed to format_float(), switching the format to 'g' if it is
+ * not already a floating point format. Otherwise the NaN / Inf text is
+ * returned, padded with spaces on the left, or on the right when left
+ * justifying.
+ *
+ * NOTE(review): format_float() calls back into this function when
+ * out_of_range(arg) is true. If that holds for NaN / Inf values, the
+ * POSIX path here would recurse without end --- confirm.
+ */
+
+static const char *
+format_out_of_range(NODE *arg, struct flags *flags)
+{
+       /*
+        * out of range - emergency use of %g format,
+        * or format NaN and INF values.
+        */
+
+       // nan_inf_val points to a static buffer, don't free it.
+       const char *nan_inf_val = format_nan_inf(arg, flags->format);
+
+       if (do_posix || flags->magic_posix_flag || nan_inf_val == NULL) {
+               if (do_lint && ! do_posix && ! flags->magic_posix_flag)
+                       lintwarn(_("[s]printf: value %g is out of range for `%%%c' format"),
+                                               (double) arg->numbr,
+                                               flags->format);
+
+               if (strchr("aAeEfFgG", flags->format) == NULL)
+                       flags->format = 'g';
+
+               return format_float(arg, flags);
+       }
+
+       if (do_lint)
+               lintwarn(_("[s]printf: value %s is out of range for `%%%c' format"),
+                                       nan_inf_val, flags->format);
+
+       // A NaN or Inf, deal with a field width, if any
+       size_t len = strlen(nan_inf_val);
+
+       // check the sign first so that a negative width is never
+       // converted to a huge unsigned value
+       if (flags->field_width > 0 && (size_t) flags->field_width > len) {
+               size_t fw = flags->field_width;
+               size_t padding = fw - len;
+               char *buf;
+
+               emalloc(buf, char *, fw + 1, "format_out_of_range");
+               if (flags->left_just) {
+                       // text first, then the spaces
+                       memcpy(buf, nan_inf_val, len);
+                       memset(buf + len, ' ', padding);
+               } else {
+                       // spaces first, then the text
+                       memset(buf, ' ', padding);
+                       memcpy(buf + padding, nan_inf_val, len);
+               }
+               buf[fw] = '\0';
+
+               return buf;
+       }
+
+       return strdup(nan_inf_val);
+}
+
+/*
+ * format_float --- format a floating point number. caller frees return value
+ *
+ * Builds a C printf format in cpbuf from the flags ('-', sign, '#', '0',
+ * and the quote flag when the locale has a thousands separator), then
+ * formats the value with snprintf(), or with mpfr_snprintf() for
+ * MPFR/GMP values, doubling the buffer until the result fits. Values for
+ * which out_of_range() is true are handed to format_out_of_range().
+ * If no precision was given, flags->precision is set to
+ * DEFAULT_G_PRECISION.
+ */
+
+static const char *
+format_float(NODE *arg, struct flags *flags)
+{
+       char *buf;
+       size_t buflen;
+       char *cp;
+#undef cpbuf
+       char cpbuf[100];
+
+       double tmpval = 0.0;    // only meaningful when not formatting with MPFR
+#ifdef HAVE_MPFR
+       mpfr_ptr mf = NULL;     // must start out NULL, it's checked by the assert below
+#endif
+       bool quote_flag = false;
+       int nc;
+       bool mpfr_format = false;
+
+       if (out_of_range(arg))
+               return format_out_of_range(arg, flags);
+
+       (void) force_number(arg);
+
+       if (! is_mpg_number(arg))
+               tmpval = arg->numbr;
+#ifdef HAVE_MPFR
+       else if (is_mpg_float(arg)) {
+               mf = arg->mpg_numbr;
+               mpfr_format = true;
+       } else {
+               /* arbitrary-precision integer, convert to MPFR float */
+               assert(mf == NULL);
+               mf = mpz2mpfr(arg->mpg_i);
+               mpfr_format = true;
+       }
+#endif
+
+       if (! flags->have_prec)
+               flags->precision = DEFAULT_G_PRECISION;
+
+#if defined(HAVE_LOCALE_H)
+       quote_flag = (flags->quote && loc.thousands_sep[0] != 0);
+#endif
+
+       buflen = flags->field_width + flags->precision + 11;    /* 11 == slop */
+       emalloc(buf, char *, buflen, "format_float");
+
+       int signchar = '\0';
+       if (flags->plus)
+               signchar = '+';
+       else if (flags->space)
+               signchar = ' ';
+
+       // build the format: '%', then the flags; the width, precision
+       // and conversion are appended at cp further down.
+       cp = cpbuf;
+       *cp++ = '%';
+       if (flags->left_just)
+               *cp++ = '-';
+       if (signchar)
+               *cp++ = signchar;
+       if (flags->alt)
+               *cp++ = '#';
+       if (flags->zero)
+               *cp++ = '0';
+       if (quote_flag)
+               *cp++ = '\'';
+
+       // the quote flag needs the user's numeric locale while formatting
+#if defined(LC_NUMERIC)
+       if (quote_flag && ! use_lc_numeric)
+               setlocale(LC_NUMERIC, "");
+#endif
+
+       // NOTE(review): nothing sets need_to_add_thousands to true yet, so
+       // the add_thousands() call at the bottom is currently never reached.
+       bool need_to_add_thousands = false;
+       if (mpfr_format) {
+#ifdef HAVE_MPFR
+               // width, precision, rounding mode, then the value
+               sprintf(cp, "*.*R*%c", flags->format);
+               while ((nc = mpfr_snprintf(buf, buflen, cpbuf,
+                            flags->field_width, flags->precision, ROUND_MODE, mf)) >= (int) buflen) {
+                       erealloc(buf, char *, buflen * 2, "format_float");
+                       buflen *= 2;
+               }
+#else
+               cant_happen("trying to format GMP/MPFR number");
+#endif
+       } else {
+               if (flags->have_prec || tolower(flags->format) != 'a') {
+                       sprintf(cp, "*.*%c", flags->format);
+                       while ((nc = snprintf(buf, buflen, cpbuf,
+                                    flags->field_width, flags->precision,
+                                    (double) tmpval)) >= (int) buflen) {
+                               erealloc(buf, char *, buflen * 2, "format_float");
+                               buflen *= 2;
+                       }
+               } else {
+                       // For %a and %A, use the default precision if it
+                       // wasn't supplied by the user.
+                       sprintf(cp, "*%c", flags->format);
+                       while ((nc = snprintf(buf, buflen, cpbuf,
+                                    flags->field_width,
+                                    (double) tmpval)) >= (int) buflen) {
+                               erealloc(buf, char *, buflen * 2, "format_float");
+                               buflen *= 2;
+                       }
+               }
+       }
+
+#if defined(LC_NUMERIC)
+       if (quote_flag && ! use_lc_numeric)
+               setlocale(LC_NUMERIC, "C");
+#endif
+       if (quote_flag && need_to_add_thousands) {
+               const char *new_text = add_thousands(buf);
+
+               free((void *) buf);
+               buf = (char *) new_text;
+       }
+       return buf;
+}
+
+/* compute_zero_flag --- return true if we want to fill with zeros */
+
+static bool
+compute_zero_flag(struct flags *flags)
+{
+       /*
+        * Deciding when to fill with zeroes takes a few steps:
+        *
+        * 1. Never when left-justifying.
+        * 2. With a precision, only if there is no field width,
+        *    e.g. %.10d
+        * 3. Without a precision, only if '0' was given, e.g. %06d
+        */
+       if (flags->left_just)
+               return false;
+
+       if (flags->have_prec)
+               return (flags->field_width == 0);
+
+       return flags->zero;
+}
+
+/*
+ * format_mpg_integer --- format an MPZ or MPFR integer. caller frees return
+ * value
+ *
+ * For now this is a stub: both arguments are ignored and a copy of a
+ * fixed placeholder string is returned.
+ */
+
+static const char *
+format_mpg_integer(NODE *arg, struct flags *flags)
+{
+       static const char placeholder[] = "mpg_int";
+
+       return strdup(placeholder);
+}

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog |   15 +
 awk.h     |    4 +-
 main.c    |    7 +
 printf.c  | 1689 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 1662 insertions(+), 53 deletions(-)


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]