[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[SCM] gawk branch, stable/printf-rework, updated. gawk-4.1.0-5490-g8d18169d
From: |
Arnold Robbins |
Subject: |
[SCM] gawk branch, stable/printf-rework, updated. gawk-4.1.0-5490-g8d18169d |
Date: |
Tue, 2 Jul 2024 14:50:35 -0400 (EDT) |
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".
The branch, stable/printf-rework has been updated
via 8d18169d7124cee926d1755e64c6eb5ae3edef20 (commit)
from 95fc5822f3f0951df7371000e6cf255df0138643 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=8d18169d7124cee926d1755e64c6eb5ae3edef20
commit 8d18169d7124cee926d1755e64c6eb5ae3edef20
Author: Arnold D. Robbins <arnold@skeeve.com>
Date: Tue Jul 2 21:50:10 2024 +0300
Start refactoring format_tree.
diff --git a/ChangeLog b/ChangeLog
index 6847f98d..9108e64e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2024-07-02 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awk.h (format_tree_new, format_tree_old): Add declarations.
+ (format_tree): Temporarily make into a function pointer.
+ * main.c (format_tree): Declare it.
+ (main): Check PRINTF_NEW environment variable and if set,
+ use the new version. Otherwise use the old one.
+ * printf.c (struct flags): Declare.
+ (format_tree_old): The original version.
+ (format_tree_new): The new one, being refactored.
+ (add_thousands): Use the global `loc' variable, not a parameter.
+ (format_integer): New function.
+ (format_mpg_integer, format_float, format_out_of_range,
+ compute_zero_flag): New functions.
+
2024-07-02 Arnold D. Robbins <arnold@skeeve.com>
* re.c (make_regexp): \u escapes also now treated literally
diff --git a/awk.h b/awk.h
index 490fbbc8..fb1a74a7 100644
--- a/awk.h
+++ b/awk.h
@@ -1516,7 +1516,9 @@ extern NODE *do_sub(int nargs, unsigned int flags);
extern NODE *call_sub(const char *name, int nargs);
extern NODE *call_match(int nargs);
extern NODE *call_split_func(const char *name, int nargs);
-extern NODE *format_tree(const char *, size_t, NODE **, long);
+extern NODE *format_tree_old(const char *, size_t, NODE **, long);
+extern NODE *format_tree_new(const char *, size_t, NODE **, long);
+extern NODE *(*format_tree)(const char *, size_t, NODE **, long);
extern NODE *do_lshift(int nargs);
extern NODE *do_rshift(int nargs);
extern NODE *do_and(int nargs);
diff --git a/main.c b/main.c
index 4ef0208d..f71d3584 100644
--- a/main.c
+++ b/main.c
@@ -161,6 +161,8 @@ GETGROUPS_T *groupset; /* current group set */
int ngroups; /* size of said set */
#endif
+NODE *(*format_tree)(const char *, size_t, NODE **, long);
+
void (*lintfunc)(const char *mesg, ...) = r_warning;
/* Sorted by long option name! */
@@ -218,6 +220,11 @@ main(int argc, char **argv)
const char *initial_locale;
#endif
+ if (getenv("PRINTF_NEW") != NULL)
+ format_tree = format_tree_new;
+ else
+ format_tree = format_tree_old;
+
myname = gawk_name(argv[0]);
check_pma_security(persist_file);
diff --git a/printf.c b/printf.c
index 31910f35..dadca64e 100644
--- a/printf.c
+++ b/printf.c
@@ -27,6 +27,21 @@
#include "awk.h"
+struct flags {
+ bool left_just;
+ bool alt;
+ bool zero;
+ bool space;
+ bool plus;
+ bool quote;
+ bool have_prec;
+ bool magic_posix_flag;
+ char format;
+ int base;
+ int field_width;
+ int precision;
+};
+
extern int max_args;
extern NODE **args_array;
extern FILE *output_fp;
@@ -36,7 +51,12 @@ extern FILE *output_fp;
static size_t mbc_byte_count(const char *ptr, size_t numchars);
static size_t mbc_char_count(const char *ptr, size_t numbytes);
static void reverse(char *str);
-static const char *add_thousands(const char *original, struct lconv *loc);
+static const char *add_thousands(const char *original);
+static const char *format_integer(NODE *arg, struct flags *flags);
+static const char *format_mpg_integer(NODE *arg, struct flags *flags);
+static const char *format_float(NODE *arg, struct flags *flags);
+static const char *format_out_of_range(NODE *arg, struct flags *flags);
+static bool compute_zero_flag(struct flags *flags);
#ifdef HAVE_MPFR
@@ -47,45 +67,1074 @@ static const char *add_thousands(const char *original,
struct lconv *loc);
*/
-static mpfr_ptr
-mpz2mpfr(mpz_ptr zi)
-{
- size_t prec;
- static mpfr_t mpfrval;
- static bool inited = false;
- int tval;
+static mpfr_ptr
+mpz2mpfr(mpz_ptr zi)
+{
+ size_t prec;
+ static mpfr_t mpfrval;
+ static bool inited = false;
+ int tval;
+
+ /* estimate minimum precision for exact conversion */
+ prec = mpz_sizeinbase(zi, 2); /* most significant 1 bit position
starting at 1 */
+ prec -= (size_t) mpz_scan1(zi, 0); /* least significant 1 bit
index starting at 0 */
+ if (prec < MPFR_PREC_MIN)
+ prec = MPFR_PREC_MIN;
+ else if (prec > MPFR_PREC_MAX)
+ prec = MPFR_PREC_MAX;
+
+ if (! inited) {
+ mpfr_init2(mpfrval, prec);
+ inited = true;
+ } else
+ mpfr_set_prec(mpfrval, prec);
+ tval = mpfr_set_z(mpfrval, zi, ROUND_MODE);
+ IEEE_FMT(mpfrval, tval);
+ return mpfrval;
+}
+#endif
+
+/*
+ * format_tree() formats the arguments of sprintf according to
+ * fmt_string, which provides a format like that of the printf
+ * family from the C library. Returns a string node whose value
+ * is the formatted string. Called by the sprintf function.
+ *
+ * It is one of the uglier parts of gawk. Thanks to Michal Jaegermann
+ * for taming this beast and making it compatible with ANSI C.
+ */
+
+NODE *
+format_tree_old(
+ const char *fmt_string,
+ size_t n0,
+ NODE **the_args,
+ long num_args)
+{
+/* copy 'l' bytes from 's' to 'obufout' checking for space in the process */
+/* difference of pointers should be of ptrdiff_t type, but let us be kind */
+#define bchunk(s, l) if (l) { \
+ while ((l) > ofre) { \
+ size_t olen = obufout - obuf; \
+ erealloc(obuf, char *, osiz * 2, "format_tree"); \
+ ofre += osiz; \
+ osiz *= 2; \
+ obufout = obuf + olen; \
+ } \
+ memcpy(obufout, s, (size_t) (l)); \
+ obufout += (l); \
+ ofre -= (l); \
+}
+
+/* copy one byte from 's' to 'obufout' checking for space in the process */
+#define bchunk_one(s) { \
+ if (ofre < 1) { \
+ size_t olen = obufout - obuf; \
+ erealloc(obuf, char *, osiz * 2, "format_tree"); \
+ ofre += osiz; \
+ osiz *= 2; \
+ obufout = obuf + olen; \
+ } \
+ *obufout++ = *s; \
+ --ofre; \
+}
+
+/* Is there space for something L big in the buffer? */
+#define chksize(l) if ((l) >= ofre) { \
+ size_t olen = obufout - obuf; \
+ size_t delta = osiz+l-ofre; \
+ erealloc(obuf, char *, osiz + delta, "format_tree"); \
+ obufout = obuf + olen; \
+ ofre += delta; \
+ osiz += delta; \
+}
+
+ size_t cur_arg = 0;
+ NODE *r = NULL;
+ int i, nc;
+ bool toofew = false;
+ char *obuf, *obufout;
+ size_t osiz, ofre, olen_final;
+ const char *chbuf;
+ const char *s0, *s1;
+ int cs1;
+ NODE *arg;
+ long fw, prec, argnum;
+ bool used_dollar;
+ bool lj, alt, have_prec, need_format;
+ long *cur = NULL;
+ uintmax_t uval;
+ bool sgn;
+ int base;
+ /*
+ * Although this is an array, the elements serve two different
+ * purposes. The first element is the general buffer meant
+ * to hold the entire result string. The second one is a
+ * temporary buffer for large floating point values. They
+ * could just as easily be separate variables, and the
+ * code might arguably be clearer.
+ */
+ struct {
+ char *buf;
+ size_t bufsize;
+ char stackbuf[30];
+ } cpbufs[2];
+#define cpbuf cpbufs[0].buf
+ char *cend = &cpbufs[0].stackbuf[sizeof(cpbufs[0].stackbuf)];
+ char *cp;
+ const char *fill;
+ AWKNUM tmpval = 0.0;
+ char signchar = '\0';
+ size_t len;
+ bool zero_flag = false;
+ bool quote_flag = false;
+ int ii, jj;
+ char *chp;
+ size_t copy_count, char_count;
+ char *nan_inf_val;
+ bool magic_posix_flag;
+#ifdef HAVE_MPFR
+ mpz_ptr zi;
+ mpfr_ptr mf;
+#endif
+ enum { MP_NONE = 0, MP_INT_WITH_PREC = 1, MP_INT_WITHOUT_PREC, MP_FLOAT
} fmt_type;
+
+ static const char sp[] = " ";
+ static const char zero_string[] = "0";
+ static const char lchbuf[] = "0123456789abcdef";
+ static const char Uchbuf[] = "0123456789ABCDEF";
+ static const char bad_modifiers[] = "hjlLtz";
+ static bool warned[sizeof(bad_modifiers)-1]; // auto-init to zero
+
+ bool modifier_seen[sizeof(bad_modifiers)-1];
+#define modifier_index(c) (strchr(bad_modifiers, c) - bad_modifiers)
+
+#define INITIAL_OUT_SIZE 64
+ emalloc(obuf, char *, INITIAL_OUT_SIZE, "format_tree");
+ obufout = obuf;
+ osiz = INITIAL_OUT_SIZE;
+ ofre = osiz - 1;
+
+ cur_arg = 1;
+
+ {
+ size_t k;
+ for (k = 0; k < sizeof(cpbufs)/sizeof(cpbufs[0]); k++) {
+ cpbufs[k].bufsize = sizeof(cpbufs[k].stackbuf);
+ cpbufs[k].buf = cpbufs[k].stackbuf;
+ }
+ }
+
+ /*
+ * The point of this goop is to grow the buffer
+ * holding the converted number, so that large
+ * values don't overflow a fixed length buffer.
+ */
+#define PREPEND(CH) do { \
+ if (cp == cpbufs[0].buf) { \
+ char *prev = cpbufs[0].buf; \
+ emalloc(cpbufs[0].buf, char *, 2*cpbufs[0].bufsize, \
+ "format_tree"); \
+ memcpy((cp = cpbufs[0].buf+cpbufs[0].bufsize), prev, \
+ cpbufs[0].bufsize); \
+ cpbufs[0].bufsize *= 2; \
+ if (prev != cpbufs[0].stackbuf) \
+ efree(prev); \
+ cend = cpbufs[0].buf+cpbufs[0].bufsize; \
+ } \
+ *--cp = (CH); \
+} while(0)
+
+ /*
+ * Check first for use of `count$'.
+ * If plain argument retrieval was used earlier, choke.
+ * Otherwise, return the requested argument.
+ * If not `count$' now, but it was used earlier, choke.
+ * If this format is more than total number of args, choke.
+ * Otherwise, return the current argument.
+ */
+#define parse_next_arg() { \
+ if (argnum > 0) { \
+ if (cur_arg > 1) { \
+ msg(_("fatal: must use `count$' on all formats or
none")); \
+ goto out; \
+ } \
+ arg = the_args[argnum]; \
+ } else if (used_dollar) { \
+ msg(_("fatal: must use `count$' on all formats or none")); \
+ arg = 0; /* shutup the compiler */ \
+ goto out; \
+ } else if (cur_arg >= num_args) { \
+ arg = 0; /* shutup the compiler */ \
+ toofew = true; \
+ break; \
+ } else { \
+ arg = the_args[cur_arg]; \
+ cur_arg++; \
+ } \
+}
+
+ need_format = false;
+ used_dollar = false;
+
+ s0 = s1 = fmt_string;
+ while (n0-- > 0) {
+ if (*s1 != '%') {
+ s1++;
+ continue;
+ }
+ need_format = true;
+ bchunk(s0, s1 - s0);
+ s0 = s1;
+ cur = &fw;
+ fw = 0;
+ prec = 0;
+ base = 0;
+ argnum = 0;
+ base = 0;
+ have_prec = false;
+ signchar = '\0';
+ zero_flag = false;
+ quote_flag = false;
+ nan_inf_val = NULL;
+#ifdef HAVE_MPFR
+ mf = NULL;
+ zi = NULL;
+#endif
+ fmt_type = MP_NONE;
+
+ lj = alt = false;
+ memset(modifier_seen, 0, sizeof(modifier_seen));
+ magic_posix_flag = false;
+ fill = sp;
+ cp = cend;
+ chbuf = lchbuf;
+ s1++;
+
+retry:
+ if (n0-- == 0) /* ran out early! */
+ break;
+
+ switch (cs1 = *s1++) {
+ case (-1): /* dummy case to allow for checking */
+check_pos:
+ if (cur != &fw)
+ break; /* reject as a valid format */
+ goto retry;
+ case '%':
+ need_format = false;
+ /*
+ * 29 Oct. 2002:
+ * The C99 standard pages 274 and 279 seem to imply that
+ * since there's no arg converted, the field width
doesn't
+ * apply. The code already was that way, but this
+ * comment documents it, at least in the code.
+ */
+ if (do_lint) {
+ const char *msg = NULL;
+
+ if (fw && ! have_prec)
+ msg = _("field width is ignored for
`%%' specifier");
+ else if (fw == 0 && have_prec)
+ msg = _("precision is ignored for `%%'
specifier");
+ else if (fw && have_prec)
+ msg = _("field width and precision are
ignored for `%%' specifier");
+
+ if (msg != NULL)
+ lintwarn("%s", msg);
+ }
+ bchunk_one("%");
+ s0 = s1;
+ break;
+
+ case '0':
+ /*
+ * Only turn on zero_flag if we haven't seen
+ * the field width or precision yet. Otherwise,
+ * screws up floating point formatting.
+ */
+ if (cur == & fw)
+ zero_flag = true;
+ if (lj)
+ goto retry;
+ /* fall through */
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (cur == NULL)
+ break;
+ if (prec >= 0)
+ *cur = cs1 - '0';
+ /*
+ * with a negative precision *cur is already set
+ * to -1, so it will remain negative, but we have
+ * to "eat" precision digits in any case
+ */
+ while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
+ --n0;
+ *cur = *cur * 10 + *s1++ - '0';
+ }
+ if (prec < 0) /* negative precision is discarded */
+ have_prec = false;
+ if (cur == &prec)
+ cur = NULL;
+ if (n0 == 0) /* badly formatted control string */
+ continue;
+ goto retry;
+ case '$':
+ if (do_traditional) {
+ msg(_("fatal: `$' is not permitted in awk
formats"));
+ goto out;
+ }
+
+ if (cur == &fw) {
+ argnum = fw;
+ fw = 0;
+ used_dollar = true;
+ if (argnum <= 0) {
+ msg(_("fatal: argument index with `$'
must be > 0"));
+ goto out;
+ }
+ if (argnum >= num_args) {
+ msg(_("fatal: argument index %ld
greater than total number of supplied arguments"), argnum);
+ goto out;
+ }
+ } else {
+ msg(_("fatal: `$' not permitted after period in
format"));
+ goto out;
+ }
+
+ goto retry;
+ case '*':
+ if (cur == NULL)
+ break;
+ if (! do_traditional && used_dollar && !
isdigit((unsigned char) *s1)) {
+ fatal(_("fatal: must use `count$' on all
formats or none"));
+ break; /* silence warnings */
+ } else if (! do_traditional && isdigit((unsigned char)
*s1)) {
+ int val = 0;
+
+ for (; n0 > 0 && *s1 && isdigit((unsigned char)
*s1); s1++, n0--) {
+ val *= 10;
+ val += *s1 - '0';
+ }
+ if (*s1 != '$') {
+ msg(_("fatal: no `$' supplied for
positional field width or precision"));
+ goto out;
+ } else {
+ s1++;
+ n0--;
+ }
+ // val could be less than zero if someone
provides a field width
+ // so large that it causes integer overflow.
Mainly fuzzers do this,
+ // but let's try to be good anyway.
+ if (val < 0 || val >= num_args) {
+ toofew = true;
+ break;
+ }
+ arg = the_args[val];
+ } else {
+ parse_next_arg();
+ }
+ (void) force_number(arg);
+ *cur = get_number_si(arg);
+ if (*cur < 0 && cur == &fw) {
+ *cur = -*cur;
+ lj = true;
+ }
+ if (cur == &prec) {
+ if (*cur >= 0)
+ have_prec = true;
+ else
+ have_prec = false;
+ cur = NULL;
+ }
+ goto retry;
+ case ' ': /* print ' ' or '-' */
+ /* 'space' flag is ignored */
+ /* if '+' already present */
+ if (signchar != false)
+ goto check_pos;
+ /* FALL THROUGH */
+ case '+': /* print '+' or '-' */
+ signchar = cs1;
+ goto check_pos;
+ case '-':
+ if (prec < 0)
+ break;
+ if (cur == &prec) {
+ prec = -1;
+ goto retry;
+ }
+ fill = sp; /* if left justified then other */
+ lj = true; /* filling is ignored */
+ goto check_pos;
+ case '.':
+ if (cur != &fw)
+ break;
+ cur = &prec;
+ have_prec = true;
+ goto retry;
+ case '#':
+ alt = true;
+ goto check_pos;
+ case '\'':
+#if defined(HAVE_LOCALE_H)
+ quote_flag = true;
+ goto check_pos;
+#else
+ goto retry;
+#endif
+ case 'h':
+ case 'j':
+ case 'l':
+ case 'L':
+ case 't':
+ case 'z':
+ if (modifier_seen[modifier_index(cs1)])
+ break;
+ else {
+ int ind = modifier_index(cs1);
+
+ if (do_lint && ! warned[ind]) {
+ lintwarn(_("`%c' is meaningless in awk
formats; ignored"), cs1);
+ warned[ind] = true;
+ }
+ if (do_posix) {
+ msg(_("fatal: `%c' is not permitted in
POSIX awk formats"), cs1);
+ goto out;
+ }
+ }
+ modifier_seen[modifier_index(cs1)] = true;
+ goto retry;
+
+ case 'P':
+ if (magic_posix_flag)
+ break;
+ magic_posix_flag = true;
+ goto retry;
+ case 'c':
+ need_format = false;
+ parse_next_arg();
+ /* user input that looks numeric is numeric */
+ fixtype(arg);
+ if ((arg->flags & NUMBER) != 0) {
+ uval = get_number_uj(arg);
+ if (gawk_mb_cur_max > 1) {
+ char buf[100];
+ wchar_t wc;
+ mbstate_t mbs;
+ size_t count;
+
+ memset(& mbs, 0, sizeof(mbs));
+
+ /* handle systems with too small
wchar_t */
+ if (sizeof(wchar_t) < 4 && uval >
0xffff) {
+ if (do_lint)
+ lintwarn(
+ _("[s]printf: value %g is too
big for %%c format"),
+
arg->numbr);
+
+ goto out0;
+ }
+
+ wc = uval;
+
+ count = wcrtomb(buf, wc, & mbs);
+ if (count == 0
+ || count == (size_t) -1) {
+ if (do_lint)
+ lintwarn(
+ _("[s]printf: value %g is not a
valid wide character"),
+
arg->numbr);
+
+ goto out0;
+ }
+
+ memcpy(cpbuf, buf, count);
+ prec = count;
+ cp = cpbuf;
+ goto pr_tail;
+ }
+out0:
+ ;
+ /* else,
+ fall through */
+
+ cpbuf[0] = uval;
+ prec = 1;
+ cp = cpbuf;
+ goto pr_tail;
+ }
+ /*
+ * As per POSIX, only output first character of a
+ * string value. Thus, we ignore any provided
+ * precision, forcing it to 1. (Didn't this
+ * used to work? 6/2003.)
+ */
+ cp = arg->stptr;
+ prec = 1;
+ /*
+ * First character can be multiple bytes if
+ * it's a multibyte character. Grr.
+ */
+ if (gawk_mb_cur_max > 1) {
+ mbstate_t state;
+ size_t count;
+
+ memset(& state, 0, sizeof(state));
+ count = mbrlen(cp, arg->stlen, & state);
+ if (count != (size_t) -1 && count != (size_t)
-2 && count > 0) {
+ prec = count;
+ /* may need to increase fw so that
padding happens, see pr_tail code */
+ if (fw > 0)
+ fw += count - 1;
+ }
+ }
+ goto pr_tail;
+ case 's':
+ need_format = false;
+ parse_next_arg();
+ arg = force_string(arg);
+ if (fw == 0 && ! have_prec)
+ prec = arg->stlen;
+ else {
+ char_count = mbc_char_count(arg->stptr,
arg->stlen);
+ if (! have_prec || prec > char_count)
+ prec = char_count;
+ }
+ cp = arg->stptr;
+ goto pr_tail;
+ case 'd':
+ case 'i':
+ need_format = false;
+ parse_next_arg();
+ (void) force_number(arg);
+
+ /*
+ * Check for Nan or Inf.
+ */
+ if (out_of_range(arg))
+ goto out_of_range;
+#ifdef HAVE_MPFR
+ if (is_mpg_float(arg))
+ goto mpf0;
+ else if (is_mpg_integer(arg))
+ goto mpz0;
+ else
+#endif
+ tmpval = double_to_int(arg->numbr);
+
+ /*
+ * ``The result of converting a zero value with a
+ * precision of zero is no characters.''
+ */
+ if (have_prec && prec == 0 && tmpval == 0)
+ goto pr_tail;
+
+ if (tmpval < 0) {
+ tmpval = -tmpval;
+ sgn = true;
+ } else {
+ if (tmpval == -0.0)
+ /* avoid printing -0 */
+ tmpval = 0.0;
+ sgn = false;
+ }
+ /*
+ * Use snprintf return value to tell if there
+ * is enough room in the buffer or not.
+ */
+ while ((i = snprintf(cpbufs[1].buf,
+ cpbufs[1].bufsize, "%.0f",
+ tmpval)) >=
+ cpbufs[1].bufsize) {
+ if (cpbufs[1].buf == cpbufs[1].stackbuf)
+ cpbufs[1].buf = NULL;
+ if (i > 0) {
+ cpbufs[1].bufsize += ((i >
cpbufs[1].bufsize) ?
+ i :
cpbufs[1].bufsize);
+ }
+ else
+ cpbufs[1].bufsize *= 2;
+ assert(cpbufs[1].bufsize > 0);
+ erealloc(cpbufs[1].buf, char *,
+ cpbufs[1].bufsize, "format_tree");
+ }
+ if (i < 1)
+ goto out_of_range;
+#if defined(HAVE_LOCALE_H)
+ quote_flag = (quote_flag && loc.thousands_sep[0] != 0);
+#endif
+ chp = &cpbufs[1].buf[i-1];
+ ii = jj = 0;
+ do {
+ PREPEND(*chp);
+ chp--; i--;
+#if defined(HAVE_LOCALE_H)
+ if (quote_flag && loc.grouping[ii] && ++jj ==
loc.grouping[ii]) {
+ if (i) { /* only add if more
digits coming */
+ int k;
+ const char *ts =
loc.thousands_sep;
+
+ for (k = strlen(ts) - 1; k >=
0; k--) {
+ PREPEND(ts[k]);
+ }
+ }
+ if (loc.grouping[ii+1] == 0)
+ jj = 0; /* keep using
current val in loc.grouping[ii] */
+ else if (loc.grouping[ii+1] == CHAR_MAX)
+ quote_flag = false;
+ else {
+ ii++;
+ jj = 0;
+ }
+ }
+#endif
+ } while (i > 0);
+
+ /* add more output digits to match the precision */
+ if (have_prec) {
+ while (cend - cp < prec)
+ PREPEND('0');
+ }
+
+ if (sgn)
+ PREPEND('-');
+ else if (signchar)
+ PREPEND(signchar);
+ /*
+ * When to fill with zeroes is of course not simple.
+ * First: No zero fill if left-justifying.
+ * Next: There seem to be two cases:
+ * A '0' without a precision, e.g. %06d
+ * A precision with no field width, e.g. %.10d
+ * Any other case, we don't want to fill with zeroes.
+ */
+ if (! lj
+ && ((zero_flag && ! have_prec)
+ || (fw == 0 && have_prec)))
+ fill = zero_string;
+ if (prec > fw)
+ fw = prec;
+ prec = cend - cp;
+ if (fw > prec && ! lj && fill != sp
+ && (*cp == '-' || signchar)) {
+ bchunk_one(cp);
+ cp++;
+ prec--;
+ fw--;
+ }
+ goto pr_tail;
+ case 'X':
+ chbuf = Uchbuf; /* FALL THROUGH */
+ case 'x':
+ base += 6; /* FALL THROUGH */
+ case 'u':
+ base += 2; /* FALL THROUGH */
+ case 'o':
+ base += 8;
+ need_format = false;
+ parse_next_arg();
+ (void) force_number(arg);
+
+ if (out_of_range(arg))
+ goto out_of_range;
+#ifdef HAVE_MPFR
+ if (is_mpg_integer(arg)) {
+mpz0:
+ zi = arg->mpg_i;
+
+ if (cs1 != 'd' && cs1 != 'i') {
+ if (mpz_sgn(zi) <= 0) {
+ /*
+ * Negative value or 0 requires
special handling.
+ * Unlike MPFR, GMP does not
allow conversion
+ * to (u)intmax_t. So we first
convert GMP type to
+ * a MPFR type.
+ */
+ mf = mpz2mpfr(zi);
+ goto mpf1;
+ }
+ signchar = '\0'; /* Don't print
'+' */
+ }
+
+ /* See comments above about when to fill with
zeros */
+ zero_flag = (! lj
+ && ((zero_flag && !
have_prec)
+ || (fw == 0 &&
have_prec)));
+
+ fmt_type = have_prec ? MP_INT_WITH_PREC :
MP_INT_WITHOUT_PREC;
+ goto fmt0;
+
+ } else if (is_mpg_float(arg)) {
+mpf0:
+ mf = arg->mpg_numbr;
+ if (! mpfr_number_p(mf)) {
+ /* inf or NaN */
+ cs1 = 'g';
+ fmt_type = MP_FLOAT;
+ goto fmt1;
+ }
+
+ if (cs1 != 'd' && cs1 != 'i') {
+mpf1:
+ /*
+ * The output of printf("%#.0x", 0) is
0 instead of 0x, hence <= in
+ * the comparison below.
+ */
+ if (mpfr_sgn(mf) <= 0) {
+ if (! mpfr_fits_intmax_p(mf,
ROUND_MODE)) {
+ /* -ve number is too
large */
+ cs1 = 'g';
+ fmt_type = MP_FLOAT;
+ goto fmt1;
+ }
+
+ tmpval = uval = (uintmax_t)
mpfr_get_sj(mf, ROUND_MODE);
+ if (! alt && have_prec && prec
== 0 && tmpval == 0)
+ goto pr_tail; /*
printf("%.0x", 0) is no characters */
+ goto int0;
+ }
+ signchar = '\0'; /* Don't print
'+' */
+ }
+
+ /* See comments above about when to fill with
zeros */
+ zero_flag = (! lj
+ && ((zero_flag && !
have_prec)
+ || (fw == 0 &&
have_prec)));
+
+ (void) mpfr_get_z(mpzval, mf, MPFR_RNDZ);
/* convert to GMP integer */
+ fmt_type = have_prec ? MP_INT_WITH_PREC :
MP_INT_WITHOUT_PREC;
+ zi = mpzval;
+ goto fmt0;
+ } else
+#endif
+ tmpval = arg->numbr;
+
+ /*
+ * ``The result of converting a zero value with a
+ * precision of zero is no characters.''
+ *
+ * If I remember the ANSI C standard, though,
+ * it says that for octal conversions
+ * the precision is artificially increased
+ * to add an extra 0 if # is supplied.
+ * Indeed, in C,
+ * printf("%#.0o\n", 0);
+ * prints a single 0.
+ */
+ if (! alt && have_prec && prec == 0 && tmpval == 0)
+ goto pr_tail;
+
+ if (tmpval < 0) {
+ uval = (uintmax_t) (intmax_t) tmpval;
+ if ((AWKNUM)(intmax_t)uval !=
double_to_int(tmpval))
+ goto out_of_range;
+ } else {
+ uval = (uintmax_t) tmpval;
+ if ((AWKNUM)uval != double_to_int(tmpval))
+ goto out_of_range;
+ }
+#ifdef HAVE_MPFR
+ int0:
+#endif
+#if defined(HAVE_LOCALE_H)
+ quote_flag = (quote_flag && loc.thousands_sep[0] != 0);
+#endif
+ /*
+ * When to fill with zeroes is of course not simple.
+ * First: No zero fill if left-justifying.
+ * Next: There seem to be two cases:
+ * A '0' without a precision, e.g. %06d
+ * A precision with no field width, e.g. %.10d
+ * Any other case, we don't want to fill with zeroes.
+ */
+ if (! lj
+ && ((zero_flag && ! have_prec)
+ || (fw == 0 && have_prec)))
+ fill = zero_string;
+ ii = jj = 0;
+ do {
+ PREPEND(chbuf[uval % base]);
+ uval /= base;
+#if defined(HAVE_LOCALE_H)
+ if (base == 10 && quote_flag &&
loc.grouping[ii] && ++jj == loc.grouping[ii]) {
+ if (uval) { /* only add if more
digits coming */
+ int k;
+ const char *ts =
loc.thousands_sep;
+
+ for (k = strlen(ts) - 1; k >=
0; k--) {
+ PREPEND(ts[k]);
+ }
+ }
+ if (loc.grouping[ii+1] == 0)
+ jj = 0; /* keep using
current val in loc.grouping[ii] */
+ else if (loc.grouping[ii+1] == CHAR_MAX)
+ quote_flag = false;
+ else {
+ ii++;
+ jj = 0;
+ }
+ }
+#endif
+ } while (uval > 0);
+
+ /* add more output digits to match the precision */
+ if (have_prec) {
+ while (cend - cp < prec)
+ PREPEND('0');
+ }
+
+ if (alt && tmpval != 0) {
+ if (base == 16) {
+ PREPEND(cs1);
+ PREPEND('0');
+ if (fill != sp) {
+ bchunk(cp, 2);
+ cp += 2;
+ fw -= 2;
+ }
+ } else if (base == 8)
+ PREPEND('0');
+ }
+ base = 0;
+ if (prec > fw)
+ fw = prec;
+ prec = cend - cp;
+ pr_tail:
+ if (! lj) {
+ while (fw > prec) {
+ bchunk_one(fill);
+ fw--;
+ }
+ }
+ copy_count = prec;
+ if (fw == 0 && ! have_prec)
+ ;
+ else if (gawk_mb_cur_max > 1) {
+ if (cs1 == 's') {
+ assert(cp == arg->stptr || cp == cpbuf);
+ copy_count = mbc_byte_count(arg->stptr,
prec);
+ }
+ /* prec was set by code for %c */
+ /* else
+ copy_count = prec; */
+ }
+ bchunk(cp, copy_count);
+ while (fw > prec) {
+ bchunk_one(fill);
+ fw--;
+ }
+ s0 = s1;
+ break;
+
+ out_of_range:
+ /*
+ * out of range - emergency use of %g format,
+ * or format NaN and INF values.
+ */
+ nan_inf_val = format_nan_inf(arg, cs1);
+ if (do_posix || magic_posix_flag || nan_inf_val ==
NULL) {
+ if (do_lint && ! do_posix && ! magic_posix_flag)
+ lintwarn(_("[s]printf: value %g is out
of range for `%%%c' format"),
+ (double)
tmpval, cs1);
+ tmpval = arg->numbr;
+ if (strchr("aAeEfFgG", cs1) == NULL)
+ cs1 = 'g';
+ goto fmt1;
+ } else {
+ if (do_lint)
+ lintwarn(_("[s]printf: value %s is out
of range for `%%%c' format"),
+ nan_inf_val,
cs1);
+ bchunk(nan_inf_val, strlen(nan_inf_val));
+ s0 = s1;
+ break;
+ }
+
+ case 'F':
+#if ! defined(PRINTF_HAS_F_FORMAT) || PRINTF_HAS_F_FORMAT != 1
+ cs1 = 'f';
+ /* FALL THROUGH */
+#endif
+ case 'g':
+ case 'G':
+ case 'e':
+ case 'f':
+ case 'E':
+#if defined(PRINTF_HAS_A_FORMAT) && PRINTF_HAS_A_FORMAT == 1
+ case 'A':
+ case 'a':
+ {
+ static bool warned = false;
+
+ if (do_lint && tolower(cs1) == 'a' && ! warned) {
+ warned = true;
+ lintwarn(_("%%%c format is POSIX standard but
not portable to other awks"), cs1);
+ }
+ }
+#endif
+ need_format = false;
+ parse_next_arg();
+ (void) force_number(arg);
+
+ if (! is_mpg_number(arg))
+ tmpval = arg->numbr;
+#ifdef HAVE_MPFR
+ else if (is_mpg_float(arg)) {
+ mf = arg->mpg_numbr;
+ fmt_type = MP_FLOAT;
+ } else {
+ /* arbitrary-precision integer, convert to MPFR
float */
+ assert(mf == NULL);
+ mf = mpz2mpfr(arg->mpg_i);
+ fmt_type = MP_FLOAT;
+ }
+#endif
+ if (out_of_range(arg))
+ goto out_of_range;
+
+ fmt1:
+ if (! have_prec)
+ prec = DEFAULT_G_PRECISION;
+#ifdef HAVE_MPFR
+ fmt0:
+#endif
+ chksize(fw + prec + 11); /* 11 == slop */
+ cp = cpbuf;
+ *cp++ = '%';
+ if (lj)
+ *cp++ = '-';
+ if (signchar)
+ *cp++ = signchar;
+ if (alt)
+ *cp++ = '#';
+ if (zero_flag)
+ *cp++ = '0';
+ if (quote_flag)
+ *cp++ = '\'';
+
+#if defined(LC_NUMERIC)
+ if (quote_flag && ! use_lc_numeric)
+ setlocale(LC_NUMERIC, "");
+#endif
+
+ bool need_to_add_thousands = false;
+ switch (fmt_type) {
+#ifdef HAVE_MPFR
+ case MP_INT_WITH_PREC:
+ sprintf(cp, "*.*Z%c", cs1);
+ while ((nc = mpfr_snprintf(obufout, ofre, cpbuf,
+ (int) fw, (int) prec, zi)) >=
(int) ofre)
+ chksize(nc)
+ need_to_add_thousands = true;
+ break;
+ case MP_INT_WITHOUT_PREC:
+ sprintf(cp, "*Z%c", cs1);
+ while ((nc = mpfr_snprintf(obufout, ofre, cpbuf,
+ (int) fw, zi)) >= (int) ofre)
+ chksize(nc)
+ need_to_add_thousands = true;
+ break;
+ case MP_FLOAT:
+ sprintf(cp, "*.*R*%c", cs1);
+ while ((nc = mpfr_snprintf(obufout, ofre, cpbuf,
+ (int) fw, (int) prec, ROUND_MODE,
mf)) >= (int) ofre)
+ chksize(nc)
+ break;
+#endif
+ default:
+ if (have_prec || tolower(cs1) != 'a') {
+ sprintf(cp, "*.*%c", cs1);
+ while ((nc = snprintf(obufout, ofre,
cpbuf,
+ (int) fw, (int) prec,
+ (double) tmpval)) >= (int)
ofre)
+ chksize(nc)
+ } else {
+ // For %a and %A, use the default
precision if it
+ // wasn't supplied by the user.
+ sprintf(cp, "*%c", cs1);
+ while ((nc = snprintf(obufout, ofre,
cpbuf,
+ (int) fw,
+ (double) tmpval)) >= (int)
ofre)
+ chksize(nc)
+ }
+ }
+
+#if defined(LC_NUMERIC)
+ if (quote_flag && ! use_lc_numeric)
+ setlocale(LC_NUMERIC, "C");
+#endif
+ len = strlen(obufout);
+ if (quote_flag && need_to_add_thousands) {
+ const char *new_text = add_thousands(obufout/*,
& loc*/);
+
+ len = strlen(new_text);
+ chksize(len)
+ strcpy(obufout, new_text);
+ free((void *) new_text);
+ }
- /* estimate minimum precision for exact conversion */
- prec = mpz_sizeinbase(zi, 2); /* most significant 1 bit position
starting at 1 */
- prec -= (size_t) mpz_scan1(zi, 0); /* least significant 1 bit
index starting at 0 */
- if (prec < MPFR_PREC_MIN)
- prec = MPFR_PREC_MIN;
- else if (prec > MPFR_PREC_MAX)
- prec = MPFR_PREC_MAX;
+ ofre -= len;
+ obufout += len;
+ s0 = s1;
+ break;
+ default:
+ if (do_lint && is_alpha(cs1))
+ lintwarn(_("ignoring unknown format specifier
character `%c': no argument converted"), cs1);
+ break;
+ }
+ if (toofew) {
+ msg("%s\n\t`%s'\n\t%*s%s",
+ _("fatal: not enough arguments to satisfy format
string"),
+ fmt_string, (int) (s1 - fmt_string - 1), "",
+ _("^ ran out for this one"));
+ goto out;
+ }
+ }
+ if (do_lint) {
+ if (need_format)
+ lintwarn(
+ _("[s]printf: format specifier does not have control
letter"));
+ if (cur_arg < num_args)
+ lintwarn(
+ _("too many arguments supplied for format string"));
+ }
+ bchunk(s0, s1 - s0);
+ olen_final = obufout - obuf;
+#define GIVE_BACK_SIZE (INITIAL_OUT_SIZE * 2)
+ if (ofre > GIVE_BACK_SIZE)
+ erealloc(obuf, char *, olen_final + 1, "format_tree");
+ r = make_str_node(obuf, olen_final, ALREADY_MALLOCED);
+ obuf = NULL;
+out:
+ {
+ size_t k;
+ size_t count = sizeof(cpbufs)/sizeof(cpbufs[0]);
+ for (k = 0; k < count; k++) {
+ if (cpbufs[k].buf != cpbufs[k].stackbuf)
+ efree(cpbufs[k].buf);
+ }
+ if (obuf != NULL)
+ efree(obuf);
+ }
- if (! inited) {
- mpfr_init2(mpfrval, prec);
- inited = true;
- } else
- mpfr_set_prec(mpfrval, prec);
- tval = mpfr_set_z(mpfrval, zi, ROUND_MODE);
- IEEE_FMT(mpfrval, tval);
- return mpfrval;
+ if (r == NULL)
+ gawk_exit(EXIT_FATAL);
+ return r;
}
-#endif
-
-/*
- * format_tree() formats arguments of sprintf,
- * and accordingly to a fmt_string providing a format like in
- * printf family from C library. Returns a string node which value
- * is a formatted string. Called by sprintf function.
- *
- * It is one of the uglier parts of gawk. Thanks to Michal Jaegermann
- * for taming this beast and making it compatible with ANSI C.
- */
+#undef bchunk
+#undef bchunk_one
+#undef chksize
NODE *
-format_tree(
+format_tree_new(
const char *fmt_string,
size_t n0,
NODE **the_args,
@@ -146,6 +1195,9 @@ format_tree(
uintmax_t uval;
bool sgn;
int base;
+ bool space_flag;
+ bool plus_flag;
+
/*
* Although this is an array, the elements serve two different
* purposes. The first element is the general buffer meant
@@ -178,6 +1230,7 @@ format_tree(
mpfr_ptr mf;
#endif
enum { MP_NONE = 0, MP_INT_WITH_PREC = 1, MP_INT_WITHOUT_PREC, MP_FLOAT
} fmt_type;
+ struct flags flags;
static const char sp[] = " ";
static const char zero_string[] = "0";
@@ -277,6 +1330,8 @@ format_tree(
zero_flag = false;
quote_flag = false;
nan_inf_val = NULL;
+ space_flag = false;
+ plus_flag = false;
#ifdef HAVE_MPFR
mf = NULL;
zi = NULL;
@@ -290,6 +1345,7 @@ format_tree(
cp = cend;
chbuf = lchbuf;
s1++;
+ memset(& flags, 0, sizeof(flags));
retry:
if (n0-- == 0) /* ran out early! */
@@ -309,11 +1365,15 @@ check_pos:
* since there's no arg converted, the field width
doesn't
* apply. The code already was that way, but this
* comment documents it, at least in the code.
+ *
+ * 27 June 2024:
+ * This is still the case. The 2023 standard says you
shouldn't
+ * have anything between the percents.
*/
if (do_lint) {
const char *msg = NULL;
- if (fw && ! have_prec)
+ if (fw != 0 && ! have_prec)
msg = _("field width is ignored for
`%%' specifier");
else if (fw == 0 && have_prec)
msg = _("precision is ignored for `%%'
specifier");
@@ -386,6 +1446,7 @@ check_pos:
goto out;
}
} else {
+ // FIXME
msg(_("fatal: `$' not permitted after period in
format"));
goto out;
}
@@ -439,10 +1500,13 @@ check_pos:
case ' ': /* print ' ' or '-' */
/* 'space' flag is ignored */
/* if '+' already present */
+ space_flag = true;
if (signchar != false)
goto check_pos;
- /* FALL THROUGH */
+ signchar = cs1;
+ goto check_pos;
case '+': /* print '+' or '-' */
+ plus_flag = true;
signchar = cs1;
goto check_pos;
case '-':
@@ -499,6 +1563,9 @@ check_pos:
break;
magic_posix_flag = true;
goto retry;
+ case 'C': // POSIX 2024
+ cs1 = 'c';
+ // FALL THROUGH
case 'c':
need_format = false;
parse_next_arg();
@@ -578,6 +1645,9 @@ out0:
}
}
goto pr_tail;
+ case 'S': // POSIX 2024
+ cs1 = 's';
+ // FALL THROUGH
case 's':
need_format = false;
parse_next_arg();
@@ -724,6 +1794,22 @@ out0:
parse_next_arg();
(void) force_number(arg);
+/*
+ * set_flags --- copy the conversion state parsed so far into `flags'
+ * for use by the format_xxx() helper routines.
+ *
+ * Wrapped in do { } while (0) so that `set_flags();' is a single
+ * statement wherever it is used.  magic_posix_flag is copied too,
+ * since format_out_of_range() consults it through the flags.
+ */
+#define set_flags() \
+	do { \
+		flags.left_just = lj; \
+		flags.alt = alt; \
+		flags.zero = zero_flag; \
+		flags.space = space_flag; \
+		flags.plus = plus_flag; \
+		flags.quote = quote_flag; \
+		flags.have_prec = have_prec; \
+		flags.format = cs1; \
+		flags.base = base; \
+		flags.field_width = fw; \
+		flags.precision = prec; \
+		flags.magic_posix_flag = magic_posix_flag; \
+	} while (0)
+
+ set_flags();
+
+
if (out_of_range(arg))
goto out_of_range;
#ifdef HAVE_MPFR
@@ -964,6 +2050,20 @@ mpf1:
parse_next_arg();
(void) force_number(arg);
+ set_flags();
+ {
+ const char *formatted = format_float(arg, & flags);
+ len = strlen(formatted);
+ chksize(len)
+ strcpy(obufout, formatted);
+ free((void *) formatted);
+
+ ofre -= len;
+ obufout += len;
+ s0 = s1;
+ break;
+ }
+
if (! is_mpg_number(arg))
tmpval = arg->numbr;
#ifdef HAVE_MPFR
@@ -1053,7 +2153,7 @@ mpf1:
#endif
len = strlen(obufout);
if (quote_flag && need_to_add_thousands) {
- const char *new_text = add_thousands(obufout, &
loc);
+ const char *new_text = add_thousands(obufout);
len = strlen(new_text);
chksize(len)
@@ -1298,7 +2398,7 @@ out_of_range(NODE *n)
return (isnan(n->numbr) || isinf(n->numbr));
}
-/* format_nan_inf --- format NaN and INF values */
+/* format_nan_inf --- format NaN and INF values; returns a static buffer */
char *
format_nan_inf(NODE *n, char format)
@@ -1349,7 +2449,6 @@ fmt:
}
-
/* reverse --- reverse the contents of a string in place */
static void
@@ -1365,7 +2464,7 @@ reverse(char *str)
}
}
-/* add_thousands --- add the thousands separator. Needed for MPFR %d format */
+/* add_thousands --- add the thousands separator; caller frees the result */
/*
* Copy the source string into the destination string, backwards,
@@ -1375,25 +2474,26 @@ reverse(char *str)
*/
static const char *
-add_thousands(const char *original, struct lconv *loc)
+add_thousands(const char *original)
{
size_t orig_len = strlen(original);
- size_t new_len = orig_len + (orig_len * strlen(loc->thousands_sep)) +
1; // worst case
+ size_t new_len = orig_len + 1;
char *newbuf;
- char decimal_point = '\0';
- const char *dec = NULL;
const char *src;
char *dest;
emalloc(newbuf, char *, new_len, "add_thousands");
memset(newbuf, '\0', new_len);
+#if defined(HAVE_LOCALE_H)
+ new_len = orig_len + (orig_len * strlen(loc.thousands_sep)) + 1;
// worst case
src = original + strlen(original) - 1;
dest = newbuf;
- if (loc->decimal_point[0] != '\0') {
- decimal_point = loc->decimal_point[0];
- if ((dec = strchr(original, decimal_point)) != NULL) {
+ if (loc.decimal_point[0] != '\0') {
+ const char *dec = NULL;
+
+ if ((dec = strchr(original, loc.decimal_point[0])) != NULL) {
while (src >= dec)
*dest++ = *src--;
}
@@ -1404,17 +2504,17 @@ add_thousands(const char *original, struct lconv *loc)
int jj = 0;
do {
*dest++ = *src--;
- if (loc->grouping[ii] && ++jj == loc->grouping[ii]) {
+ if (loc.grouping[ii] && ++jj == loc.grouping[ii]) {
if (src >= original) { /* only add if more digits
coming */
- const char *ts = loc->thousands_sep;
+ const char *ts = loc.thousands_sep;
int k;
for (k = strlen(ts) - 1; k >= 0; k--)
*dest++ = ts[k];
}
- if (loc->grouping[ii+1] == 0)
+ if (loc.grouping[ii+1] == 0)
jj = 0; /* keep using current val in
loc.grouping[ii] */
- else if (loc->grouping[ii+1] == CHAR_MAX) {
+ else if (loc.grouping[ii+1] == CHAR_MAX) {
// copy in the rest and be done
while (src >= original)
*dest++ = *src--;
@@ -1428,6 +2528,491 @@ add_thousands(const char *original, struct lconv *loc)
*dest++ = '\0';
reverse(newbuf);
+#else
+ strcpy(newbuf, original);
+#endif
return newbuf;
}
+
+/*
+ * format_integer_value --- format just the actual value of an integer.
+ *
+ * Produces the digits of arg's numeric value in flags->base (upper case
+ * hex digits for the 'X' format), preceded by '-' for negative decimal
+ * values, and with thousands separators added for decimal output when
+ * the quote flag is set and the locale supplies a separator.  No field
+ * width, precision or prefix handling is done here.
+ *
+ * arg's numeric value is left unchanged.  Caller frees the return value.
+ */
+
+static const char *
+format_integer_value(NODE *arg, struct flags *flags)
+{
+#define VALUE_SIZE 40
+	char *buf = NULL;
+	size_t buflen;
+	static const char lchbuf[] = "0123456789abcdef";
+	static const char Uchbuf[] = "0123456789ABCDEF";
+	const char *chbuf;
+	char *cp;
+	bool quote_flag = false;
+	bool negative = false;
+	uintmax_t uval;
+
+#define growbuffer(buf, buflen, cp) { \
+	erealloc(buf, char *, buflen * 2, "format_integer"); \
+	cp = buf + buflen; \
+	buflen *= 2; \
+	}
+
+#if defined(HAVE_LOCALE_H)
+	quote_flag = (flags->quote && loc.thousands_sep[0] != '\0');
+#endif
+
+	chbuf = (flags->format == 'X' ? Uchbuf : lchbuf);
+	emalloc(buf, char *, VALUE_SIZE, "format_integer_value");
+	buflen = VALUE_SIZE;
+	cp = buf;
+
+	/*
+	 * C 2023 says negative zeros get a minus sign.  Comparing against
+	 * `-0' cannot detect one (it is the same as comparing against 0,
+	 * which would put a minus sign on every zero), so test the sign bit.
+	 */
+	if (flags->base == 10
+	    && (arg->numbr < 0 || (arg->numbr == 0 && signbit(arg->numbr)))) {
+		AWKNUM saved = arg->numbr;
+
+		negative = true;
+		arg->numbr = -saved;
+		uval = get_number_uj(arg);
+		arg->numbr = saved;	// don't leave the caller's value negated
+	} else if (arg->numbr < 0) {
+		// Non-decimal output of a negative value: go through the signed
+		// type first, the same conversion format_integer() uses for its
+		// range check, instead of converting a negative double directly
+		// to an unsigned type.
+		uval = (uintmax_t) (intmax_t) arg->numbr;
+	} else
+		uval = get_number_uj(arg);
+
+	// generate the digits backwards.
+	do {
+		if (cp >= buf + buflen)
+			growbuffer(buf, buflen, cp);
+
+		*cp++ = chbuf[uval % flags->base];
+		uval /= flags->base;
+	} while (uval > 0);
+
+	// make sure there is room for a possible sign and the terminator
+	if ((size_t) (cp - buf) + 2 > buflen) {
+		size_t used = cp - buf;
+
+		erealloc(buf, char *, buflen + 2, "format_integer_value");
+		buflen += 2;
+		cp = buf + used;
+	}
+	if (negative)
+		*cp++ = '-';
+	*cp = '\0';
+
+	// turn it back around
+	reverse(buf);
+
+	if (flags->base == 10 && quote_flag) {
+		const char *with_commas = add_thousands(buf);
+
+		free((void *) buf);
+		buf = (char *) with_commas;
+	}
+
+	return buf;
+}
+
+/*
+ * format_integer --- format a signed or unsigned integer value.
+ *
+ * arg is the (already numeric) value, flags describes the conversion:
+ * format letter, base, field width, precision and the '-', '+', ' ',
+ * '#', '0' and quote flags.  NaN/Inf values and values that do not fit
+ * in an integer type are handed to format_out_of_range(); MPFR/GMP
+ * values are handed to format_mpg_integer().
+ *
+ * The flags are not modified.  Caller frees the return value.
+ */
+
+static const char *
+format_integer(NODE *arg, struct flags *flags)
+{
+	const char *number_value;	// from format_integer_value(), freed below
+	const char *digits;		// points into number_value, past any '-'
+	const char *sign = "";
+	char prefix[3] = "";
+	double tmpval;
+	uintmax_t uval;
+	size_t ndigits;
+	size_t prec_zeros = 0;		// leading zeros required by the precision
+	size_t sign_len, prefix_len, body_len, width, pad;
+	char *result;
+	char *cp;
+
+	if (out_of_range(arg))
+		return format_out_of_range(arg, flags);
+
+	if (is_mpg_integer(arg) || is_mpg_float(arg))
+		return format_mpg_integer(arg, flags);
+
+	tmpval = arg->numbr;
+	if (tmpval < 0) {
+		uval = (uintmax_t) (intmax_t) tmpval;
+		if ((AWKNUM)(intmax_t)uval != double_to_int(tmpval))
+			return format_out_of_range(arg, flags);
+	} else {
+		uval = (uintmax_t) tmpval;
+		if ((AWKNUM)uval != double_to_int(tmpval))
+			return format_out_of_range(arg, flags);
+	}
+
+	// we now have an integer we can format, do so
+	number_value = format_integer_value(arg, flags);
+
+	// We now have the initial *integer* decimal, octal, or hex value in
+	// hand.  If it's decimal, commas have been added if appropriate.  If
+	// it's negative and decimal, it has a minus sign.
+
+	/*
+	 * Separate the sign from the digits.  A plus or space is only added
+	 * if the number is not negative and the conversion is signed;
+	 * in practice this means base == 10.
+	 */
+	digits = number_value;
+	if (digits[0] == '-') {
+		sign = "-";
+		digits++;
+	} else if (flags->base == 10) {
+		if (flags->plus)
+			sign = "+";
+		else if (flags->space)
+			sign = " ";
+	}
+
+	/*
+	 * ``The result of converting a zero value with a
+	 * precision of zero is no characters.''
+	 *
+	 * With the # flag the digit is kept, so that, as in C,
+	 * 	printf("%#.0o\n", 0);
+	 * prints a single 0.
+	 */
+	if (! flags->alt && flags->have_prec && flags->precision == 0
+	    && uval == 0)
+		digits = "";
+	ndigits = strlen(digits);
+
+	/* add more output digits to match the precision */
+	if (flags->have_prec && flags->precision > 0
+	    && (size_t) flags->precision > ndigits)
+		prec_zeros = (size_t) flags->precision - ndigits;
+
+	if (flags->alt) {
+		if (flags->base == 16 && uval != 0) {
+			// "0x" or "0X"; a zero value gets no prefix
+			prefix[0] = '0';
+			prefix[1] = flags->format;
+			prefix[2] = '\0';
+		} else if (flags->base == 8 && prec_zeros == 0
+				&& digits[0] != '0') {
+			// make sure that the first digit is a zero
+			prefix[0] = '0';
+			prefix[1] = '\0';
+		}
+	}
+
+	sign_len = strlen(sign);
+	prefix_len = strlen(prefix);
+	body_len = sign_len + prefix_len + prec_zeros + ndigits;
+
+	width = body_len;
+	if (flags->field_width > 0 && (size_t) flags->field_width > body_len)
+		width = (size_t) flags->field_width;
+	pad = width - body_len;
+
+	emalloc(result, char *, width + 1, "format_integer");
+	cp = result;
+
+	/*
+	 * Order of the output:
+	 *	1. Right justified without zero fill: the padding spaces.
+	 *	2. The sign, then the 0x / 0 prefix.
+	 *	3. Right justified with zero fill: the padding zeros.
+	 *	4. The zeros required by the precision, then the digits.
+	 *	5. Left justified: the padding spaces.
+	 */
+	if (flags->left_just) {
+		memcpy(cp, sign, sign_len);
+		cp += sign_len;
+		memcpy(cp, prefix, prefix_len);
+		cp += prefix_len;
+	} else if (compute_zero_flag(flags)) {
+		memcpy(cp, sign, sign_len);
+		cp += sign_len;
+		memcpy(cp, prefix, prefix_len);
+		cp += prefix_len;
+		memset(cp, '0', pad);
+		cp += pad;
+	} else {
+		memset(cp, ' ', pad);
+		cp += pad;
+		memcpy(cp, sign, sign_len);
+		cp += sign_len;
+		memcpy(cp, prefix, prefix_len);
+		cp += prefix_len;
+	}
+
+	memset(cp, '0', prec_zeros);
+	cp += prec_zeros;
+	memcpy(cp, digits, ndigits);
+	cp += ndigits;
+
+	if (flags->left_just) {
+		memset(cp, ' ', pad);
+		cp += pad;
+	}
+	*cp = '\0';
+
+	// digits points into number_value, so release it only now
+	free((void *) number_value);
+
+	return result;
+}
+
+/*
+ * format_out_of_range --- format an out of range value as %g, or format
+ * a NaN or INF value.
+ *
+ * In POSIX mode, or when format_nan_inf() has nothing to offer, the value
+ * is formatted by format_float(), switching the format to 'g' unless it
+ * already is a floating point format.  Otherwise the NaN / INF text is
+ * padded with spaces to the field width, on the right if left justifying
+ * and on the left if not.
+ *
+ * Caller frees the return value.
+ */
+
+static const char *
+format_out_of_range(NODE *arg, struct flags *flags)
+{
+	// nan_inf_val points to a static buffer, don't free it.
+	const char *nan_inf_val = format_nan_inf(arg, flags->format);
+	size_t len, width, pad;
+	char *buf;
+	char *cp;
+
+	/*
+	 * out of range - emergency use of %g format,
+	 * or format NaN and INF values.
+	 */
+	if (do_posix || flags->magic_posix_flag || nan_inf_val == NULL) {
+		if (do_lint && ! do_posix && ! flags->magic_posix_flag)
+			lintwarn(_("[s]printf: value %g is out of range for `%%%c' format"),
+						(double) arg->numbr,
+						flags->format);
+
+		if (strchr("aAeEfFgG", flags->format) == NULL)
+			flags->format = 'g';
+
+		// NOTE(review): format_float() itself hands NaN and INF values
+		// to this routine; confirm that the two cannot keep calling
+		// each other when running in POSIX mode.
+		return format_float(arg, flags);
+	}
+
+	if (do_lint)
+		lintwarn(_("[s]printf: value %s is out of range for `%%%c' format"),
+					nan_inf_val, flags->format);
+
+	// A NaN or Inf, deal with a field width, if any
+	len = strlen(nan_inf_val);
+	width = len;
+	if (flags->field_width > 0 && (size_t) flags->field_width > len)
+		width = (size_t) flags->field_width;
+	pad = width - len;
+
+	emalloc(buf, char *, width + 1, "format_out_of_range");
+	cp = buf;
+
+	if (! flags->left_just) {
+		// right justified: spaces first
+		memset(cp, ' ', pad);
+		cp += pad;
+	}
+
+	memcpy(cp, nan_inf_val, len);
+	cp += len;
+
+	if (flags->left_just) {
+		// left justified: spaces follow the text
+		memset(cp, ' ', pad);
+		cp += pad;
+	}
+	*cp = '\0';
+
+	return buf;
+}
+
+/*
+ * format_float --- format a floating point number.
+ *
+ * Builds a printf format from the flags ('-', sign, '#', '0', quote,
+ * then "*.*" and the format letter) and lets snprintf() or
+ * mpfr_snprintf() do the work, growing the buffer until the result fits.
+ * If no precision was given, flags->precision is set to
+ * DEFAULT_G_PRECISION, except that %a and %A are then formatted without
+ * any precision.
+ *
+ * NaN and INF values are handed to format_out_of_range(), but not in
+ * POSIX mode: there format_out_of_range() formats the value by calling
+ * this routine, and the two would otherwise call each other forever.
+ *
+ * Caller frees the return value.
+ */
+
+static const char *
+format_float(NODE *arg, struct flags *flags)
+{
+	char *buf;
+	size_t buflen;
+	char *cp;
+#undef cpbuf
+	char cpbuf[100];
+	double tmpval = 0.0;
+#ifdef HAVE_MPFR
+	mpfr_ptr mf = NULL;
+#endif
+	bool quote_flag = false;
+	bool mpfr_format = false;
+	bool need_to_add_thousands = false;
+	int signchar = '\0';
+	int nc;
+
+	if (out_of_range(arg) && ! do_posix && ! flags->magic_posix_flag)
+		return format_out_of_range(arg, flags);
+
+	(void) force_number(arg);
+
+	if (! is_mpg_number(arg))
+		tmpval = arg->numbr;
+#ifdef HAVE_MPFR
+	else if (is_mpg_float(arg)) {
+		mf = arg->mpg_numbr;
+		mpfr_format = true;
+	} else {
+		/* arbitrary-precision integer, convert to MPFR float */
+		mf = mpz2mpfr(arg->mpg_i);
+		mpfr_format = true;
+	}
+#endif
+
+	if (! flags->have_prec)
+		flags->precision = DEFAULT_G_PRECISION;
+
+#if defined(HAVE_LOCALE_H)
+	quote_flag = (flags->quote && loc.thousands_sep[0] != 0);
+#endif
+
+	buflen = flags->field_width + flags->precision + 11;	/* 11 == slop */
+	emalloc(buf, char *, buflen, "format_float");
+
+	if (flags->plus)
+		signchar = '+';
+	else if (flags->space)
+		signchar = ' ';
+
+	// build the format: % followed by the flags
+	cp = cpbuf;
+	*cp++ = '%';
+	if (flags->left_just)
+		*cp++ = '-';
+	if (signchar)
+		*cp++ = signchar;
+	if (flags->alt)
+		*cp++ = '#';
+	if (flags->zero)
+		*cp++ = '0';
+	if (quote_flag)
+		*cp++ = '\'';
+
+#if defined(LC_NUMERIC)
+	if (quote_flag && ! use_lc_numeric)
+		setlocale(LC_NUMERIC, "");
+#endif
+
+	// The `*' width and precision arguments must be of type int.
+	if (mpfr_format) {
+#ifdef HAVE_MPFR
+		sprintf(cp, "*.*R*%c", flags->format);
+		while ((nc = mpfr_snprintf(buf, buflen, cpbuf,
+				(int) flags->field_width, (int) flags->precision,
+				ROUND_MODE, mf)) >= (int) buflen) {
+			erealloc(buf, char *, buflen * 2, "format_float");
+			buflen *= 2;
+		}
+#else
+		cant_happen("trying to format GMP/MPFR number");
+#endif
+	} else {
+		if (flags->have_prec
+		    || tolower((unsigned char) flags->format) != 'a') {
+			sprintf(cp, "*.*%c", flags->format);
+			while ((nc = snprintf(buf, buflen, cpbuf,
+					(int) flags->field_width,
+					(int) flags->precision,
+					(double) tmpval)) >= (int) buflen) {
+				erealloc(buf, char *, buflen * 2, "format_float");
+				buflen *= 2;
+			}
+		} else {
+			// For %a and %A, let the C library choose the
+			// precision if it wasn't supplied by the user.
+			sprintf(cp, "*%c", flags->format);
+			while ((nc = snprintf(buf, buflen, cpbuf,
+					(int) flags->field_width,
+					(double) tmpval)) >= (int) buflen) {
+				erealloc(buf, char *, buflen * 2, "format_float");
+				buflen *= 2;
+			}
+		}
+	}
+
+#if defined(LC_NUMERIC)
+	if (quote_flag && ! use_lc_numeric)
+		setlocale(LC_NUMERIC, "C");
+#endif
+
+	// NOTE(review): nothing in this routine sets need_to_add_thousands
+	// to true, so this branch is currently never taken.
+	if (quote_flag && need_to_add_thousands) {
+		const char *new_text = add_thousands(buf);
+
+		free((void *) buf);
+		buf = (char *) new_text;
+	}
+
+	return buf;
+}
+
+/* compute_zero_flag --- decide whether the padding should be zeros */
+
+static bool
+compute_zero_flag(struct flags *flags)
+{
+	/*
+	 * The rules for zero filling, one at a time:
+	 *	Never when left-justifying.
+	 *	With a precision, only if there is no field width,
+	 *	e.g. %.10d.
+	 *	Without a precision, only if '0' was given, e.g. %06d.
+	 */
+	if (flags->left_just)
+		return false;
+
+	if (flags->have_prec)
+		return flags->field_width == 0;
+
+	return flags->zero;
+}
+
+/*
+ * format_mpg_integer --- format an MPZ or MPFR integer.
+ *
+ * For now this is only a placeholder: neither arg nor flags is looked
+ * at, and the result is always the fixed text "mpg_int".  The text is
+ * allocated with emalloc(), like the results of the other format_xxx()
+ * routines.  Caller frees the return value.
+ */
+
+static const char *
+format_mpg_integer(NODE *arg, struct flags *flags)
+{
+	static const char placeholder[] = "mpg_int";
+	char *buf;
+
+	(void) arg;
+	(void) flags;
+
+	emalloc(buf, char *, sizeof(placeholder), "format_mpg_integer");
+	memcpy(buf, placeholder, sizeof(placeholder));
+
+	return buf;
+}
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 15 +
awk.h | 4 +-
main.c | 7 +
printf.c | 1689 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
4 files changed, 1662 insertions(+), 53 deletions(-)
hooks/post-receive
--
gawk
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [SCM] gawk branch, stable/printf-rework, updated. gawk-4.1.0-5490-g8d18169d,
Arnold Robbins <=