bug-gawk
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: print: unexpected gawk's behaviour in case indirect function call


From: arnold
Subject: Re: print: unexpected gawk's behaviour in case indirect function call
Date: Mon, 07 Feb 2022 08:18:53 -0700
User-agent: Heirloom mailx 12.5 7/5/10

Greetings.

Thanks for this report. The problem was mismanagement of the stack
when doing an indirect call of builtin or extension functions.
The fix is below.  The primary fix is in interpret.h, but the
subsequent emails showed that gawk wasn't protecting against calling
builtins with an incorrect number of arguments, so that also is fixed.

I still have documentation updates to make and test suite additions
to do, but the patch below should cover the code.

Arnold

Denis Shirokov <cosmogen@gmail.com> wrote:

> Hi Gawk Team!
>
> I found some strange gawk behaviour that is completely driving me crazy
>
> example:
>
>       BEGIN{
>             print "DIRECT:   "      match( a, b )
>             f = "match"
>             print "INDIRECT: "      @f( a, b )
>             print "END" }
>
> output:
>
>       D:\CPU\DEV\PROJECT\XASM>gawk -f ./a.txt
>       DIRECT:   1
>       match1
>      < 'match' ??? wtf?
>       END
>
> It looks like it has lost the leading "INDIRECT: " string, then outputs the
> content of the global variable f ("match"), and THEN indirectly calls the
> built-in match() function and outputs its result.
>
> Please note that this behaviour was found while preparing a report about
> another gawk issue that will be reported soon. The provided example
> is from there.
>
> Windows 10(x64)
> GNU Awk 5.1.1, API: 3.1 (GNU MPFR 4.0.2, GNU MP 6.1.2)
>
> Kind Regards
> Denis

------------- cut here ---------------
diff --git a/ChangeLog b/ChangeLog
index 6f89d61..6f99be9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,35 @@
+2022-02-07         Arnold D. Robbins     <arnold@skeeve.com>
+
+       Continue fixing indirect calls of builtins.
+
+       * awk.h (check_exact_args, check_args_min_max): Add declarations.
+       * builtin.c (check_exact_args, check_args_min_max): New functions.
+       (do_exp, do_fflush, do_index, do_int, do_isarray, do_length, do_log,
+       do_sqrt, do_strftime, do_systime, do_mktime, do_system, do_tolower,
+       do_toupper, do_atan2, do_sin, do_cos, do_rand, do_srand, do_match,
+       do_sub, do_lshift, do_rshift, do_compl, do_strtonum, do_dcgettext,
+       do_dcngettext, do_bindtextdomain, do_intdiv, do_typeof): Call
+       the argument checking functions.
+       (call_sub, call_match): Manually check argument count.
+       * field.c (do_split, do_patsplit): Call the argument checking
+       functions.
+       * interpret.h (r_interpret): For indirect call of extension functions,
+       pop the function name off the stack when done.
+       * mpfr.c (do_atan2, do_mpfr_func, do_mpfr_int, do_mpfr_compl,
+       do_mpfr_lshift, do_mpfr_rshift, do_mpfr_strtonum, do_mpfr_rand,
+       do_mpfr_srand, do_mpfr_intdiv): Call the argument checking functions.
+
+2022-02-04         Arnold D. Robbins     <arnold@skeeve.com>
+
+       Start fixing issues with indirect calls of builtins.
+       Thanks to Denis Shirokov <cosmogen@gmail.com> for the initial report.
+       Much more remains to be done.
+
+       * builtin.c (do_length): Check number of arguments, fatal if not one.
+       If passed Node_var_new, turn it into the null string.
+       * interpret.h (r_interpret): For Op_indirect_call, pop the function
+       name off the stack.
+
 2022-01-05         Arnold D. Robbins     <arnold@skeeve.com>
 
        * awkgram.y (change_namespace): New function. Extracted from
diff --git a/awk.h b/awk.h
index 9b09eab..09c929c 100644
--- a/awk.h
+++ b/awk.h
@@ -1514,6 +1514,8 @@ extern int strncasecmpmbs(const unsigned char *,
                          const unsigned char *, size_t);
 extern int sanitize_exit_status(int status);
 extern void check_symtab_functab(NODE *dest, const char *fname, const char *msg);
+extern void check_exact_args(int nargs, const char *fname, int count);
+extern void check_args_min_max(int nargs, const char *fname, int min, int max);
 /* debug.c */
 extern void init_debug(void);
 extern int debug_prog(INSTRUCTION *pc);
diff --git a/builtin.c b/builtin.c
index 91cd142..060b2f2 100644
--- a/builtin.c
+++ b/builtin.c
@@ -90,6 +90,22 @@ fatal(_("attempt to use array `%s' in a scalar context"), array_vname(s1)); \
 }} while (false)
 
 
+/* check argument counts --- for use when called indirectly */
+
+void
+check_exact_args(int nargs, const char *fname, int count)
+{
+       if (nargs != count)
+               fatal(_("%s: called with %d arguments"), fname, nargs);
+}
+
+void
+check_args_min_max(int nargs, const char *fname, int min, int max)
+{
+       if (nargs < min || nargs > max)
+               fatal(_("%s: called with %d arguments"), fname, nargs);
+}
+
 /*
  * Since we supply the version of random(), we know what
  * value to use here.
@@ -173,6 +189,8 @@ do_exp(int nargs)
        NODE *tmp;
        double d, res;
 
+       check_exact_args(nargs, "exp", 1);
+
        tmp = POP_SCALAR();
        if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
                lintwarn(_("%s: received non-numeric argument"), "exp");
@@ -235,6 +253,8 @@ do_fflush(int nargs)
         * Now, both calls flush everything.
         */
 
+       check_args_min_max(nargs, "fflush", 0, 1);
+
        /* fflush() */
        if (nargs == 0) {
                status = flush_io();    // ERRNO updated
@@ -381,6 +401,8 @@ do_index(int nargs)
        bool do_single_byte = false;
        mbstate_t mbs1, mbs2;
 
+       check_exact_args(nargs, "index", 2);
+
        if (gawk_mb_cur_max > 1) {
                memset(& mbs1, 0, sizeof(mbstate_t));
                memset(& mbs2, 0, sizeof(mbstate_t));
@@ -503,6 +525,8 @@ do_int(int nargs)
        NODE *tmp;
        double d;
 
+       check_exact_args(nargs, "int", 1);
+
        tmp = POP_SCALAR();
        if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
                lintwarn(_("%s: received non-numeric argument"), "int");
@@ -520,6 +544,8 @@ do_isarray(int nargs)
        NODE *tmp;
        int ret = 1;
 
+       check_exact_args(nargs, "isarray", 1);
+
        tmp = POP();
        if (tmp->type != Node_var_array) {
                ret = 0;
@@ -538,6 +564,8 @@ do_length(int nargs)
        NODE *tmp;
        size_t len;
 
+       check_exact_args(nargs, "length", 1);
+
        tmp = POP();
        if (tmp->type == Node_var_array) {
                static bool warned = false;
@@ -561,6 +589,10 @@ do_length(int nargs)
 
                size = assoc_length(tmp);
                return make_number(size);
+       } else if (tmp->type == Node_var_new) {
+               // this can happen from an indirect call
+               DEREF(tmp);
+               tmp = dupnode(Nnull_string);
        }
 
        assert(tmp->type == Node_val);
@@ -593,6 +625,8 @@ do_log(int nargs)
        NODE *tmp;
        double d, arg;
 
+       check_exact_args(nargs, "log", 1);
+
        tmp = POP_SCALAR();
        if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
                lintwarn(_("%s: received non-numeric argument"), "log");
@@ -1796,6 +1830,8 @@ do_sqrt(int nargs)
        NODE *tmp;
        double arg;
 
+       check_exact_args(nargs, "sqrt", 1);
+
        tmp = POP_SCALAR();
        if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
                lintwarn(_("%s: received non-numeric argument"), "sqrt");
@@ -1818,6 +1854,8 @@ do_substr(int nargs)
        double d_index = 0, d_length = 0;
        size_t src_len;
 
+       check_args_min_max(nargs, "substr", 2, 3);
+
        if (nargs == 3) {
                t1 = POP_NUMBER();
                d_length = get_number_d(t1);
@@ -1982,6 +2020,8 @@ do_strftime(int nargs)
        (void) time(& fclock);  /* current time of day */
        do_gmt = false;
 
+       check_args_min_max(nargs, "strftime", 0, 3);
+
        if (PROCINFO_node != NULL) {
                sub = make_string("strftime", 8);
                val = in_array(PROCINFO_node, sub);
@@ -2098,6 +2138,8 @@ do_systime(int nargs ATTRIBUTE_UNUSED)
 {
        time_t lclock;
 
+       check_exact_args(nargs, "systime", 0);
+
        (void) time(& lclock);
        return make_number((AWKNUM) lclock);
 }
@@ -2116,6 +2158,8 @@ do_mktime(int nargs)
        char save;
        bool do_gmt;
 
+       check_args_min_max(nargs, "mktime", 1, 2);
+
        if (nargs == 2) {
                t2 = POP_SCALAR();
                do_gmt = boolval(t2);
@@ -2179,6 +2223,8 @@ do_system(int nargs)
        char save;
        int status;
 
+       check_exact_args(nargs, "system", 1);
+
        if (do_sandbox)
                fatal(_("'system' function not allowed in sandbox mode"));
 
@@ -2435,6 +2481,8 @@ do_tolower(int nargs)
 {
        NODE *t1, *t2;
 
+       check_exact_args(nargs, "tolower", 1);
+
        t1 = POP_SCALAR();
        if (do_lint && (fixtype(t1)->flags & STRING) == 0)
                lintwarn(_("%s: received non-string argument"), "tolower");
@@ -2466,6 +2514,8 @@ do_toupper(int nargs)
 {
        NODE *t1, *t2;
 
+       check_exact_args(nargs, "toupper", 1);
+
        t1 = POP_SCALAR();
        if (do_lint && (fixtype(t1)->flags & STRING) == 0)
                lintwarn(_("%s: received non-string argument"), "toupper");
@@ -2498,6 +2548,8 @@ do_atan2(int nargs)
        NODE *t1, *t2;
        double d1, d2;
 
+       check_exact_args(nargs, "atan2", 2);
+
        POP_TWO_SCALARS(t1, t2);
        if (do_lint) {
                if ((fixtype(t1)->flags & NUMBER) == 0)
@@ -2520,6 +2572,8 @@ do_sin(int nargs)
        NODE *tmp;
        double d;
 
+       check_exact_args(nargs, "sin", 1);
+
        tmp = POP_SCALAR();
        if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
                lintwarn(_("%s: received non-numeric argument"), "sin");
@@ -2536,6 +2590,8 @@ do_cos(int nargs)
        NODE *tmp;
        double d;
 
+       check_exact_args(nargs, "cos", 1);
+
        tmp = POP_SCALAR();
        if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
                lintwarn(_("%s: received non-numeric argument"), "cos");
@@ -2557,6 +2613,9 @@ NODE *
 do_rand(int nargs ATTRIBUTE_UNUSED)
 {
        double tmprand;
+
+       check_exact_args(nargs, "rand", 0);
+
 #define RAND_DIVISOR ((double)GAWK_RANDOM_MAX+1.0)
        if (firstrand) {
                (void) initstate((unsigned) 1, state, SIZEOF_STATE);
@@ -2647,6 +2706,8 @@ do_srand(int nargs)
                (void) setstate(state);
        }
 
+       check_args_min_max(nargs, "srand", 0, 1);
+
        if (nargs == 0)
                srandom((unsigned int) (save_seed = (long) time((time_t *) 0)));
        else {
@@ -2679,6 +2740,8 @@ do_match(int nargs)
        char *subsepstr;
        size_t subseplen;
 
+       check_args_min_max(nargs, "match", 2, 3);
+
        dest = NULL;
        if (nargs == 3) {       /* 3rd optional arg for the subpatterns */
                dest = POP_PARAM();
@@ -2907,6 +2970,8 @@ do_sub(int nargs, unsigned int flags)
                double d;
                NODE *glob_flag;
 
+               check_exact_args(nargs, "gensub", 4);
+
                tmp = PEEK(3);
                rp = re_update(tmp);
 
@@ -2935,6 +3000,12 @@ do_sub(int nargs, unsigned int flags)
                }
                DEREF(glob_flag);
        } else {
+               if ((flags & GSUB) != 0) {
+                       check_exact_args(nargs, "gsub", 3);
+               } else {
+                       check_exact_args(nargs, "sub", 3);
+               }
+
                /* take care of regexp early, in case re_update is fatal */
 
                tmp = PEEK(2);
@@ -3301,6 +3372,9 @@ call_sub(const char *name, int nargs)
                PUSH_ADDRESS(lhs);
        } else {
                /* gensub */
+               if (nargs < 3 || nargs > 4)
+                       fatal(_("indirect call to gensub requires three to four arguments"));
+
                if (nargs == 4)
                        rhs = POP();
                else
@@ -3360,6 +3434,9 @@ call_match(int nargs)
        NODE *regex, *text, *array;
        NODE *result;
 
+       if (nargs < 2 || nargs > 3)
+               fatal(_("indirect call to match requires two or three arguments"));
+
        regex = text = array = NULL;
        if (nargs == 3)
                array = POP();
@@ -3401,8 +3478,8 @@ call_split_func(const char *name, int nargs)
        NODE *result;
 
        regex = seps = NULL;
-       if (nargs < 2)
-               fatal(_("indirect call to %s requires at least two arguments"),
+       if (nargs < 2 || nargs > 4)
+               fatal(_("indirect call to %s requires two to four arguments"),
                                name);
 
        if (nargs == 4)
@@ -3466,6 +3543,8 @@ do_lshift(int nargs)
        uintmax_t uval, ushift, res;
        AWKNUM val, shift;
 
+       check_exact_args(nargs, "lshift", 2);
+
        POP_TWO_SCALARS(s1, s2);
        if (do_lint) {
                if ((fixtype(s1)->flags & NUMBER) == 0)
@@ -3505,6 +3584,8 @@ do_rshift(int nargs)
        uintmax_t uval, ushift, res;
        AWKNUM val, shift;
 
+       check_exact_args(nargs, "rshift", 2);
+
        POP_TWO_SCALARS(s1, s2);
        if (do_lint) {
                if ((fixtype(s1)->flags & NUMBER) == 0)
@@ -3637,6 +3718,8 @@ do_compl(int nargs)
        double d;
        uintmax_t uval;
 
+       check_exact_args(nargs, "compl", 1);
+
        tmp = POP_SCALAR();
        if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
                lintwarn(_("%s: received non-numeric argument"), "compl");
@@ -3662,6 +3745,8 @@ do_strtonum(int nargs)
        NODE *tmp;
        AWKNUM d;
 
+       check_exact_args(nargs, "strtonum", 1);
+
        tmp = fixtype(POP_SCALAR());
        if ((tmp->flags & NUMBER) != 0)
                d = (AWKNUM) tmp->numbr;
@@ -3863,6 +3948,8 @@ do_dcgettext(int nargs)
        char *domain;
        char save1 = '\0', save2 = '\0';
 
+       check_args_min_max(nargs, "dcgettext", 1, 3);
+
        if (nargs == 3) {       /* third argument */
                tmp = POP_STRING();
                lc_cat = localecategory_from_argument(tmp);
@@ -3924,6 +4011,8 @@ do_dcngettext(int nargs)
        char save = '\0', save1 = '\0', save2 = '\0';
        bool saved_end = false;
 
+       check_args_min_max(nargs, "dcngettext", 3, 5);
+
        if (nargs == 5) {       /* fifth argument */
                tmp = POP_STRING();
                lc_cat = localecategory_from_argument(tmp);
@@ -4005,6 +4094,8 @@ do_bindtextdomain(int nargs)
        const char *directory, *domain;
        const char *the_result;
 
+       check_args_min_max(nargs, "bindtextdomain", 1, 2);
+
        t1 = t2 = NULL;
        /* set defaults */
        directory = NULL;
@@ -4058,6 +4149,8 @@ do_intdiv(int nargs)
        NODE *numerator, *denominator, *result;
        double num, denom, quotient, remainder;
 
+       check_exact_args(nargs, "intdiv", 3);
+
        result = POP_PARAM();
        if (result->type != Node_var_array)
                fatal(_("intdiv: third argument is not an array"));
@@ -4115,6 +4208,8 @@ do_typeof(int nargs)
        bool deref = true;
        NODE *dbg;
 
+       check_args_min_max(nargs, "typeof", 1, 2);
+
        if (nargs == 2) {       /* 2nd optional arg for debugging */
                dbg = POP_PARAM();
                if (dbg->type != Node_var_array)
diff --git a/field.c b/field.c
index 1cbd547..88309a8 100644
--- a/field.c
+++ b/field.c
@@ -979,6 +979,8 @@ do_split(int nargs)
                         Regexp *, Setfunc, NODE *, NODE *, bool);
        Regexp *rp = NULL;
 
+       check_args_min_max(nargs, "split", 3, 4);
+
        if (nargs == 4) {
                static bool warned = false;
 
@@ -1081,6 +1083,8 @@ do_patsplit(int nargs)
        char *s;
        Regexp *rp = NULL;
 
+       check_args_min_max(nargs, "patsplit", 3, 4);
+
        if (nargs == 4) {
                sep_arr = POP_PARAM();
                if (sep_arr->type != Node_var_array)
diff --git a/interpret.h b/interpret.h
index d52d537..86a5c41 100644
--- a/interpret.h
+++ b/interpret.h
@@ -67,6 +67,7 @@ r_interpret(INSTRUCTION *code)
        Regexp *rp;
        NODE *set_array = NULL; /* array with a post-assignment routine */
        NODE *set_idx = NULL;   /* the index of the array element */
+       bool in_indirect_call = false;
 
 
 /* array subscript */
@@ -1059,6 +1060,14 @@ arrayfor:
                                        DEREF(t1);
                        }
                        free_api_string_copies();
+
+                       if (in_indirect_call) {
+                               // pop function name off the stack
+                               NODE *fname = POP();
+                               DEREF(fname);
+                               in_indirect_call = false;
+                       }
+
                        PUSH(r);
                }
                        break;
@@ -1132,6 +1141,7 @@ match_re:
                        NODE *f = NULL;
                        int arg_count;
                        char save;
+                       NODE *function_name;
 
                        arg_count = (pc + 1)->expr_count;
                        t1 = PEEK(arg_count);   /* indirect var */
@@ -1174,6 +1184,12 @@ match_re:
                                        r = the_func(arg_count);
                                str_restore(t1, save);
 
+                               // Normally, setup_frame() handles getting rid of the
+                               // function name.  Since we have called the builtin directly,
+                               // we have to manually do this here.
+                               function_name = POP();
+                               DEREF(function_name);
+
                                PUSH(r);
                                break;
                        } else if (f->type != Node_func) {
@@ -1195,6 +1211,7 @@ match_re:
                                        npc[1] = pc[1];
                                        npc[1].func_name = fname;       /* name of the builtin */
                                        npc[1].c_function = bc->c_function;
+                                       in_indirect_call = true;
                                        ni = npc;
                                        JUMPTO(ni);
                                } else
diff --git a/mpfr.c b/mpfr.c
index 4010d0c..25b7b46 100644
--- a/mpfr.c
+++ b/mpfr.c
@@ -720,6 +720,8 @@ do_mpfr_atan2(int nargs)
        mpfr_ptr p1, p2;
        int tval;
 
+       check_exact_args(nargs, "atan2", 2);
+
        t2 = POP_SCALAR();
        t1 = POP_SCALAR();
 
@@ -756,6 +758,8 @@ do_mpfr_func(const char *name,
        int tval;
        mpfr_prec_t argprec;
 
+       check_exact_args(nargs, name, 1);
+
        t1 = POP_SCALAR();
        if (do_lint && (fixtype(t1)->flags & NUMBER) == 0)
                lintwarn(_("%s: received non-numeric argument"), name);
@@ -827,6 +831,8 @@ do_mpfr_int(int nargs)
 {
        NODE *tmp, *r;
 
+       check_exact_args(nargs, "int", 1);
+
        tmp = POP_SCALAR();
        if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
                lintwarn(_("int: received non-numeric argument"));
@@ -857,6 +863,8 @@ do_mpfr_compl(int nargs)
        NODE *tmp, *r;
        mpz_ptr zptr;
 
+       check_exact_args(nargs, "compl", 1);
+
        tmp = POP_SCALAR();
        if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
                lintwarn(_("compl: received non-numeric argument"));
@@ -976,6 +984,8 @@ do_mpfr_lshift(int nargs)
        unsigned long shift;
        mpz_ptr pz1, pz2;
 
+       check_exact_args(nargs, "lshift", 2);
+
        t2 = POP_SCALAR();
        t1 = POP_SCALAR();
 
@@ -1008,6 +1018,8 @@ do_mpfr_rshift(int nargs)
        unsigned long shift;
        mpz_ptr pz1, pz2;
 
+       check_exact_args(nargs, "rshift", 2);
+
        t2 = POP_SCALAR();
        t1 = POP_SCALAR();
 
@@ -1128,6 +1140,8 @@ do_mpfr_strtonum(int nargs)
 {
        NODE *tmp, *r;
 
+       check_exact_args(nargs, "strtonum", 1);
+
        tmp = fixtype(POP_SCALAR());
        if ((tmp->flags & NUMBER) == 0) {
                r = mpg_integer();      /* will be changed to MPFR float if necessary in force_mpnum() */
@@ -1165,6 +1179,8 @@ do_mpfr_rand(int nargs ATTRIBUTE_UNUSED)
        NODE *res;
        int tval;
 
+       check_exact_args(nargs, "rand", 0);
+
        if (firstrand) {
 #if 0
                /* Choose the default algorithm */
@@ -1215,6 +1231,8 @@ do_mpfr_srand(int nargs)
                firstrand = false;
        }
 
+       check_args_min_max(nargs, "srand", 0, 1);
+
        res = mpg_integer();
        mpz_set(res->mpg_i, seed);      /* previous seed */
 
@@ -1256,6 +1274,8 @@ do_mpfr_intdiv(int nargs)
        NODE *quotient, *remainder;
        NODE *sub, **lhs;
 
+       check_exact_args(nargs, "intdiv", 3);
+
        result = POP_PARAM();
        if (result->type != Node_var_array)
                fatal(_("intdiv: third argument is not an array"));



reply via email to

[Prev in Thread] Current Thread [Next in Thread]