From cf2c6d847a14ba9d93c2bcc72a93345c6e077f88 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Sat, 9 Apr 2022 11:55:51 -0700 Subject: [PATCH] Feature: @local declares local variables. We split the function parameter list into two areas: the parameter list proper, containing parameters that receive arguments, and additional entries that are pure local variables that do not receive arguments. Two new phrase structures are introduced. Feature: @local declares local variables. Two new phrase structures are introduced. The @local statement introduces variables that are local to the function. This may appear only in a function: @local var1, var2 = init2, var3, ... The @local: attribute specifier is applied to a single variable, in any expression: for (@local:i = 0; i < N; i++) { ... } @local:i here behaves almost the same as the unadorned variable reference i, the difference being that i is introduced as a local. @local:VAR is allowed outside of a function, and in that situation is the same as VAR. Locals are subject to the same constraints as parameters: they cannot duplicate a parameter, or have the same name as a special variable such as NF and so on. Supporting changes to existing logic: We split the function parameter list into two areas: the parameter list proper, containing parameters that receive arguments, and additional entries that are pure local variables that do not receive arguments. The f->fparms array of a function NODE is now an array of NODE *, and not an array of NODE, so that we can dynamically add local variables to it after the initial allocation of the function. Details: * awk.h (NODE): New union member sub.nodep.n. Member sub.nodep.rn is moved into this union becoming sub.nodep.n.rn. New member sub.nodep.n.rpn, of type NODE **. This is now the implementation of fparms. (frame_cnt): New macro for sub.nodep.reserved. This is used to keep track of a function's frame size (number of local variables). 
If there are no local variables other than function parameters, then f->frame_cnt == f->param_cnt. Otherwise f->frame_cnt > f->param_cnt. Note that traditional Awk local variables, which are indistinguishable from parameters, are part of param_cnt. We are talking about the new-style local variables here. (fparms): Now sub.nodep.n.rpn. (for_array, xarray): Macros follow the move of sub.nodep.rn to sub.nodep.n.rn. (make_params): Return NODE **, rather than NODE *. (extend_locals): Declared. * awkgram.y (in_function): Global variable changes type from bool to INSTRUCTION *. It still functions as a Boolean, indicating that the parser is processing a function, but it also gives the instruction node. (LEX_LOCAL): New terminal symbol for the local keyword in the @local notation. (function_prologue): Store the function node $1 into in_function. (statement): Add new production for '@' LEX_LOCAL. (local_var_list_opt, local_var_list): New nonterminal symbols. (simple_variable): New production for @ LEX_LOCAL ':' NAME. This is a copy of the NAME production, with the additional logic that when in_function is true, the symbol is added as a local to the current function via add_local before being processed as a variable reference. So it is as if it had been a parameter all along. (tokentab): Register "local" with the LEX_LOCAL token number, attributing it as a special symbol that is a Gawk extension, much like "include" and other "@" items. (parms_shadow): Check the entire frame, not just the parameters, for shadowing. Update to NODE ** fparms representation. (mk_function): Updated due to rename of remove_params to remove_locals. (install_function): Pass the new third parameter to make_params in order to receive the allocation size of the parameter vector. This is installed into f->param_alloc. The new add_local function makes use of this to diagnose it if there is no room to add parameters. 
(install_function): Update to NODE ** fparms, and initialize frame_cnt equal to param_cnt. (add_local): New static function for adding a parameter to the function currently being compiled. This has to perform diagnostics on the local variable, similar to the checks done on a parameter. The extend_locals function in symbol.c is relied upon to do the reallocation to add the parameter and also register it in the symbol table. We strdup the parm->lextok parameter name, because that will later be freed during parsing. (check_params): A few parameter checks are moved into the new check_param function. This was because they were shared with an earlier implementation of add_local. The function then got cloned into check_local in order to have different wording. (check_param): New static function, split off from check_params. (check_local): New static function, closely based on check_param. * symbol.c (make_params): Returns NODE ** now and allocates an array of pointers, using individual getnode calls to allocate the NODE objects, rather than allocating params as a contiguous block of NODE objects. (extend_locals): New function. This reallocates the param array to hold one more parameter, on the assumption that the caller's lcount parameter is one greater than the current size. The parameter is initialized and registered in the symbol table. * debug.c (do_info): Report the locals as if they were parameters, so that they are visible under debugging. Update parameter access to reflect NODE ** representation. (find_param): Find a local variable too. Update to NODE ** fparms. (print_function, print_memory, print_instruction): Adjust to NODE ** fparms. (do_eval): Follow rename of remove_params to remove_locals. Append all the locals to the frame stack. Update to NODE ** fparms. (parse_condition): Follow rename of remove_params to remove_locals. 
* eval.c (setup_frame): Allocate the full locals frame (f->frame_cnt), but check the arguments against only the param count (f->param_cnt). Update to NODE ** fparms. (restore_frame): Destroy all the locals, not just parameters. * command.y (variable_generator): Update to NODE ** fparms. * profile.c (func_params): Change to NODE ** type. (pprint, pp_func): Update to NODE ** fparms. @ doc/gawk.texi: Documented. --- awk.h | 21 +++-- awkgram.y | 190 +++++++++++++++++++++++++++++++++++++------ command.y | 2 +- debug.c | 26 +++--- doc/gawk.texi | 220 +++++++++++++++++++++++++++++++++++++++++++++++++- eval.c | 17 ++-- profile.c | 8 +- symbol.c | 69 +++++++++++----- 8 files changed, 473 insertions(+), 80 deletions(-) diff --git a/awk.h b/awk.h index 732aec04..115ed41b 100644 --- a/awk.h +++ b/awk.h @@ -362,7 +362,10 @@ typedef struct exp_node { } x; char *name; size_t reserved; - struct exp_node *rn; + union { + struct exp_node *rn; + struct exp_node **rpn; + } n; unsigned long cnt; enum reflagvals { CONSTANT = 1, @@ -482,10 +485,13 @@ typedef struct exp_node { #define dup_ent sub.nodep.r.rptr /* Node_param_list, Node_func */ -#define param_cnt sub.nodep.l.ll +#define param_cnt sub.nodep.l.ll /* Number of locals that are params */ + +/* Node_func */ +#define frame_cnt sub.nodep.reserved /* No locals allocated at run-time */ /* Node_func */ -#define fparms sub.nodep.rn +#define fparms sub.nodep.n.rpn #define code_ptr sub.nodep.r.iptr /* Node_regex, Node_dynregex */ @@ -533,7 +539,7 @@ typedef struct exp_node { #define for_list sub.nodep.r.av #define for_list_size sub.nodep.reflags #define cur_idx sub.nodep.l.ll -#define for_array sub.nodep.rn +#define for_array sub.nodep.n.rn /* Node_frame: */ #define stack sub.nodep.r.av @@ -554,7 +560,7 @@ typedef struct exp_node { #define table_size sub.nodep.reflags #define array_size sub.nodep.cnt #define array_capacity sub.nodep.reserved -#define xarray sub.nodep.rn +#define xarray sub.nodep.n.rn #define parent_array sub.nodep.x.extra 
#define ainit array_funcs->init @@ -1777,9 +1783,10 @@ extern void destroy_symbol(NODE *r); extern void release_symbols(NODE *symlist, int keep_globals); extern void append_symbol(NODE *r); extern NODE *lookup(const char *name); -extern NODE *make_params(char **pnames, int pcount); +extern NODE **make_params(char **pnames, int pcount); +NODE **extend_locals(NODE **parms, const char *pname, int lcount); extern void install_params(NODE *func); -extern void remove_params(NODE *func); +extern void remove_locals(NODE *func); extern void release_all_vars(void); extern int foreach_func(NODE **table, int (*)(INSTRUCTION *, void *), void *); extern INSTRUCTION *bcalloc(OPCODE op, int size, int srcline); diff --git a/awkgram.y b/awkgram.y index 08ee381c..0ddf7125 100644 --- a/awkgram.y +++ b/awkgram.y @@ -44,7 +44,10 @@ static int yylex(void); int yyparse(void); static INSTRUCTION *snode(INSTRUCTION *subn, INSTRUCTION *op); static char **check_params(char *fname, int pcount, INSTRUCTION *list); +static void check_param(const char *fname, const char *name, INSTRUCTION *parm); +static void check_local(const char *fname, const char *name, INSTRUCTION *local); static int install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist); +static void add_local(INSTRUCTION *fi, INSTRUCTION *parm); static NODE *mk_rexp(INSTRUCTION *exp); static void param_sanity(INSTRUCTION *arglist); static int parms_shadow(INSTRUCTION *pc, bool *shadow); @@ -120,7 +123,7 @@ static enum { FUNC_BODY, DONT_CHECK } want_param_names = DONT_CHECK; /* ditto */ -static bool in_function; /* parsing kludge */ +static INSTRUCTION *in_function; /* parsing kludge */ static int rule = 0; const char *const ruletab[] = { @@ -205,7 +208,7 @@ extern double fmod(double x, double y); %token LEX_AND LEX_OR INCREMENT DECREMENT %token LEX_BUILTIN LEX_LENGTH %token LEX_EOF -%token LEX_INCLUDE LEX_EVAL LEX_LOAD LEX_NAMESPACE +%token LEX_INCLUDE LEX_EVAL LEX_LOAD LEX_NAMESPACE LEX_LOCAL %token NEWLINE /* Lowest to 
highest */ @@ -533,7 +536,7 @@ function_prologue $1->comment = func_comment; if (install_function($2->lextok, $1, $5) < 0) YYABORT; - in_function = true; + in_function = $1; $2->lextok = NULL; bcfree($2); /* $5 already free'd in install_function */ @@ -1035,6 +1038,17 @@ statement break_allowed--; continue_allowed--; } + | '@' LEX_LOCAL + { + if (!in_function) + error_ln($1->source_line, + _("`local' may not appear outside a function")); + } + local_var_list_opt statement_term + { + $$ = $4; + yyerrok; + } | non_compound_stmt { if (do_pretty_print) @@ -1535,6 +1549,54 @@ param_list { $$ = $1; } ; +local_var_list_opt + : /* empty */ + { $$ = NULL; } + | local_var_list + { $$ = $1; } + ; + +local_var_list + : NAME + { + add_local(in_function, $1); + $$ = NULL; + } + | local_var_list comma NAME + { + add_local(in_function, $3); + $$ = $1; + } + | NAME ASSIGN exp + { + add_local(in_function, $1); + $1->opcode = Op_push; + $1->memory = variable($1->source_line, $1->lextok, Node_var_new); + $$ = list_append(mk_assignment(list_create($1), $3, $2), + instruction(Op_pop)); + + } + | local_var_list comma NAME ASSIGN exp + { + INSTRUCTION *assn; + add_local(in_function, $3); + $3->opcode = Op_push; + $3->memory = variable($3->source_line, $3->lextok, Node_var_new); + assn = list_append(mk_assignment(list_create($3), $5, $4), + instruction(Op_pop)); + if ($1 == NULL) + $$ = assn; + else + $$ = list_merge($1, assn); + } + | error + { $$ = NULL; } + | local_var_list error + { $$ = $1; } + | local_var_list comma error + { $$ = $1; } + ; + /* optional expression, as in for loop */ opt_exp : /* empty */ @@ -2167,6 +2229,14 @@ simple_variable $1->opcode = Op_push_array; $$ = list_prepend($2, $1); } + | '@' LEX_LOCAL ':' NAME + { + if (in_function) + add_local(in_function, $4); + $4->opcode = Op_push; + $4->memory = variable($4->source_line, $4->lextok, Node_var_new); + $$ = list_create($4); + } ; variable @@ -2326,6 +2396,7 @@ static const struct token tokentab[] = { {"isarray", 
Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_isarray, 0}, {"length", Op_builtin, LEX_LENGTH, A(0)|A(1), do_length, 0}, {"load", Op_symbol, LEX_LOAD, GAWKX, 0, 0}, +{"local", Op_symbol, LEX_LOCAL, GAWKX, 0, 0}, {"log", Op_builtin, LEX_BUILTIN, A(1), do_log, MPF(log)}, {"lshift", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift, MPF(lshift)}, {"match", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_match, 0}, @@ -4869,9 +4940,9 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) static int parms_shadow(INSTRUCTION *pc, bool *shadow) { - int pcount, i; + int pcount, lcount, i; bool ret = false; - NODE *func, *fp; + NODE *func, **fp; char *fname; func = pc->func_body; @@ -4884,8 +4955,9 @@ parms_shadow(INSTRUCTION *pc, bool *shadow) #endif pcount = func->param_cnt; + lcount = func->frame_cnt; - if (pcount == 0) /* no args, no problem */ + if (lcount == 0) /* no locals, no problem */ return 0; source = pc->source_file; @@ -4894,11 +4966,12 @@ parms_shadow(INSTRUCTION *pc, bool *shadow) * Use warning() and not lintwarn() so that can warn * about all shadowed parameters. */ - for (i = 0; i < pcount; i++) { - if (lookup(fp[i].param) != NULL) { - warning( - _("function `%s': parameter `%s' shadows global variable"), - fname, fp[i].param); + for (i = 0; i < lcount; i++) { + if (lookup(fp[i]->param) != NULL) { + warning((i < pcount) + ? 
_("function `%s': parameter `%s' shadows global variable") + : _("function `%s': local `%s' shadows global variable"), + fname, fp[i]->param); ret = true; } } @@ -5046,8 +5119,8 @@ mk_function(INSTRUCTION *fi, INSTRUCTION *def) /* update lint table info */ func_use(thisfunc->vname, FUNC_DEFINE); - /* remove params from symbol table */ - remove_params(thisfunc); + /* remove params/locals from symbol table */ + remove_locals(thisfunc); return fi; } @@ -5078,7 +5151,12 @@ install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist) } fi->func_body = f; - f->param_cnt = pcount; + /* + * param_cnt and frame_cnt stay the same if there are no @local + * variables. add_local increments frame_cnt, and frame_cnt + * is what is allocated when a function is invoked. + */ + f->frame_cnt = f->param_cnt = pcount; f->code_ptr = fi; f->fparms = NULL; if (pcount > 0) { @@ -5091,6 +5169,38 @@ install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist) return 0; } +static void +add_local(INSTRUCTION *fi, INSTRUCTION *local) +{ + NODE *f = fi->func_body; + NODE **parms = f->fparms; + int pcount = f->param_cnt, lcount = f->frame_cnt, i; + const char *fname = f->vname; + const char *name = estrdup(local->lextok, strlen(local->lextok)); + + /* Basic checks:*/ + check_local(fname, name, local); + + /* Duplicate check: implemented a bit differently from + * check_params because we have to iterate the list + * of already installed parameters in the function object. + */ + for (i = 0; parms != NULL && i < lcount; i++) { + if (strcmp(name, parms[i]->param) == 0) + error_ln(local->source_line, + (i < pcount) + ? _("function `%s': local #%d, `%s', duplicates parameter #%d") + : _("function `%s': local #%d, `%s', duplicates parameter #%d"), + fname, lcount + 1, name, i + 1); + } + + /* Reallocate the function's param vector to accommodate + * the new one, or allocate if null. 
+ */ + lcount++; + f->fparms = extend_locals(parms, name, lcount); + f->frame_cnt = lcount; +} /* check_params --- build a list of function parameter names after * making sure that the names are valid and there are no duplicates. @@ -5113,18 +5223,7 @@ check_params(char *fname, int pcount, INSTRUCTION *list) name = p->lextok; p->lextok = NULL; - if (strcmp(name, fname) == 0) { - /* check for function foo(foo) { ... }. bleah. */ - error_ln(p->source_line, - _("function `%s': cannot use function name as parameter name"), fname); - } else if (is_std_var(name)) { - error_ln(p->source_line, - _("function `%s': cannot use special variable `%s' as a function parameter"), - fname, name); - } else if (strchr(name, ':') != NULL) - error_ln(p->source_line, - _("function `%s': parameter `%s' cannot contain a namespace"), - fname, name); + check_param(fname, name, p); /* check for duplicate parameters */ for (j = 0; j < i; j++) { @@ -5143,6 +5242,45 @@ check_params(char *fname, int pcount, INSTRUCTION *list) return pnames; } +/* check_param --- perform basic checks on one parameter. + */ +static void +check_param(const char *fname, const char *name, INSTRUCTION *parm) +{ + if (strcmp(name, fname) == 0) { + /* check for function foo(foo) { ... }. bleah. */ + error_ln(parm->source_line, + _("function `%s': cannot use function name as parameter name"), fname); + } else if (is_std_var(name)) { + error_ln(parm->source_line, + _("function `%s': cannot use special variable `%s' as a function parameter"), + fname, name); + } else if (strchr(name, ':') != NULL) { + error_ln(parm->source_line, + _("function `%s': parameter `%s' cannot contain a namespace"), + fname, name); + } +} + +/* check_local == like check_param but with wording about locals + */ +static void +check_local(const char *fname, const char *name, INSTRUCTION *local) +{ + if (strcmp(name, fname) == 0) { + /* check for function foo(foo) { ... }. bleah. 
*/ + error_ln(local->source_line, + _("function `%s': cannot use function name as local variable name"), fname); + } else if (is_std_var(name)) { + error_ln(local->source_line, + _("function `%s': cannot use special variable `%s' as a local variable"), + fname, name); + } else if (strchr(name, ':') != NULL) { + error_ln(local->source_line, + _("function `%s': local variable `%s' cannot contain a namespace"), + fname, name); + } +} #ifdef HASHSIZE undef HASHSIZE diff --git a/command.y b/command.y index 18980d38..b43f54bb 100644 --- a/command.y +++ b/command.y @@ -1697,7 +1697,7 @@ variable_generator(const char *text, int state) idx = 0; break; } - name = func->fparms[idx++].param; + name = func->fparms[idx++]->param; if (strncmp(name, text, textlen) == 0) return estrdup(name, strlen(name)); } diff --git a/debug.c b/debug.c index 2849a4c1..093984dd 100644 --- a/debug.c +++ b/debug.c @@ -841,7 +841,7 @@ do_info(CMDARG *arg, int cmd ATTRIBUTE_UNUSED) return false; } - pcount = func->param_cnt; /* # of defined params */ + pcount = func->frame_cnt; /* # of defined params/locals */ pc = (INSTRUCTION *) f->reti; /* Op_func_call instruction */ arg_count = (pc + 1)->expr_count; /* # of arguments supplied */ @@ -861,7 +861,7 @@ do_info(CMDARG *arg, int cmd ATTRIBUTE_UNUSED) r = f->stack[i]; if (r->type == Node_array_ref) r = r->orig_array; - fprintf(out_fp, "%s = ", func->fparms[i].param); + fprintf(out_fp, "%s = ", func->fparms[i]->param); print_symbol(r, true); } if (to < from) @@ -998,7 +998,7 @@ find_frame(long num) return fcall_list[num]; } -/* find_param --- find a function parameter in a given frame number */ +/* find_param --- find a function parameter/local in a given frame number */ static NODE * find_param(const char *name, long num, char **pname) @@ -1018,9 +1018,9 @@ find_param(const char *name, long num, char **pname) int i, pcount; func = f->func_node; - pcount = func->param_cnt; + pcount = func->frame_cnt; for (i = 0; i < pcount; i++) { - fparam = 
func->fparms[i].param; + fparam = func->fparms[i]->param; if (strcmp(name, fparam) == 0) { r = f->stack[i]; if (r->type == Node_array_ref) @@ -1917,7 +1917,7 @@ print_function(INSTRUCTION *pc, void *x) print_func(fp, "%s(", func->vname); for (i = 0; i < pcount; i++) { - print_func(fp, "%s", func->fparms[i].param); + print_func(fp, "%s", func->fparms[i]->param); if (i < pcount - 1) print_func(fp, ", "); } @@ -3744,7 +3744,7 @@ print_memory(NODE *m, NODE *func, Func_print print_func, FILE *fp) case Node_param_list: assert(func != NULL); - print_func(fp, "%s", func->fparms[m->param_cnt].param); + print_func(fp, "%s", func->fparms[m->param_cnt]->param); break; case Node_var: @@ -3781,7 +3781,7 @@ print_instruction(INSTRUCTION *pc, Func_print print_func, FILE *fp, int in_dump) int j; print_func(fp, "\n\t# Function: %s (", func->vname); for (j = 0; j < pcount; j++) { - print_func(fp, "%s", func->fparms[j].param); + print_func(fp, "%s", func->fparms[j]->param); if (j < pcount - 1) print_func(fp, ", "); } @@ -3990,7 +3990,7 @@ print_instruction(INSTRUCTION *pc, Func_print print_func, FILE *fp, int in_dump) case Op_arrayfor_incr: print_func(fp, "[array_var = %s] [target_jmp = " PTRFMT "]\n", pc->array_var->type == Node_param_list ? 
- func->fparms[pc->array_var->param_cnt].param : pc->array_var->vname, + func->fparms[pc->array_var->param_cnt]->param : pc->array_var->vname, pc->target_jmp); break; @@ -5614,7 +5614,7 @@ do_eval(CMDARG *arg, int cmd ATTRIBUTE_UNUSED) do_flags &= DO_MPFR; // preserve this flag only ret = parse_program(&code, true); do_flags = save_flags; - remove_params(this_func); + remove_locals(this_func); if (ret != 0) { pop_context(); /* switch to prev context */ free_context(ctxt, false /* keep_globals */); @@ -5650,7 +5650,7 @@ do_eval(CMDARG *arg, int cmd ATTRIBUTE_UNUSED) t->opcode = Op_stop; /* add or append eval locals to the current frame stack */ - ecount = f->param_cnt; /* eval local count */ + ecount = f->frame_cnt; /* eval local count */ pcount = this_func->param_cnt; if (ecount > 0) { @@ -5663,7 +5663,7 @@ do_eval(CMDARG *arg, int cmd ATTRIBUTE_UNUSED) for (i = 0; i < ecount; i++) { NODE *np; - np = f->fparms + i; + np = f->fparms[i]; np->param_cnt += pcount; /* appending eval locals: fixup param_cnt */ getnode(r); @@ -5806,7 +5806,7 @@ parse_condition(int type, int num, char *expr) do_flags = false; ret = parse_program(&code, true); do_flags = save_flags; - remove_params(this_func); + remove_locals(this_func); pop_context(); if (ret != 0 || invalid_symbol) { diff --git a/doc/gawk.texi b/doc/gawk.texi index 68b52536..a279a348 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -783,6 +783,7 @@ particular records in a file and perform operations upon them. * Function Calling:: Calling user-defined functions. * Calling A Function:: Don't use spaces. * Variable Scope:: Controlling variable scope. +* Local Variables:: @command{gawk} local variables. * Pass By Value/Reference:: Passing parameters. * Function Caveats:: Other points to know about functions. * Return Statement:: Specifying the value a function @@ -21464,6 +21465,7 @@ the function. @menu * Calling A Function:: Don't use spaces. * Variable Scope:: Controlling variable scope. 
+* Local Variables:: @command{gawk} local variables. * Pass By Value/Reference:: Passing parameters. * Function Caveats:: Other points to know about functions. @end menu @@ -21502,8 +21504,11 @@ there is no way to make a variable local to a @code{@{} @dots{} @code{@}} block good practice to do so whenever a variable is needed only in that function. -To make a variable local to a function, simply declare the variable as -an argument after the actual function arguments +@command{gawk} has language extensions in this area, described in +@ref{Local Variables}. + +In standard @command{awk}, to make a variable local to a function, simply declare the +variable as an argument after the actual function arguments (@pxref{Definition Syntax}). Look at the following example, where variable @code{i} is a global variable used by both functions @code{foo()} and @@ -21628,6 +21633,217 @@ At level 2, index 1 is not found in a At level 2, index 2 is found in a @end example +@node Local Variables +@subsubsection @command{gawk} Local Variable Extension +@cindex @code{@@local} statement +@cindex @code{@@local} variable annotation +This @value{SECTION} describes a @command{gawk}-specific extension. + +As documented in @ref{Variable Scope}, function-wide local variables +are defined as function parameters in standard @command{awk}. The +language does not distinguish parameters used as local variables +from true parameters that receive arguments. This is only a programmer +convention, which is enforced by discipline and the use of traditional +annotation devices, such as visually separating the parameters intended +for argument passing from the parameters intended to serve as local +variables. + +@command{gawk} provides a language extension in this area, allowing +the programmer to specify conventional function-wide local variables which do +not appear in the parameter list and cannot receive arguments. 
+ +The extension takes the form of two constructs: the @code{@@local} +statement and the @code{@@local} variable annotation. + +The @code{@@local} statement is introduced by the @code{@@} symbol +followed by the special keyword @code{local}. These tokens are +then followed by a comma-separated list of variable declarators. +Declarators consist of variable names, optionally initialized by expressions. +The initializing expressions are indicated by the @code{=} sign: + +@example +function fun(x) +@{ + ... + @@local a, b = 3, ir2 = 0.707 + ... +@} +@end example + +The @code{@@local} attribute is a prefix which can appear on +a variable reference in an expression. At that point in the code, +the variable becomes known as a local variable. + +@example +for (@@local:i = 0; i < N; i++) + print i +@end example + +Local variables are subject to all of the same naming constraints as +function parameters. A local variable may not have the same name +as a previously defined local variable, or a function parameter. +A special variable such as @code{NF} may not be declared as a local variable. + +The @code{@@local} statement may not appear outside of a function. +The @code{@@local} variable annotation may appear outside of a function, +but then has no effect. + +Local variables are known from their point of declaration. In the following +example, the function's first reference to @code{accum} is a reference +to the global variable. The second reference is local. + +@example +function fun() +@{ + accum = 42 + @@local accum + print "fun: accum = ", accum + accum = 43 +@} + +BEGIN @{ fun(); print "BEGIN: accum = ", accum @} + +for (@@local:i = 0; i < N; i++) + print i +@end example + +The output is + +@example +fun: accum = +BEGIN: accum = 42 +@end example + +After the @code{@@local} statement inside the function, @code{accum} no longer +appears to have a defined value, even though @code{accum} was just assigned the +value 42. 
This is because @code{@@local} has introduced a local variable +unrelated to any global variable; that variable shadows a global of the same +name. + +The @code{print} statement in the @code{BEGIN} block confirms that +assigning the value 43 to the local @code{accum} had no effect on the global +@code{accum}. + +In the @code{@@local} statement, each local variable comes into existence +immediately at the point of its declarator. However, the initializing +expression is evaluated in a scope in which the variable is not yet +visible. + +@example +function helper() +@{ + print "helper: level = ", level +@} + +function main() +@{ + @@local level = level + 1 + print "main: level = ", level + helper() +@} + +BEGIN @{ + level = 0 + main() +@} +@end example + +The output is: + +@example +main: level = 1 +helper: level = 0 +@end example + +In this example, the function @code{main} locally shadows the global +variable @code{level}, giving it a value which is one greater. + +This local variable is lexically scoped; when @code{main} invokes +@code{helper}, it is evident that @code{helper} is referring to the global +@code{level} variable. + +In the @code{@@local} variable annotation, the scoping is tricky. +If a variable is annotated in an expression, and also referenced +multiple times in that expression, the behavior is not specified, +as in these examples: + +@example +@@local:x + x # strange +@end example + +Here, if the expression is processed left-to-right by the compiler, +the left operand will introduce @code{x} as a local variable and +so both sides of the @code{+} operator will access the newly +instantiated, local @code{x}. It is best not to depend on this. 
+ +For the same reasons, the following isn't required to work: +@example +# wrong: contrast with reliable @@local x = x + 1 statement +@@local:x = x + 1 +@end example + +The @code{@@local} attribute is useful in statements that contain +subexpressions, such as @code{if} and @code{for}, keeping in +mind that the scope of these variables is function-wide in +spite of appearing in statements. + +@example +if (@@local:w = get_widget("foo")) @{ + +@} + +for (@@local:i = 0; i < N; i++) @{ + +@} +@end example + +In each of these examples, the @code{@@local} annotation appears +in an expression in which it is the only reference to the variable. + +Because local variables are function-wide, the effect of the @code{@@local} +statement or declaration is visible from its point of appearance to the +end of the function: + +@example +function fun(x, y) +@{ + if (x > 0) @{ + switch (y) @{ + case 42: + @@local z = y + 1 + break + @} + @} + + return z +@} + +BEGIN @{ + z = 73 + print "fun returned", fun(0, 42) + print "fun returned", fun(1, 42) +@} +@end example + +Output: + +@example +fun returned +fun returned 43 +@end example + +The output confirms that the @code{return z} statement refers to the +local variable @code{z} regardless of whether the @code{@@local} statement +is executed. This is a matter of compile-time scope, not a matter +of the run-time semantics of @code{@@local}. The scope of @code{z} extends from +the point just after its initializer, to the end of the function, +and so extends over the @code{return} statement. + +The run-time semantics of @code{@@local} is that if it is executed, +@code{z} receives the value of @code{y + 1}. If the statement is not +executed, then that initialization doesn't take place; we observe +the behavior that @code{fun} returns the uninitialized value of @code{z}. 
+ @node Pass By Value/Reference @subsubsection Passing Function Arguments by Value Or by Reference diff --git a/eval.c b/eval.c index c6f8bcb9..537c1b8f 100644 --- a/eval.c +++ b/eval.c @@ -1262,17 +1262,18 @@ static INSTRUCTION * setup_frame(INSTRUCTION *pc) { NODE *r = NULL; - NODE *m, *f, *fp; + NODE *m, *f, **fp; NODE **sp = NULL; - int pcount, arg_count, i, j; + int pcount, lcount, arg_count, i, j; f = pc->func_body; pcount = f->param_cnt; + lcount = f->frame_cnt; fp = f->fparms; arg_count = (pc + 1)->expr_count; - if (pcount > 0) { - ezalloc(sp, NODE **, pcount * sizeof(NODE *), "setup_frame"); + if (lcount > 0) { + ezalloc(sp, NODE **, lcount * sizeof(NODE *), "setup_frame"); } /* check for extra args */ @@ -1287,7 +1288,7 @@ setup_frame(INSTRUCTION *pc) } while (--arg_count > pcount); } - for (i = 0, j = arg_count - 1; i < pcount; i++, j--) { + for (i = 0, j = arg_count - 1; i < lcount; i++, j--) { getnode(r); memset(r, 0, sizeof(NODE)); sp[i] = r; @@ -1295,7 +1296,7 @@ setup_frame(INSTRUCTION *pc) if (i >= arg_count) { /* local variable */ r->type = Node_var_new; - r->vname = fp[i].param; + r->vname = fp[i]->param; continue; } @@ -1347,7 +1348,7 @@ setup_frame(INSTRUCTION *pc) default: cant_happen("unexpected parameter type %s", nodetype2str(m->type)); } - r->vname = fp[i].param; + r->vname = fp[i]->param; } stack_adj(-arg_count); /* adjust stack pointer */ @@ -1390,7 +1391,7 @@ restore_frame(NODE *fp) INSTRUCTION *ri; func = frame_ptr->func_node; - n = func->param_cnt; + n = func->frame_cnt; sp = frame_ptr->stack; for (; n > 0; n--) { diff --git a/profile.c b/profile.c index 15b33721..f7de0f51 100644 --- a/profile.c +++ b/profile.c @@ -59,7 +59,7 @@ static void just_dump(int signum); /* pretty printing related functions and variables */ static NODE *pp_stack = NULL; -static NODE *func_params; /* function parameters */ +static NODE **func_params; /* function parameters */ static FILE *prof_fp; /* where to send the profile */ static long indent_level = 0; 
@@ -356,7 +356,7 @@ pprint(INSTRUCTION *startp, INSTRUCTION *endp, int flags) m = pc->memory; switch (m->type) { case Node_param_list: - pp_push(pc->opcode, func_params[m->param_cnt].param, DONT_FREE, pc->comment); + pp_push(pc->opcode, func_params[m->param_cnt]->param, DONT_FREE, pc->comment); break; case Node_var: @@ -973,7 +973,7 @@ pprint(INSTRUCTION *startp, INSTRUCTION *endp, int flags) array = t1->pp_str; m = ip1->forloop_cond->array_var; if (m->type == Node_param_list) - item = func_params[m->param_cnt].param; + item = func_params[m->param_cnt]->param; else item = m->vname; indent(ip1->forloop_body->exec_count); @@ -2000,7 +2000,7 @@ pp_func(INSTRUCTION *pc, void *data ATTRIBUTE_UNUSED) pcount = func->param_cnt; func_params = func->fparms; for (j = 0; j < pcount; j++) { - fprintf(prof_fp, "%s", func_params[j].param); + fprintf(prof_fp, "%s", func_params[j]->param); if (j < pcount - 1) fprintf(prof_fp, ", "); } diff --git a/symbol.c b/symbol.c index 78b29bba..0207eec5 100644 --- a/symbol.c +++ b/symbol.c @@ -126,23 +126,54 @@ lookup(const char *name) /* make_params --- allocate function parameters for the symbol table */ -NODE * +NODE ** make_params(char **pnames, int pcount) { - NODE *p, *parms; + NODE **pp, **parms; int i; if (pcount <= 0 || pnames == NULL) return NULL; - ezalloc(parms, NODE *, pcount * sizeof(NODE), "make_params"); + emalloc(parms, NODE **, pcount * sizeof(NODE *), "make_params"); - for (i = 0, p = parms; i < pcount; i++, p++) { + for (i = 0, pp = parms; i < pcount; i++, pp++) { + NODE *p; + getnode(p); + memset(p, 0, sizeof *p); p->type = Node_param_list; p->param = pnames[i]; /* shadows pname and vname */ p->param_cnt = i; + *pp = p; + } + + return parms; +} + +/* extend_locals --- add a parameter to an existing param vector */ + +NODE **extend_locals(NODE **parms, const char *pname, int lcount) +{ + NODE *p; + + if (parms == NULL) { + emalloc(parms, NODE **, lcount * sizeof(NODE *), "extend_locals"); + } else { + erealloc(parms, NODE 
**, lcount * sizeof(NODE *), "extend_locals"); } + getnode(p); + memset(p, 0, sizeof *p); + + p->type = Node_param_list; + p->param = (char *) pname; + p->param_cnt = lcount - 1; + + parms[lcount - 1] = p; + + /* Unlike make_params, this also installs. */ + install(pname, p, Node_param_list); + return parms; } @@ -152,7 +183,7 @@ void install_params(NODE *func) { int i, pcount; - NODE *parms; + NODE **parms; if (func == NULL) return; @@ -164,7 +195,7 @@ install_params(NODE *func) return; for (i = 0; i < pcount; i++) - (void) install(parms[i].param, parms + i, Node_param_list); + (void) install(parms[i]->param, parms[i], Node_param_list); } @@ -173,9 +204,9 @@ install_params(NODE *func) */ void -remove_params(NODE *func) +remove_locals(NODE *func) { - NODE *parms, *p; + NODE **parms, *p; int i, pcount; if (func == NULL) @@ -183,7 +214,7 @@ remove_params(NODE *func) assert(func->type == Node_func); - if ( (pcount = func->param_cnt) <= 0 + if ( (pcount = func->frame_cnt) <= 0 || (parms = func->fparms) == NULL) return; @@ -191,7 +222,7 @@ remove_params(NODE *func) NODE *tmp; NODE *tmp2; - p = parms + i; + p = parms[i]; assert(p->type == Node_param_list); tmp = make_string(p->vname, strlen(p->vname)); tmp2 = in_array(param_table, tmp); @@ -239,14 +270,14 @@ destroy_symbol(NODE *r) switch (r->type) { case Node_func: - if (r->param_cnt > 0) { + if (r->frame_cnt > 0) { NODE *n; int i; - int pcount = r->param_cnt; + int pcount = r->frame_cnt; /* function parameters of type Node_param_list */ for (i = 0; i < pcount; i++) { - n = r->fparms + i; + n = r->fparms[i]; efree(n->param); } efree(r->fparms); @@ -686,22 +717,22 @@ check_param_names(void) for (i = 0; i < max; i += 2) { f = list[i+1]; - if (f->type == Node_builtin_func || f->param_cnt == 0) + if (f->type == Node_builtin_func || f->frame_cnt == 0) continue; - /* loop over each param in function i */ - for (j = 0; j < f->param_cnt; j++) { + /* loop over each param/local in function i */ + for (j = 0; j < f->frame_cnt; 
j++) { /* compare to function names */ /* use a fake node to avoid malloc/free of make_string */ - n.stptr = f->fparms[j].param; - n.stlen = strlen(f->fparms[j].param); + n.stptr = f->fparms[j]->param; + n.stlen = strlen(f->fparms[j]->param); if (in_array(func_table, & n)) { error( _("function `%s': cannot use function `%s' as a parameter name"), list[i]->stptr, - f->fparms[j].param); + f->fparms[j]->param); result = false; } } -- 2.17.1