Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Function argument fixes #198

Open
wants to merge 4 commits into
base: staging
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,7 @@ Node *makearr(Node *p)
if (isfcn(cp))
SYNTAX( "%s is a function, not an array", cp->nval );
else if (!isarr(cp)) {
xfree(cp->sval);
cp->sval = (char *) makesymtab(NSYMTAB);
cp->tval = ARR;
toarray(cp);
}
}
return p;
Expand Down
3 changes: 3 additions & 0 deletions proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,9 @@ extern Node *itonp(int);
extern void syminit(void);
extern void arginit(int, char **);
extern void envinit(char **);
extern void toarray(Cell *);
extern Array *makesymtab(int);
extern void clearsymtab(Cell *);
extern void freesymtab(Cell *);
extern void freeelem(Cell *, const char *);
extern Cell *setsymtab(const char *, const char *, double, unsigned int, Array *);
Expand Down Expand Up @@ -155,6 +157,7 @@ extern Cell *execute(Node *);
extern Cell *program(Node **, int);
extern Cell *call(Node **, int);
extern Cell *copycell(Cell *);
extern void updateargs(Cell *);
extern Cell *arg(Node **, int);
extern Cell *jump(Node **, int);
extern Cell *awkgetline(Node **, int);
Expand Down
201 changes: 142 additions & 59 deletions run.c
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,9 @@ struct Frame *frp = NULL; /* frame pointer. bottom level unused */

Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
{
static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
int i, ncall, ndef;
int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
Node *x;
Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */
Cell *args[NARGS]; /* BUG: fixed size arrays */
Cell *y, *z, *fcn;
char *s;

Expand Down Expand Up @@ -262,17 +260,12 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
if (isfcn(y))
FATAL("can't use function %s as argument in %s", y->nval, s);
if (i < ndef) {
oargs[i] = y;
if (isarr(y))
args[i] = y; /* arrays by ref */
else
args[i] = copycell(y);
args[i] = copycell(y);
}
tempfree(y);
}
for ( ; i < ndef; i++) { /* add null args for ones not provided */
args[i] = gettemp();
*args[i] = newcopycell;
args[i] = copycell(NULL);
}
frp++; /* now ok to up frame */
if (frp >= frame + nframe) {
Expand All @@ -290,38 +283,41 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
y = execute((Node *)(fcn->sval)); /* execute body */
DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));
tempfree(fcn);

/*
* If y is ...
*
* ret -> jump() has already set the return value.
* exit -> Set return value.
* next -> Set return value.
* copy -> To be visited while freeing args.
* temp -> Useless. Return it to the templist.
*
* Ignore everything else.
*/

if (isexit(y) || isnext(y)) {
tempfree(frp->retval);
frp->retval = y;
} else {
tempfree(y);
}

/* free args */
for (i = 0; i < ndef; i++) {
Cell *t = frp->args[i];
if (isarr(t)) {
if (t->csub == CCOPY) {
if (i >= ncall) {
freesymtab(t);
t->csub = CTEMP;
tempfree(t);
} else {
oargs[i]->tval = t->tval;
oargs[i]->tval &= ~(STR|NUM|DONTFREE);
oargs[i]->sval = t->sval;
tempfree(t);
}
}
} else if (t != y) { /* kludge to prevent freeing twice */
t->csub = CTEMP;
tempfree(t);
} else if (t == y && t->csub == CCOPY) {
t->csub = CTEMP;
tempfree(t);
freed = 1;
y = frp->args[i];
if (isarr(y)) {
if (y->cnext == NULL)
freesymtab(y);
else if (y->cnext->sval != y->sval)
FATAL("can't happen: symbol table moved %s", y->cnext->nval);
}
y->csub = CTEMP;
tempfree(y);
}
tempfree(fcn);
if (isexit(y) || isnext(y))
return y;
if (freed == 0) {
tempfree(y); /* don't free twice! */
}
z = frp->retval; /* return value */

z = frp->retval; /* return value */
DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
frp--;
return(z);
Expand All @@ -331,21 +327,117 @@ Cell *copycell(Cell *x) /* make a copy of a cell in a temp */
{
Cell *y;

/* copy is not constant or field */

y = gettemp();
y->csub = CCOPY;

/* default to the uninitialized value */
if (x == NULL)
return y;

/* copy is not constant or field */
y->tval = x->tval & ~(CON|FLD|REC);
y->csub = CCOPY; /* prevents freeing until call is over */
y->nval = x->nval; /* BUG? */
if (isstr(x) /* || x->ctype == OCELL */) {

/*
* What should we name this argument? The parser does not preserve
* the names of parameters, so we don't know what the source calls
* it. Is it worth the effort to change that?
*
* Note: Simply pointing to the incoming cell's name (as had been
* done for a long time) creates a dangling pointer if that cell
* is deleted (think array element).
*
* For now, args are anonymous. Debugging can use frame/arg indices.
*/
y->nval = NULL;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit of a concern since nval is printed by the debug and warning code. On Linux and BSD this is safe, since printf(3) will print "(null)" but on other systems it will cause a NULL dereference. So either we need to wrap nval with the NN macro everywhere it is printed or set it to some non-NULL value. Setting to x->nval(or a copy of it) is not really correct but perhaps is better than NULL.

Copy link
Contributor Author

@mpinjr mpinjr Sep 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The debugging statements need to be able to cope with anonymous cells. The temp cells used for expressions (including function return values) are anonymous. The jump cells are as well.

A quick grep found 13 instances of nval in DPRINTF statements, 10 wrapped, 3 unwrapped. I will wrap them all.

Also, parsing a function leaks the previous function definition's arglist. Compare 'function f(a) {}' with 'function f(a) {} function g() {}'. I will see about preserving the argument names while also eliminating the leak.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not just DPRINTF calls, also SYNTAX, WARNING and FATAL. Some of those may not be reachable but the ones in funnyvar() are.

Copy link
Collaborator

@plan9 plan9 Sep 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there is a lot to digest and review here. miguel, please keep nval NN wrapping as a separate PR. thanks

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@plan9, I will keep the NN wrapping separate. I hope to add a commit to this branch later today (perhaps tomorrow) to provide meaningful names for the args.

if (isstr(x)) {
y->sval = tostring(x->sval);
y->tval &= ~DONTFREE;
} else
} else {
y->sval = x->sval; /* by ref */
y->tval |= DONTFREE;
}
y->fval = x->fval;
y->fmt = x->fmt;

/*
* When a tempcell leaves the templist, cnext can be repurposed.
* Arguments use it to maintain a direct link to their origin.
* If an argument that should have been passed by reference was
* passed by value, updateargs() uses these links to determine
* which cells need updating.
*
* An origin is either a global variable, an array element, or a
* function argument without an origin.
*
* A missing value or a tempcell produces an argument without an
* origin. If passed along, it becomes an origin.
*
* If an argument has an origin, it can't become an origin. When
* passed along, its origin becomes the receiver's origin.
*
* Intermediaries between an argument and its origin all point to
* the same origin.
*/
if (x == NULL || istemp(x)) {
y->cnext = NULL;
} else if (x->csub == CCOPY && x->cnext != NULL) {
y->cnext = x->cnext;
} else {
y->cnext = x;
}
return y;
}

void updateargs(Cell *a) /* convert by-value to by-reference */
{
struct Frame *f;
Cell *o;
int i;

/*
* The argument, a, was an uninitialized function argument that
* has become an array. It was passed by-value but should have
* been passed by-reference. To compensate for the mistake, we
* must update a's origin, o, and every other descendant of o,
* so that they share a symbol table.
*
* We start at the top of the stack and work our way down.
* If we encounter the origin on the stack, the scan concludes
* because all of its descendants are in higher frames that
* have already been visited. Otherwise, we continue until
* we've inspected every argument.
*/

#define UPDATE_CELL(d) \
if (freeable((d))) \
xfree((d)->sval) \
(d)->sval = a->sval; \
(d)->tval = a->tval;

o = a->cnext;
DPRINTF("updateargs triggered by %p for %p\n", (void *) a, (void *) o);
if (o == NULL) /* nothing to update */
return;

UPDATE_CELL(o); /* may not be on the stack */
for (f = frp; f > frame; --f) {
DPRINTF("frame %td\n", f-frame);
for (i = 0; i < f->nargs; i++) {
if (f->args[i] == o)
return;
if (f->args[i]->cnext == o) {
if (f->args[i] == a) {
DPRINTF("\ttrigger arg %d\n", i);
continue;
}
UPDATE_CELL(f->args[i]);
DPRINTF("\tupdating arg %d\n", i);
}
}
}
#undef UPDATE_CELL
}

Cell *arg(Node **a, int n) /* nth argument of a function */
{

Expand Down Expand Up @@ -518,12 +610,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
x = execute(a[0]); /* Cell* for symbol table */
buf = makearraystring(a[1], __func__);
if (!isarr(x)) {
DPRINTF("making %s into an array\n", NN(x->nval));
if (freeable(x))
xfree(x->sval);
x->tval &= ~(STR|NUM|DONTFREE);
x->tval |= ARR;
x->sval = (char *) makesymtab(NSYMTAB);
toarray(x);
}
z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
z->ctype = OCELL;
Expand All @@ -544,10 +631,7 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts *
if (!isarr(x))
return True;
if (a[1] == NULL) { /* delete the elements, not the table */
freesymtab(x);
x->tval &= ~STR;
x->tval |= ARR;
x->sval = (char *) makesymtab(NSYMTAB);
clearsymtab(x);
} else {
char *buf = makearraystring(a[1], __func__);
freeelem(x, buf);
Expand Down Expand Up @@ -1692,12 +1776,11 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
}
sep = *fs;
ap = execute(a[1]); /* array name */
/* BUG 7/26/22: this appears not to reset array: see C1/asplit */
freesymtab(ap);
if (isarr(ap))
clearsymtab(ap);
else
toarray(ap);
DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
ap->tval &= ~STR;
ap->tval |= ARR;
ap->sval = (char *) makesymtab(NSYMTAB);

n = 0;
if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
Expand Down
61 changes: 61 additions & 0 deletions testdir/T.func
Original file line number Diff line number Diff line change
Expand Up @@ -194,3 +194,64 @@ $awk '
function foo() { i = 0 }
BEGIN { x = foo(); printf "<%s> %d\n", x, x }' >foo2
diff foo1 foo2 || echo 'BAD: T.func (fall off end)'

$awk '
function f(x, y) {
x = "x"
y[0] = "y"
print x, y[0]
}
function g(z) {
print z[0]
}
BEGIN {
f(a, a)
g(a)
}' 2>foo1 >/dev/null
ret=$?
if [ $ret -eq 0 ] || ! grep -q 'can.t read value of .* an array' foo1; then
echo "BAD: T.func (simultaneously scalar and array: $ret)"
fi

$awk '
function f(f1, f2, f3) {
f1[0] = "*"
print "f: " f1[0], f2[0], f3[0]
}
function g(g1, g2, g3) {
f(g1, g1, g1)
print "g: " g1[0], g2[0], g3[0]
}
function h(h1, h2, h3) {
g(h1, h2, h3)
print "h: " h1[0], h2[0], h3[0]
}
function i(i1, i2, i3) {
print "i: " i1[0], i2[0], i3[0]
}
BEGIN {
OFS = "-"
f(a, a, a)
g(b, b, b)
h(c, c, c)
i(a, b, c)
}' >foo2
cat <<! >foo1
f: *-*-*
f: *-*-*
g: *-*-*
f: *-*-*
g: *-*-*
h: *-*-*
i: *-*-*
!
diff foo1 foo2 || echo 'BAD: T.func (cousin coherency)'

$awk '
function f(x) { x[0]="updated"; exit }
function g(y) { print y[0] }
BEGIN { f(a) }
END { g(a) }
' >foo2
echo 'updated' >foo1
diff foo1 foo2 || echo 'BAD: T.func (exit skips arg conversion updates)'
2 changes: 1 addition & 1 deletion testdir/T.misc
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ function bug2(arg) {
bot[arg]=arg;
}
' 2>foo
grep "can.t assign to foo" foo >/dev/null || echo 1>&2 "BAD: T.misc foo bug"
grep "can.t assign to .* array" foo >/dev/null || echo 1>&2 "BAD: T.misc foo bug"


# This should be a syntax error
Expand Down
Loading