[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[gawk-diffs] [SCM] gawk branch, feature/fixtype, updated. gawk-4.1.0-189
From: |
Andrew J. Schorr |
Subject: |
[gawk-diffs] [SCM] gawk branch, feature/fixtype, updated. gawk-4.1.0-1894-g5de217e |
Date: |
Wed, 6 Jul 2016 21:44:06 +0000 (UTC) |
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".
The branch, feature/fixtype has been updated
via 5de217ef7d347b8afa6f6f1fbd20cb84a20ae187 (commit)
from f4dfa789e973f98aab5e091492dbcf542747c3b4 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=5de217ef7d347b8afa6f6f1fbd20cb84a20ae187
commit 5de217ef7d347b8afa6f6f1fbd20cb84a20ae187
Author: Andrew J. Schorr <address@hidden>
Date: Wed Jul 6 17:43:18 2016 -0400
Terminate all $n field values with a '\0' character.
diff --git a/ChangeLog b/ChangeLog
index ab1bcaf..2699ef0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,27 @@
2016-07-06 Andrew J. Schorr <address@hidden>
+ Make sure that all field values, and therefore all strings inside gawk,
+ are terminated with a '\0' character!
+ * field.c (databuf): New static struct to hold info about our buffer to
+ contain the field string values.
+ (allocate_databuf): New function to make sure the databuf is large
+ enough to hold $0 and copies of $1 through $NF.
+ (set_field): Copy $n into free space previously allocated in databuf
+ and add a '\0' at the end.
+ (rebuild_record): Call allocate_databuf to ensure sufficient space
+ for copying non-malloced field values. When copying field values,
+ use databuf to create a NUL-terminated copy.
+ (purge_record): New function extracted from reset_record to initialize
+ $1 through $NF to null values.
+ (set_record): Buffer management moved to new allocate_databuf function.
+ Call purge_record instead of reset_record, since reset_record contains
+ some extra logic not needed in this case.
+ (reset_record): Call purge_record to do most of the work, and call
+ allocate_databuf to make sure we have a big enough buffer to contain
+ copies of the $1 through $NF.
+
+2016-07-06 Andrew J. Schorr <address@hidden>
+
* awk.h: Renumber flags to remove gap created when FIELD was removed.
2016-07-05 Andrew J. Schorr <address@hidden>
diff --git a/field.c b/field.c
index 39b4f96..3307ace 100644
--- a/field.c
+++ b/field.c
@@ -55,6 +55,8 @@ static long fpat_parse_field(long, char **, int, NODE *,
static void set_element(long num, char * str, long len, NODE *arr);
static void grow_fields_arr(long num);
static void set_field(long num, char *str, long len, NODE *dummy);
+static void purge_record(void);
+static void allocate_databuf(size_t, bool);
static char *parse_extent; /* marks where to restart parse of record */
static long parse_high_water = 0; /* field number that we have parsed so far */
@@ -115,6 +117,15 @@ grow_fields_arr(long num)
nf_high_water = num;
}
+static struct {
+ char *p; /* buffer for $0 and field copies */
+ size_t size; /* buffer size */
+ char *space; /*
+ * Pointer to free space in databuf.p for making
+ * NUL-terminated copies of $1 thru $NF
+ */
+} databuf;
+
/* set_field --- set the value of a particular field */
/*ARGSUSED*/
@@ -129,7 +140,18 @@ set_field(long num,
if (num > nf_high_water)
grow_fields_arr(num);
n = fields_arr[num];
- n->stptr = str;
+ /*
+ * Make a NUL-terminated copy. It is tempting to do this only if
+ * str[len] != '\0', but the parse methods cannot be relied upon to
+ * avoid altering the contents of the record during parsing. For
+ * example, def_parse_field changes the final NUL to a space. In
+ * principle, the method could change other characters, so it does
+ * not seem safe to rely upon the value of str[len].
+ */
+ memcpy(databuf.space, str, len);
+ databuf.space[len] = '\0';
+ n->stptr = databuf.space;
+ databuf.space += len+1;
n->stlen = len;
n->flags = (STRCUR|STRING|MAYBE_NUM); /* do not set MALLOC */
}
@@ -185,6 +207,8 @@ rebuild_record()
}
}
tmp = make_str_node(ops, tlen, ALREADY_MALLOCED);
+ allocate_databuf(tlen, false);
+ databuf.space = databuf.p;
/*
* Since we are about to unref fields_arr[0], we want to find
@@ -195,25 +219,26 @@ rebuild_record()
for (cops = ops, i = 1; i <= NF; i++) {
NODE *r = fields_arr[i];
/*
- * I see no reason to copy malloc'ed fields to point into
- * the new $0 buffer, but that's how previous versions did it.
- * It seems faster to leave the malloc'ed fields in place.
+ * There is no reason to copy malloc'ed fields to point into
+ * the new $0 buffer, although that's how previous versions did
+ * it. It seems faster to leave the malloc'ed fields in place.
*/
- if (r->stlen > 0 && (r->flags & MALLOC) == 0) {
+ if ((r->flags & MALLOC) == 0) {
NODE *n;
getnode(n);
*n = *r;
if (r->valref > 1) {
/*
- * I don't think this should happen, since it
+ * This probably never happens, since it
* was not considered by previous versions of
- * this function. But it seems clear to me that
+ * this function. But it seems clear that
* we can't leave r's stptr pointing into the
* old $0 buffer that we are about to unref.
- * It's not obvious to me that valref must be
+ * It's not a priori obvious that valref must be
* 1 in all cases, so it seems wise to suppport
- * this corner case.
+ * this corner case. The only question is
+ * whether to add a warning message.
*/
emalloc(r->stptr, char *, r->stlen + 1,
"rebuild_record");
memcpy(r->stptr, cops, r->stlen);
@@ -221,7 +246,16 @@ rebuild_record()
r->flags |= MALLOC;
}
- n->stptr = cops;
+ if (cops[n->stlen] == '\0')
+ /* should be the case for $NF */
+ n->stptr = cops;
+ else {
+ /* make a NUL-terminated copy */
+ memcpy(databuf.space, cops, n->stlen);
+ databuf.space[n->stlen] = '\0';
+ n->stptr = databuf.space;
+ databuf.space += n->stlen+1;
+ }
unref(r);
fields_arr[i] = n;
assert((n->flags & WSTRCUR) == 0);
@@ -229,12 +263,67 @@ rebuild_record()
cops += fields_arr[i]->stlen + OFSlen;
}
+#ifndef NDEBUG
+ if ((fields_arr[0]->flags & MALLOC) == 0)
+ assert(fields_arr[0]->valref == 1);
+#endif
unref(fields_arr[0]);
fields_arr[0] = tmp;
field0_valid = true;
}
+static void
+allocate_databuf(size_t reclen, bool need_zero)
+{
+ size_t needed;
+#define INITIAL_SIZE 512
+#define MAX_SIZE ((size_t) ~0) /* maximally portable ... */
+
+ /* buffer management: */
+ if (databuf.size == 0) { /* first time */
+ emalloc(databuf.p, char *, INITIAL_SIZE, "set_record");
+ databuf.size = INITIAL_SIZE;
+
+ }
+ /*
+ * Make sure there's enough room. We need space for $0 plus a NUL
+ * terminator plus room for NUL-terminated copies of $1 through $NF.
+ * We use reclen as an upper bound for NF, assuming at least 1 byte
+ * for a field and its field separator (or fixed-width column). So our
+ * total requirement is reclen + 1 + 2*reclen -> 3*reclen + 1.
+ * It is tempting to skip the copy if the field value is already
+ * terminated with a NUL; this should normally be the case for $NF.
+ * Unfortunately, the parse methods often alter the string while
+ * parsing, typically changing the final NUL to a sentinel. So when
+ * set_field is called, the value of the character after the string
+ * in question may not be the actual value once parsing is complete.
+ * To be safe, it is prudent to copy all of the fields.
+ */
+ needed = 2*reclen; /* for copying $1..$NF */
+ if (need_zero)
+ needed += reclen + 1; /* for $0 plus '\0' */
+#ifdef GAWKDEBUG
+ /* malloc precise size so we can check for overruns with valgrind */
+ if (needed == 0)
+ needed = 1; /* erealloc requires non-zero bytes */
+ databuf.size = needed;
+ erealloc(databuf.p, char *, databuf.size, "set_record");
+#else
+ if (needed > databuf.size) {
+ do {
+ if (databuf.size > MAX_SIZE/2)
+ fatal(_("input record too large"));
+ databuf.size *= 2;
+ } while (needed > databuf.size);
+ erealloc(databuf.p, char *, databuf.size, "set_record");
+ }
+#endif
+
+#undef INITIAL_SIZE
+#undef MAX_SIZE
+}
+
/*
* set_record:
* setup $0, but defer parsing rest of line until reference is made to $(>0)
@@ -249,53 +338,34 @@ void
set_record(const char *buf, int cnt)
{
NODE *n;
- static char *databuf;
- static unsigned long databuf_size;
-#define INITIAL_SIZE 512
-#define MAX_SIZE ((unsigned long) ~0) /* maximally portable ... */
-
- reset_record();
- /* buffer management: */
- if (databuf_size == 0) { /* first time */
- emalloc(databuf, char *, INITIAL_SIZE, "set_record");
- databuf_size = INITIAL_SIZE;
- memset(databuf, '\0', INITIAL_SIZE);
+ purge_record();
+ allocate_databuf(cnt, true);
- }
- /*
- * Make sure there's enough room. Since we sometimes need
- * to place a sentinel at the end, we make sure
- * databuf_size is > cnt after allocation.
- */
- if (cnt >= databuf_size) {
- while (cnt >= databuf_size && databuf_size <= MAX_SIZE)
- databuf_size *= 2;
- erealloc(databuf, char *, databuf_size, "set_record");
- memset(databuf, '\0', databuf_size);
- }
/* copy the data */
- memcpy(databuf, buf, cnt);
+ memcpy(databuf.p, buf, cnt);
/*
* Add terminating '\0' so that C library routines
* will know when to stop.
*/
- databuf[cnt] = '\0';
+ databuf.p[cnt] = '\0';
+ databuf.space = databuf.p + cnt + 1;
/* manage field 0: */
+#ifndef NDEBUG
+ if ((fields_arr[0]->flags & MALLOC) == 0)
+ assert(fields_arr[0]->valref == 1);
+#endif
unref(fields_arr[0]);
getnode(n);
- n->stptr = databuf;
+ n->stptr = databuf.p;
n->stlen = cnt;
n->valref = 1;
n->type = Node_val;
n->stfmt = STFMT_UNUSED;
n->flags = (STRING|STRCUR|MAYBE_NUM); /* do not set MALLOC */
fields_arr[0] = n;
-
-#undef INITIAL_SIZE
-#undef MAX_SIZE
}
/* reset_record --- start over again with current $0 */
@@ -303,13 +373,32 @@ set_record(const char *buf, int cnt)
void
reset_record()
{
+ fields_arr[0] = force_string(fields_arr[0]);
+ purge_record();
+ if ((fields_arr[0]->flags & MALLOC) != 0) {
+ allocate_databuf(fields_arr[0]->stlen, false);
+ databuf.space = databuf.p;
+ }
+ else {
+ allocate_databuf(fields_arr[0]->stlen, true);
+ /* may have been realloced, so set stptr */
+ fields_arr[0]->stptr = databuf.p;
+ databuf.space = databuf.p + fields_arr[0]->stlen + 1;
+ }
+}
+
+static void
+purge_record()
+{
int i;
NODE *n;
- fields_arr[0] = force_string(fields_arr[0]);
-
NF = -1;
for (i = 1; i <= parse_high_water; i++) {
+#ifndef NDEBUG
+ if ((fields_arr[i]->flags & MALLOC) == 0)
+ assert(fields_arr[i]->valref == 1);
+#endif
unref(fields_arr[i]);
getnode(n);
*n = *Null_field;
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 22 ++++++++
field.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++---------------
2 files changed, 152 insertions(+), 41 deletions(-)
hooks/post-receive
--
gawk
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [gawk-diffs] [SCM] gawk branch, feature/fixtype, updated. gawk-4.1.0-1894-g5de217e,
Andrew J. Schorr <=