>From 3124c4515ac2dd59a44605b63eeec818d4f9b70b Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Tue, 31 Jul 2018 01:42:46 -0600 Subject: [PATCH 1/7] sed: do not close stderr on exit Not needed, and prevents leak-sanitizing from working. * sed/utils.c (ck_fclose): Do not close stderr. --- sed/utils.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sed/utils.c b/sed/utils.c index 5314ea4..329dec0 100644 --- a/sed/utils.c +++ b/sed/utils.c @@ -267,10 +267,7 @@ ck_fclose (FILE *stream) last output operations might fail and it is important to signal this as an error (perhaps to make). */ if (!stream) - { - do_ck_fclose (stdout); - do_ck_fclose (stderr); - } + do_ck_fclose (stdout); } /* Close a single file. */ -- 2.11.0 >From 83b0ed57e76ce06df39df1ce2729f898f1de8391 Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Tue, 31 Jul 2018 02:13:12 -0600 Subject: [PATCH 2/7] sed: free allocated memory under lint, remove DEBUG_LEAKS Under normal operation there is no need for explicit freeing, as all memory will be released when sed terminates. During development (and specifically, valgrind and address-sanitizing) enabling lint prevents false-positive warnings about memory leaks. Lint mode can be enabled with CFLAGS="-Dlint". It is also automatically enabled by default when building from git (as opposed to tarball). For consistenchy all instances of "#ifdef DEBUG_LEAKS" are replaced with "#ifdef lint". * sed/sed.h (struct subst): Add member variable to keep the address of allocated buffer in compile.c:setup_replacement(). (release_regex): Add declaration. (finish_program): Function now takes an argument: the sed program vector. * sed/sed.c (main): Adjust call to finish_program. * sed/compile.c (finish_program): Release program allocations. (setup_replacement): Remember the allocated buffer address. (compile_program): Free temporary array in 'y' command. * sed/execute.c (execute_program): Free allocated but unused buffer in 'R' command when reaching EOF. * sed/regexp.c (release_regex): Remove 'static', free the allocated dfa struct. * sed/utils.c (panic): Free open files linked-list elements. --- sed/compile.c | 32 +++++++++++++++++++++++++++++--- sed/execute.c | 11 +++++++++-- sed/regexp.c | 10 ++++++++-- sed/sed.c | 2 +- sed/sed.h | 7 +++++-- sed/utils.c | 7 +++++++ 6 files changed, 59 insertions(+), 10 deletions(-) diff --git a/sed/compile.c b/sed/compile.c index 2637ba6..5e2032d 100644 --- a/sed/compile.c +++ b/sed/compile.c @@ -757,6 +757,8 @@ setup_replacement (struct subst *sub, const char *text, size_t length) base = MEMDUP (text, length, char); length = normalize_text (base, length, TEXT_REPLACEMENT); + IF_LINT (sub->replacement_buffer = base); + text_end = base + length; tail = &root; @@ -1330,6 +1332,8 @@ compile_program (struct vector *vector) trans_pairs[2 * i] = NULL; if (idx != dest_len) bad_prog (_(Y_CMD_LEN)); + + IF_LINT (free (src_lens)); } else { @@ -1678,7 +1682,7 @@ rewind_read_files (void) /* Release all resources which were allocated in this module. */ void -finish_program (void) +finish_program (struct vector *program) { /* close all files... */ { @@ -1708,7 +1712,29 @@ finish_program (void) file_read = file_write = NULL; } -#ifdef DEBUG_LEAKS +#ifdef lint + for (int i = 0; i < program->v_length; ++i) + { + const struct sed_cmd *sc = &program->v[i]; + + if (sc->a1 && sc->a1->addr_regex) + release_regex (sc->a1->addr_regex); + if (sc->a2 && sc->a2->addr_regex) + release_regex (sc->a2->addr_regex); + + switch (sc->cmd) + { + case 's': + free (sc->x.cmd_subst->replacement_buffer); + if (sc->x.cmd_subst->regx) + release_regex (sc->x.cmd_subst->regx); + break; + } + } + obstack_free (&obs, NULL); -#endif /*DEBUG_LEAKS*/ +#else + (void)program; +#endif /* lint */ + } diff --git a/sed/execute.c b/sed/execute.c index fe37ad3..58be321 100644 --- a/sed/execute.c +++ b/sed/execute.c @@ -1502,6 +1502,13 @@ execute_program (struct vector *vec, struct input *input) aq->text = text; aq->textlen = result; } + else + { + /* The external input file (for R command) reached EOF, + the 'text' buffer will not be added to the append queue + so release it */ + free (text); + } } break; @@ -1683,7 +1690,7 @@ process_files (struct vector *the_program, char **argv) } closedown (&input); -#ifdef DEBUG_LEAKS +#ifdef lint /* We're about to exit, so these free()s are redundant. But if we're running under a memory-leak detecting implementation of malloc(), we want to explicitly @@ -1694,7 +1701,7 @@ process_files (struct vector *the_program, char **argv) free (hold.text); free (line.text); free (s_accum.text); -#endif /*DEBUG_LEAKS*/ +#endif /* lint */ if (input.bad_count) status = EXIT_BAD_INPUT; diff --git a/sed/regexp.c b/sed/regexp.c index 8eba866..2801d6f 100644 --- a/sed/regexp.c +++ b/sed/regexp.c @@ -425,11 +425,17 @@ match_regex (struct regex *regex, char *buf, size_t buflen, } -#ifdef DEBUG_LEAKS +#ifdef lint void release_regex (struct regex *regex) { + if (regex->dfa) + { + dfafree (regex->dfa); + free (regex->dfa); + regex->dfa = NULL; + } regfree (®ex->pattern); free (regex); } -#endif /*DEBUG_LEAKS*/ +#endif /* lint */ diff --git a/sed/sed.c b/sed/sed.c index be1aaad..aa746aa 100644 --- a/sed/sed.c +++ b/sed/sed.c @@ -375,7 +375,7 @@ main (int argc, char **argv) return_code = process_files (the_program, argv+optind); - finish_program (); + finish_program (the_program); ck_fclose (NULL); return return_code; diff --git a/sed/sed.h b/sed/sed.h index 93b2c7f..0e3c601 100644 --- a/sed/sed.h +++ b/sed/sed.h @@ -126,6 +126,9 @@ struct subst { unsigned print : 2; /* 'p' option given (before/after eval) */ unsigned eval : 1; /* 'e' option given */ unsigned max_id : 4; /* maximum backreference on the RHS */ +#ifdef lint + char* replacement_buffer; +#endif }; #ifdef REG_PERL @@ -190,13 +193,13 @@ struct vector *compile_string (struct vector *, char *str, size_t len); struct vector *compile_file (struct vector *, const char *cmdfile); void check_final_program (struct vector *); void rewind_read_files (void); -void finish_program (void); +void finish_program (struct vector *); struct regex *compile_regex (struct buffer *b, int flags, int needed_sub); int match_regex (struct regex *regex, char *buf, size_t buflen, size_t buf_start_offset, struct re_registers *regarray, int regsize); -#ifdef DEBUG_LEAKS +#ifdef lint void release_regex (struct regex *); #endif diff --git a/sed/utils.c b/sed/utils.c index 329dec0..7b0c3c8 100644 --- a/sed/utils.c +++ b/sed/utils.c @@ -74,7 +74,14 @@ panic (const char *str, ...) strerror (errno)); } +#ifdef lint + struct open_file *next = open_files->link; + free (open_files->name); + free (open_files); + open_files = next; +#else open_files = open_files->link; +#endif } exit (EXIT_PANIC); -- 2.11.0 >From 3c4c0f3d447d958708d4f8445699a2b99369e18b Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Tue, 31 Jul 2018 11:25:35 -0600 Subject: [PATCH 3/7] sed: fix memory leak * sed/regexp.c (match_regex): Free the previously allocated regex struct before re-building the regex during program execution. --- sed/regexp.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sed/regexp.c b/sed/regexp.c index 2801d6f..b3a4b41 100644 --- a/sed/regexp.c +++ b/sed/regexp.c @@ -269,7 +269,19 @@ match_regex (struct regex *regex, char *buf, size_t buflen, return (ret == 0); #else if (regex->pattern.no_sub && regsize) - compile_regex_1 (regex, regsize); + { + /* Re-compiling an existing regex, free the previously allocated + structures. */ + if (regex->dfa) + { + dfafree (regex->dfa); + free (regex->dfa); + regex->dfa = NULL; + } + regfree (®ex->pattern); + + compile_regex_1 (regex, regsize); + } regex->pattern.regs_allocated = REGS_REALLOCATE; -- 2.11.0 >From 050bc270d49389eb027bf13e8d67ea27ac14a07d Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Wed, 1 Aug 2018 20:15:08 -0600 Subject: [PATCH 4/7] gnulib: update to latest (for regex memory leaks) Specifically for gnulib commits 66b99e5259,c5e76a9560. see https://lists.gnu.org/r/bug-gnulib/2018-07/msg00127.html --- gnulib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnulib b/gnulib index ad52c65..c5e76a9 160000 --- a/gnulib +++ b/gnulib @@ -1 +1 @@ -Subproject commit ad52c652a64ebe3523437fb9bb714f99c1ffa9e6 +Subproject commit c5e76a95605b1ef70559f90ebb656e0a4efb8fd9 -- 2.11.0 >From 1678ecb2ac72e5c63b5b62a92e7aaf27affa2b32 Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Wed, 1 Aug 2018 22:23:12 -0600 Subject: [PATCH 5/7] maint: add address-sanitizer build target use 'make build-asan' to rebuild sed with gcc's address sanitizer. * cfg.mk (build-asan): New target. --- cfg.mk | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cfg.mk b/cfg.mk index 3e8b123..65f8c19 100644 --- a/cfg.mk +++ b/cfg.mk @@ -356,3 +356,15 @@ static-analysis-make: .PHONY: static-analysis static-analysis: static-analysis-init static-analysis-config \ static-analysis-make + +ASAN_FLAGS=-fsanitize=address -fno-omit-frame-pointer +ASAN_CFLAGS=-O0 -g -Dlint $(ASAN_FLAGS) +ASAN_LDFLAGS=$(ASAN_FLAGS) + +.PHONY: build-asan +build-asan: + test -x ./configure || \ + { echo "./configure script not found" >&2; exit 1; } + ./configure CFLAGS="$(ASAN_CFLAGS)" LDFLAGS="$(ASAN_LDFLAGS)" + make + -- 2.11.0 >From 14574ab003ebed32f3d78c4bdb4ab5d5c4bc31eb Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Wed, 1 Aug 2018 23:40:48 -0600 Subject: [PATCH 6/7] maint: add undefined-behavior build target Using gcc-specific options, a recent gcc is required. build with: make build-ubsan CC=gcc-8.2 * cfg.mk (build-ubsan): New target. --- cfg.mk | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/cfg.mk b/cfg.mk index 65f8c19..227a7d0 100644 --- a/cfg.mk +++ b/cfg.mk @@ -368,3 +368,30 @@ build-asan: ./configure CFLAGS="$(ASAN_CFLAGS)" LDFLAGS="$(ASAN_LDFLAGS)" make +# NOTE: These options require a recent gcc (tested with gcc 8.2) +UBSAN_FLAGS=-fsanitize=undefined \ + -fsanitize=signed-integer-overflow \ + -fsanitize=bounds-strict \ + -fsanitize=shift \ + -fsanitize=shift-exponent \ + -fsanitize=shift-base \ + -fsanitize=integer-divide-by-zero \ + -fsanitize=null \ + -fsanitize=return \ + -fsanitize=alignment \ + -fsanitize=object-size \ + -fsanitize=nonnull-attribute \ + -fsanitize=returns-nonnull-attribute \ + -fsanitize=bool \ + -fsanitize=enum \ + -fsanitize=pointer-overflow \ + -fsanitize=builtin +UBSAN_CFLAGS=-O0 -g -Dlint $(UBSAN_FLAGS) +UBSAN_LDFLAGS=$(UBSAN_FLAGS) + +.PHONY: build-ubsan +build-ubsan: + test -x ./configure || \ + { echo "./configure script not found" >&2; exit 1; } + ./configure CFLAGS="$(UBSAN_CFLAGS)" LDFLAGS="$(UBSAN_LDFLAGS)" + make -- 2.11.0 >From 6a342850f13820dd3d3a36d36b0d48c5cb72e88b Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Mon, 6 Aug 2018 15:34:02 -0600 Subject: [PATCH 7/7] sed: mark function as attribute(malloc) Suggested by gcc-8.2.0. * sed/compile.c (read_label): Mark as _GL_ATTRIBUTE_MALLOC. --- sed/compile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sed/compile.c b/sed/compile.c index 5e2032d..936fd28 100644 --- a/sed/compile.c +++ b/sed/compile.c @@ -673,7 +673,7 @@ mark_subst_opts (struct subst *cmd) } /* read in a label for a `:', `b', or `t' command */ -static char * +static char * _GL_ATTRIBUTE_MALLOC read_label (void) { struct buffer *b; -- 2.11.0