autoconf-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: awk for config headers


From: Paul Eggert
Subject: Re: awk for config headers
Date: Tue, 02 Oct 2007 16:43:01 -0700
User-agent: Gnus/5.11 (Gnus v5.11) Emacs/22.1 (gnu/linux)

Ralf Wildenhues <address@hidden> writes:

>> The 'split' doesn't need to be done unless the line starts with '#'.
>
> Why is that a problem?  The above is short to write, and I don't see how
> I can avoid regex evaluation at all on nonmatching lines.

It's mostly a style issue: I think it's clearer to express the
computation as something like "if the line is an interesting one, then
change it" rather than "do some computation on the line, then test
whether it's interesting, and change it if it's interesting".

> Hmm.  I would like to extract white space before `#', and between `#'
> and `define' or `undef'.  Is this easier to read?
>
>       split(line, ws1, "#")
>       split(ws1[2], ws2, substr(defundef, 1, 1))
>       line = ws1[1] "#" ws2[1] "define " macro " " value

Better, but I think it's even clearer to use "index".  Here's a
proposed revision of the patch, which I've tested only on Debian
stable.  I tweaked it to simplify it a bit more (avoid the D_is_set
business, for example, by making sure the entry cannot be the empty
string).  I ran it once; it's a tad slower (0.06% slower for "make
check" on Debian stable) but I don't know whether the speed difference
is in my machine's noise.

diff --git a/NEWS b/NEWS
index 73dc8f7..2734f8d 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,13 @@ GNU Autoconf NEWS - User visible changes.
 
 * Major changes in Autoconf 2.61b (????-??-??)
 
+** config.status now uses awk instead of sed also for config headers.
+
+   - As a side effect, AC_DEFINE and AC_DEFINE_UNQUOTED now handle multi-line
+     values, i.e., backslash-newline combinations are handled correctly.
+     Further, for config headers, the total size of values is not limited by
+     the POSIX length limit of text lines any more, only each single line.
+
 ** Autoconf is now licensed under the General Public License version 3
    or later (GPLv3+).  As with earlier versions, the license includes
    an exception clause so that you may release a configure script
diff --git a/lib/autoconf/status.m4 b/lib/autoconf/status.m4
index 6364629..0c00af2 100644
--- a/lib/autoconf/status.m4
+++ b/lib/autoconf/status.m4
@@ -321,11 +321,11 @@ m4_define([_AC_AWK_LITERAL_LIMIT],
 
 # _AC_OUTPUT_FILES_PREPARE
 # ------------------------
-# Create the sed scripts needed for CONFIG_FILES.
+# Create the awk scripts needed for CONFIG_FILES.
 # Support multiline substitutions and make sure that the substitutions are
 # not evaluated recursively.
 # The intention is to have readable config.status and configure, even
-# though this m4 code might be scaring.
+# though this m4 code might be scary.
 #
 # This code was written by Dan Manthey and rewritten by Ralf Wildenhues.
 #
@@ -675,6 +675,147 @@ AC_DEFUN([AC_CONFIG_HEADER],
 [AC_CONFIG_HEADERS([$1])])
 
 
+# _AC_OUTPUT_HEADERS_PREPARE
+# --------------------------
+# Create the awk scripts needed for CONFIG_HEADERS.
+# Support multiline #defines.
+#
+# This macro is expanded inside a here document.  If the here document is
+# closed, it has to be reopened with "cat >>$CONFIG_STATUS <<\_ACEOF".
+#
+m4_define([_AC_OUTPUT_HEADERS_PREPARE],
+[# Set up the scripts for CONFIG_HEADERS section.
+# No need to generate them if there are no CONFIG_HEADERS.
+# This happens for instance with `./config.status Makefile'.
+if test -n "$CONFIG_HEADERS"; then
+cat >"$tmp/defines.awk" <<\_CEOF
+BEGIN {
+_ACEOF
+
+# Transform confdefs.h into an awk script `defines.awk', embedded as
+# here-document in config.status, that substitutes the proper values into
+# config.h.in to produce config.h.
+
+# Create a delimiter string that does not exist in confdefs.h, to ease
+# handling of long lines.
+ac_delim='%!_!# '
+for ac_last_try in false false :; do
+  ac_t=`sed -n "/$ac_delim/p" confdefs.h`
+  if test -z "$ac_t"; then
+    break
+  elif $ac_last_try; then
+    AC_MSG_ERROR([could not make $CONFIG_HEADERS])
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+  fi
+done
+
+# For the awk script, D is an array of macro values keyed by name,
+# likewise P contains macro parameters if any.  Preserve backslash
+# newline sequences.
+dnl
+dnl Structure of the sed script that reads confdefs.h:
+dnl rset:  main loop, searches for `#define' lines
+dnl def:   deal with a `#define' line
+dnl bsnl:  deal with a `#define' line that ends with backslash-newline
+dnl cont:  handle a continuation line
+dnl bsnlc: handle a continuation line that ends with backslash-newline
+dnl
+dnl Each sub part escapes the awk special characters and outputs a statement
+dnl inserting the macro value into the array D, keyed by name.  If the macro
+dnl uses parameters, they are added in the array P, keyed by name.
+dnl
+dnl Long values are split into several string literals with help of ac_delim.
+dnl Assume nobody uses macro names of nearly 150 bytes length.
+dnl
+dnl The initial replace for `#define' lines inserts a leading space
+dnl in order to ease later matching; otherwise, output lines may be
+dnl repeatedly matched.
+dnl
+dnl m4-double-quote most of this for [, ], define, and substr:
+[
+ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*
+sed -n '
+s/.\{]_AC_AWK_LITERAL_LIMIT[\}/&'"$ac_delim"'/g
+t rset
+:rset
+s/^[    ]*#[    ]*define[       ][      ]*/ /
+t def
+d
+:def
+s/\\$//
+t bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[    ]*\(.*\)/P["\1"]="\2"\
+D["\1"]=" \3"/p
+s/^ \('"$ac_word_re"'\)[        ]*\(.*\)/D["\1"]=" \2"/p
+d
+:bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[    ]*\(.*\)/P["\1"]="\2"\
+D["\1"]=" \3\\\\\\n"\\/p
+t cont
+s/^ \('"$ac_word_re"'\)[        ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p
+t cont
+d
+:cont
+n
+s/.\{]_AC_AWK_LITERAL_LIMIT[\}/&'"$ac_delim"'/g
+t clear
+:clear
+s/\\$//
+t bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/"/p
+d
+:bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p
+b cont
+' <confdefs.h | sed '
+s/'"$ac_delim"'/"\\\
+"/g' >>$CONFIG_STATUS
+
+cat >>$CONFIG_STATUS <<_ACEOF
+  FS = ""
+}
+/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ {
+  line = \$ 0
+  split(line, arg, " ")
+  if (arg[1] == "#") {
+    defundef = arg[2]
+    mac1 = arg[3]
+  } else {
+    defundef = substr(arg[1], 2)
+    mac1 = arg[2]
+  }
+  split(mac1, mac2, "(") #)
+  macro = mac2[1]
+  definiens = D[macro]
+  if (definiens) {
+    # Preserve the white space surrounding the "#".
+    prefix = substr(line, 1, index(line, defundef) - 1)
+    print prefix "define", macro P[macro] definiens
+    next
+  } else {
+    # Replace #undef with comments.  This is necessary, for example,
+    # in the case of _POSIX_SOURCE, which is predefined and required
+    # on some systems where configure will not decide to define it.
+    if (defundef == "undef") {
+      print "/*", line, "*/"
+      next
+    }
+  }
+}
+{ print }
+]dnl End of double-quoted section
+_CEOF
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+fi # test -n "$CONFIG_HEADERS"
+
+])# _AC_OUTPUT_HEADERS_PREPARE
+
+
 # _AC_OUTPUT_HEADER
 # -----------------
 #
@@ -689,110 +830,9 @@ m4_define([_AC_OUTPUT_HEADER],
   #
   # CONFIG_HEADER
   #
-_ACEOF
-
-# Transform confdefs.h into a sed script `conftest.defines', that
-# substitutes the proper values into config.h.in to produce config.h.
-rm -f conftest.defines conftest.tail
-# First, append a space to every undef/define line, to ease matching.
-echo 's/$/ /' >conftest.defines
-# Then, protect against being on the right side of a sed subst, or in
-# an unquoted here document, in config.status.  If some macros were
-# called several times there might be several #defines for the same
-# symbol, which is useless.  But do not sort them, since the last
-# AC_DEFINE must be honored.
-dnl
-dnl Quote, for `[ ]' and `define'.
-[ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*
-# These sed commands are passed to sed as "A NAME B PARAMS C VALUE D", where
-# NAME is the cpp macro being defined, VALUE is the value it is being given.
-# PARAMS is the parameter list in the macro definition--in most cases, it's
-# just an empty string.
-ac_dA='s,^\\([  #]*\\)[^        ]*\\([  ]*'
-ac_dB='\\)[     (].*,\\1define\\2'
-ac_dC=' '
-ac_dD=' ,']
-dnl ac_dD used to contain `;t' at the end, but that was both slow and incorrect.
-dnl 1) Since the script must be broken into chunks containing 100 commands,
-dnl the extra command meant extra calls to sed.
-dnl 2) The code was incorrect: in the unusual case where a symbol has multiple
-dnl different AC_DEFINEs, the last one should be honored.
-dnl
-dnl ac_dB works because every line has a space appended.  ac_dD reinserts
-dnl the space, because some symbol may have been AC_DEFINEd several times.
-dnl
-dnl The first use of ac_dA has a space prepended, so that the second
-dnl use does not match the initial 's' of $ac_dA.
-[
-uniq confdefs.h |
-  sed -n '
-       t rset
-       :rset
-       s/^[     ]*#[    ]*define[       ][      ]*//
-       t ok
-       d
-       :ok
-       s/[\\&,]/\\&/g
-       s/^\('"$ac_word_re"'\)\(([^()]*)\)[      ]*\(.*\)/ '"$ac_dA"'\1'"$ac_dB"'\2'"${ac_dC}"'\3'"$ac_dD"'/p
-       s/^\('"$ac_word_re"'\)[  ]*\(.*\)/'"$ac_dA"'\1'"$ac_dB$ac_dC"'\2'"$ac_dD"'/p
-  ' >>conftest.defines
-]
-# Remove the space that was appended to ease matching.
-# Then replace #undef with comments.  This is necessary, for
-# example, in the case of _POSIX_SOURCE, which is predefined and required
-# on some systems where configure will not decide to define it.
-# (The regexp can be short, since the line contains either #define or #undef.)
-echo 's/ $//
-[s,^[   #]*u.*,/* & */,]' >>conftest.defines
-
-# Break up conftest.defines:
-dnl If we cared only about not exceeding line count limits, we would use this:
-dnl ac_max_sed_lines=m4_eval(_AC_SED_CMD_LIMIT - 3)
-dnl But in practice this can generate scripts that contain too many bytes;
-dnl and this can cause obscure 'sed' failures, e.g.,
-dnl http://lists.gnu.org/archive/html/bug-coreutils/2006-05/msg00127.html
-dnl So instead, we use the following, which is about half the size we'd like:
-ac_max_sed_lines=50
-dnl In the future, let's use awk or sh instead of sed to do substitutions,
-dnl since we have so many problems with sed.
-
-# First sed command is:         sed -f defines.sed $ac_file_inputs >"$tmp/out1"
-# Second one is:        sed -f defines.sed "$tmp/out1" >"$tmp/out2"
-# Third one will be:    sed -f defines.sed "$tmp/out2" >"$tmp/out1"
-# et cetera.
-ac_in='$ac_file_inputs'
-ac_out='"$tmp/out1"'
-ac_nxt='"$tmp/out2"'
-
-while :
-do
-  # Write a here document:
-  dnl Quote, for the `[ ]' and `define'.
-[  cat >>$CONFIG_STATUS <<_ACEOF
-    # First, check the format of the line:
-    cat >"\$tmp/defines.sed" <<\\CEOF
-/^[     ]*#[    ]*undef[        ][      ]*$ac_word_re[  ]*\$/b def
-/^[     ]*#[    ]*define[       ][      ]*$ac_word_re[(         ]/b def
-b
-:def
-_ACEOF]
-  sed ${ac_max_sed_lines}q conftest.defines >>$CONFIG_STATUS
-  echo 'CEOF
-    sed -f "$tmp/defines.sed"' "$ac_in >$ac_out" >>$CONFIG_STATUS
-  ac_in=$ac_out; ac_out=$ac_nxt; ac_nxt=$ac_in
-  sed 1,${ac_max_sed_lines}d conftest.defines >conftest.tail
-  grep . conftest.tail >/dev/null || break
-  rm -f conftest.defines
-  mv conftest.tail conftest.defines
-done
-rm -f conftest.defines conftest.tail
-
-dnl Now back to your regularly scheduled config.status.
-echo "ac_result=$ac_in" >>$CONFIG_STATUS
-cat >>$CONFIG_STATUS <<\_ACEOF
   if test x"$ac_file" != x-; then
     AS_ECHO(["/* $configure_input  */"]) >"$tmp/config.h"
-    cat "$ac_result" >>"$tmp/config.h"
+    $AWK -f "$tmp/defines.awk" $ac_file_inputs >>"$tmp/config.h"
     if diff $ac_file "$tmp/config.h" >/dev/null 2>&1; then
       AC_MSG_NOTICE([$ac_file is unchanged])
     else
@@ -801,9 +841,8 @@ cat >>$CONFIG_STATUS <<\_ACEOF
     fi
   else
     AS_ECHO(["/* $configure_input  */"])
-    cat "$ac_result"
+    $AWK -f "$tmp/defines.awk" $ac_file_inputs
   fi
-  rm -f "$tmp/out[12]"
 dnl If running for Automake, be ready to perform additional
 dnl commands to set up the timestamp files.
 m4_ifdef([_AC_AM_CONFIG_HEADER_HOOK],
@@ -1513,6 +1552,7 @@ dnl The comment above AS_TMPDIR says at most 4 chars are allowed.
 AS_TMPDIR([conf], [.])
 
 m4_ifdef([_AC_SEEN_CONFIG(FILES)], [_AC_OUTPUT_FILES_PREPARE])[]dnl
+m4_ifdef([_AC_SEEN_CONFIG(HEADERS)], [_AC_OUTPUT_HEADERS_PREPARE])[]dnl
 
 for ac_tag in[]dnl
   m4_ifdef([_AC_SEEN_CONFIG(FILES)],    [:F $CONFIG_FILES])[]dnl
@@ -1611,6 +1651,12 @@ m4_define([AC_OUTPUT_MAKE_DEFS],
 # take arguments), then branch to the quote section.  Otherwise,
 # look for a macro that doesn't take arguments.
 ac_script='
+:mline
+/\\$/{
+ N
+ s,\\\n,,
+ b mline
+}
 t clear
 :clear
 s/^[    ]*#[    ]*define[       ][      ]*\([^  (][^    (]*([^)]*)\)[   ]*\(.*\)/-D\1=\2/g
diff --git a/tests/torture.at b/tests/torture.at
index 268c15b..1d601e0 100644
--- a/tests/torture.at
+++ b/tests/torture.at
@@ -333,6 +333,14 @@ AC_DEFINE(fubar, tutu)
 AC_DEFINE(a, A)
 AC_DEFINE(aaa, AAA)
 AC_DEFINE(aa, AA)
+
+# backslash-newline combinations
+AC_DEFINE([multiline], [line1\
+line2\
+line3 \
+line4])
+AC_DEFINE([multiline_args(ARG1, ARG2)], [ARG2 \
+ARG1])
 AC_CONFIG_FILES(defs)
 
 # Things included in confdefs.h, but which make no sense in
@@ -359,13 +367,22 @@ AT_DATA([config.hin],
 [[#define foo   0
 #  define bar bar
 #  define baz   "Archimedes was sinking in his baz"
-#  define fubar                                tutu
+  #  define fubar                              tutu
 #define a B
-#define aa BB
-#define aaa BBB
+ #define aa BB
+ #  define aaa BBB
 #undef a
-#undef aa
+ #  undef aa
 #undef aaa
+#define aaa(a, aa) aa a
+#define aaab
+#define aaac(a, aa) aa a
+#undef multiline
+#  undef multiline_args
+/* an ugly one: */
+#define str(define) \
+#define
+#define stringify(arg) str(arg)
 ]])
 
 AT_CHECK_AUTOCONF
@@ -376,19 +393,32 @@ AT_DATA([expout],
 #define foo toto
 #  define bar tata
 #  define baz titi
-#  define fubar tutu
+  #  define fubar tutu
 #define a A
-#define aa AA
-#define aaa AAA
+ #define aa AA
+ #  define aaa AAA
 #define a A
-#define aa AA
+ #  define aa AA
+#define aaa AAA
 #define aaa AAA
+#define aaab
+#define aaac(a, aa) aa a
+#define multiline line1\
+line2\
+line3 \
+line4
+#  define multiline_args(ARG1, ARG2) ARG2 \
+ARG1
+/* an ugly one: */
+#define str(define) \
+#define
+#define stringify(arg) str(arg)
 ]])
 AT_CHECK([cat config.h], 0, expout)
 
 # Check the value of DEFS.
 AT_DATA([expout],
-[[-DPACKAGE_NAME=\"\" -DPACKAGE_TARNAME=\"\" -DPACKAGE_VERSION=\"\" -DPACKAGE_STRING=\"\" -DPACKAGE_BUGREPORT=\"\" -Dfoo=toto -Dbar=tata -Dbaz=titi -Dfubar=tutu -Da=A -Daaa=AAA -Daa=AA
+[[-DPACKAGE_NAME=\"\" -DPACKAGE_TARNAME=\"\" -DPACKAGE_VERSION=\"\" -DPACKAGE_STRING=\"\" -DPACKAGE_BUGREPORT=\"\" -Dfoo=toto -Dbar=tata -Dbaz=titi -Dfubar=tutu -Da=A -Daaa=AAA -Daa=AA -Dmultiline=line1line2line3\ line4 -Dmultiline_args\(ARG1,\ ARG2\)=ARG2\ ARG1
 ]])
 
 # Because we strip trailing spaces in `testsuite' we can't leave one in
@@ -476,7 +506,7 @@ m4_dquote(m4_defn([AT_BIG_VALUE]))[)]]
 # used in the torture tests.
 m4_defun([AC_DEFUBST],
 [AC_DUMMY_VAR($1)="AC_BIG_VALUE"
-AC_DEFINE_UNQUOTED(AC_DUMMY_VAR($1), "$AC_DUMMY_VAR($1)",
+AC_DEFINE_UNQUOTED(AC_DUMMY_VAR($1), "$AC_DUMMY_VAR($1) $AC_DUMMY_VAR($1)",
                   AC_DESCRIPTION)
 AC_SUBST(AC_DUMMY_VAR($1))
 AC_SUBST_FILE([f]$1)
@@ -517,7 +547,7 @@ for awk_arg in FOO= AWK=awk; do
 m4_for(AT_Count, 1, 100, 1,
 [
 /* AT_DESCRIPTION */
-[#define] AT_DUMMY_VAR(AT_Count) "AT_BIG_VALUE"
+[#define] AT_DUMMY_VAR(AT_Count) "AT_BIG_VALUE AT_BIG_VALUE"
 ])])
 
   AT_CHECK([sed -n '4,$ p' config.h], 0, expout)
@@ -657,6 +687,7 @@ AC_SUBST([zardoz])
 file=File
 AC_SUBST_FILE([file])
 AC_DEFINE([foo], [[X*'[]+ ",& &`\($foo !]], [Awful value.])
+AC_DEFINE([bar], [[%!_!# X]], [Value that is used as special delimiter.])
 AC_PROG_AWK
 AC_CONFIG_FILES([Foo Zardoz])]])
 
@@ -679,7 +710,8 @@ address@hidden@
 @address@hidden
 ]])
   AT_CHECK([cmp allowed-chars Zardoz])
-  AT_CHECK_DEFINES([[#define foo X*'[]+ ",& &`\($foo !
+  AT_CHECK_DEFINES([[#define bar %!_!# X
+#define foo X*'[]+ ",& &`\($foo !
 ]])
 done
 AT_CLEANUP




reply via email to

[Prev in Thread] Current Thread [Next in Thread]