autoconf-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: awk for config headers


From: Ralf Wildenhues
Subject: Re: awk for config headers
Date: Tue, 2 Oct 2007 07:00:49 +0200
User-agent: Mutt/1.5.13 (2006-08-11)

Hello Paul,

Thanks for the review!

* Paul Eggert wrote on Tue, Oct 02, 2007 at 01:40:47AM CEST:
> Ralf Wildenhues <address@hidden> writes:
> 
> > +  for (key in P) P_is_set[key] = 1
> 
> I don't see why this is necessary.  P[x] is either empty (which counts
> as false), or is a value that counts as true.  So we should be able
> to dispense with P_is_set, and use "if (P[x])" rather than
> "if (P_is_set[x])"

Yes, right.

> I assume D_is_set is necessary because the macro value might be
> 0 or the empty string.

Yes.

> > +  split(line, arg, " ")
> > +  if (arg[1] == "#") {
> > +    defundef = arg[2] ""
> > +    mac1 = arg[3] ""
> > +  } else if (substr(arg[1], 1, 1) == "#") {
> > +    defundef = substr(arg[1], 2)
> > +    mac1 = arg[2] ""
> > +  } else {
> > +    mac1 = ""
> > +  }
> 
> The 'split' doesn't need to be done unless the line starts with '#'.

Why is that a problem?  The above is short to write, and I don't see how
I can avoid regex evaluation at all on nonmatching lines.

> I don't see why one needs to concatenate the empty string to values;
> this is done several times in the code.

Hmm, a habit that comes from having been bitten by weird conversion 
to numbers at times; I may just have misunderstood precedence then,
though.  I've removed them now.

> > +}
> > +macro != "" && (defundef == "define" || defundef == "undef") {
> 
> This is a bit confusing.  Why not omit the "}" and put the test
> in an "if"?

Yep, that's better.

> > +    if (P_is_set[macro])
> > +      macro = macro "" P[macro]
> 
> This can be simply "macro = macro P[macro]", no?

I guess.

> > +    # Decompose white space to preserve indentation:
> > +    split(line, ws1, "#")
> > +    split(ws1[2], ws2, "u")
> > +    split(ws2[1], ws3, "d")
> > +    line = ws1[1] "#" ws3[1] "define " macro " " value
> 
> Sorry, I'm a bit lost with this sequence.  Is there some simpler way
> to express what's going on?  It seems a bit weird to split on "u", for
> example.

Hmm.  I would like to extract white space before `#', and between `#'
and `define' or `undef'.  Is this easier to read?

      split(line, ws1, "#")
      split(ws1[2], ws2, substr(defundef, 1, 1))
      line = ws1[1] "#" ws2[1] "define " macro " " value

Updated patch below.  (IMHO using git-format-patch and sending as two
separate changes, the original plus corrections, would be less readable
for review, no?  OTOH git-diff requires manual tracking of the ChangeLog
entry again.)

I've lightly retested only with gawk and Solaris awk.

Cheers,
Ralf
---
diff --git a/NEWS b/NEWS
index 73dc8f7..2734f8d 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,13 @@ GNU Autoconf NEWS - User visible changes.
 
 * Major changes in Autoconf 2.61b (????-??-??)
 
+** config.status now uses awk instead of sed also for config headers.
+
+   - As a side effect, AC_DEFINE and AC_DEFINE_UNQUOTED now handle multi-line
+     values, i.e., backslash-newline combinations are handled correctly.
+     Further, for config headers, the total size of values is not limited by
+     the POSIX length limit of text lines any more, only each single line.
+
 ** Autoconf is now licensed under the General Public License version 3
    or later (GPLv3+).  As with earlier versions, the license includes
    an exception clause so that you may release a configure script
diff --git a/lib/autoconf/status.m4 b/lib/autoconf/status.m4
index 6364629..36d0577 100644
--- a/lib/autoconf/status.m4
+++ b/lib/autoconf/status.m4
@@ -321,11 +321,11 @@ m4_define([_AC_AWK_LITERAL_LIMIT],
 
 # _AC_OUTPUT_FILES_PREPARE
 # ------------------------
-# Create the sed scripts needed for CONFIG_FILES.
+# Create the awk scripts needed for CONFIG_FILES.
 # Support multiline substitutions and make sure that the substitutions are
 # not evaluated recursively.
 # The intention is to have readable config.status and configure, even
-# though this m4 code might be scaring.
+# though this m4 code might be scary.
 #
 # This code was written by Dan Manthey and rewritten by Ralf Wildenhues.
 #
@@ -675,6 +675,154 @@ AC_DEFUN([AC_CONFIG_HEADER],
 [AC_CONFIG_HEADERS([$1])])
 
 
+# _AC_OUTPUT_HEADERS_PREPARE
+# --------------------------
+# Create the awk scripts needed for CONFIG_HEADERS.
+# Support multiline #defines.
+#
+# This macro is expanded inside a here document.  If the here document is
+# closed, it has to be reopened with "cat >>$CONFIG_STATUS <<\_ACEOF".
+#
+m4_define([_AC_OUTPUT_HEADERS_PREPARE],
+[# Set up the scripts for CONFIG_HEADERS section.
+# No need to generate them if there are no CONFIG_HEADERS.
+# This happens for instance with `./config.status Makefile'.
+if test -n "$CONFIG_HEADERS"; then
+cat >"$tmp/defines.awk" <<\_CEOF
+BEGIN {
+_ACEOF
+
+# Transform confdefs.h into an awk script `defines.awk', embedded as
+# here-document in config.status, that substitutes the proper values into
+# config.h.in to produce config.h.
+
+# Create a delimiter string that does not exist in confdefs.h, to ease
+# handling of long lines.
+ac_delim='%!_!# '
+for ac_last_try in false false :; do
+  ac_t=`sed -n "/$ac_delim/p" confdefs.h`
+  if test -z "$ac_t"; then
+    break
+  elif $ac_last_try; then
+    AC_MSG_ERROR([could not make $CONFIG_HEADERS])
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+  fi
+done
+
+# For the awk script, D is an array of macro values keyed by name,
+# likewise P contains macro parameters if any.  Preserve backslash
+# newline sequences.
+dnl
+dnl Structure of the sed script that reads confdefs.h:
+dnl rset:  main loop, searches for `#define' lines
+dnl def:   deal with a `#define' line
+dnl bsnl:  deal with a `#define' line that ends with backslash-newline
+dnl cont:  handle a continuation line
+dnl bsnlc: handle a continuation line that ends with backslash-newline
+dnl
+dnl Each sub part escapes the awk special characters and outputs a statement
+dnl inserting the macro value into the array D, keyed by name.  If the macro
+dnl uses parameters, they are added in the array P, keyed by name.
+dnl
+dnl Long values are split into several string literals with help of ac_delim.
+dnl Assume nobody uses macro names of nearly 150 bytes length.
+dnl
+dnl The initial replace for `#define' lines inserts a leading space
+dnl in order to ease later matching; otherwise, output lines may be
+dnl repeatedly matched.
+dnl
+dnl m4-double-quote most of this for [, ], define, and substr:
+[
+ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*
+sed -n '
+s/.\{]_AC_AWK_LITERAL_LIMIT[\}/&'"$ac_delim"'/g
+t rset
+:rset
+s/^[    ]*#[    ]*define[       ][      ]*/ /
+t def
+d
+:def
+s/\\$//
+t bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[    ]*\(.*\)/P["\1"]="\2"\
+D["\1"]="\3"/p
+s/^ \('"$ac_word_re"'\)[        ]*\(.*\)/D["\1"]="\2"/p
+d
+:bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[    ]*\(.*\)/P["\1"]="\2"\
+D["\1"]="\3\\\\\\n"\\/p
+t cont
+s/^ \('"$ac_word_re"'\)[        ]*\(.*\)/D["\1"]="\2\\\\\\n"\\/p
+t cont
+d
+:cont
+n
+s/.\{]_AC_AWK_LITERAL_LIMIT[\}/&'"$ac_delim"'/g
+t clear
+:clear
+s/\\$//
+t bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/"/p
+d
+:bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p
+b cont
+' <confdefs.h | sed '
+s/'"$ac_delim"'/"\\\
+"/g' >>$CONFIG_STATUS
+
+cat >>$CONFIG_STATUS <<_ACEOF
+  for (key in D) D_is_set[key] = 1
+  FS = ""
+}
+{
+  macro = ""
+  line = \$ 0
+  split(line, arg, " ")
+  if (arg[1] == "#") {
+    defundef = arg[2]
+    mac1 = arg[3]
+  } else if (substr(arg[1], 1, 1) == "#") {
+    defundef = substr(arg[1], 2)
+    mac1 = arg[2]
+  } else {
+    mac1 = ""
+  }
+  split(mac1, mac2, "(") #)
+  if (mac2[1] ~ /^$ac_word_re\$/)
+    macro = mac2[1]
+  if (macro != "" && (defundef == "define" || defundef == "undef")) {
+    if (D_is_set[macro]) {
+      value = D[macro]
+      if (P[macro])
+       macro = macro P[macro]
+      # Decompose white space to preserve indentation:
+      split(line, ws1, "#")
+      split(ws1[2], ws2, substr(defundef, 1, 1))
+      line = ws1[1] "#" ws2[1] "define " macro " " value
+    } else {
+      # Replace #undef with comments.  This is necessary, for example,
+      # in the case of _POSIX_SOURCE, which is predefined and required
+      # on some systems where configure will not decide to define it.
+      if (defundef == "undef")
+       line = "/* " line " */"
+    }
+  }
+}
+{ print line }
+]dnl End of double-quoted section
+_CEOF
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+fi # test -n "$CONFIG_HEADERS"
+
+])# _AC_OUTPUT_HEADERS_PREPARE
+
+
 # _AC_OUTPUT_HEADER
 # -----------------
 #
@@ -689,110 +837,9 @@ m4_define([_AC_OUTPUT_HEADER],
   #
   # CONFIG_HEADER
   #
-_ACEOF
-
-# Transform confdefs.h into a sed script `conftest.defines', that
-# substitutes the proper values into config.h.in to produce config.h.
-rm -f conftest.defines conftest.tail
-# First, append a space to every undef/define line, to ease matching.
-echo 's/$/ /' >conftest.defines
-# Then, protect against being on the right side of a sed subst, or in
-# an unquoted here document, in config.status.  If some macros were
-# called several times there might be several #defines for the same
-# symbol, which is useless.  But do not sort them, since the last
-# AC_DEFINE must be honored.
-dnl
-dnl Quote, for `[ ]' and `define'.
-[ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*
-# These sed commands are passed to sed as "A NAME B PARAMS C VALUE D", where
-# NAME is the cpp macro being defined, VALUE is the value it is being given.
-# PARAMS is the parameter list in the macro definition--in most cases, it's
-# just an empty string.
-ac_dA='s,^\\([  #]*\\)[^        ]*\\([  ]*'
-ac_dB='\\)[     (].*,\\1define\\2'
-ac_dC=' '
-ac_dD=' ,']
-dnl ac_dD used to contain `;t' at the end, but that was both slow and incorrect.
-dnl 1) Since the script must be broken into chunks containing 100 commands,
-dnl the extra command meant extra calls to sed.
-dnl 2) The code was incorrect: in the unusual case where a symbol has multiple
-dnl different AC_DEFINEs, the last one should be honored.
-dnl
-dnl ac_dB works because every line has a space appended.  ac_dD reinserts
-dnl the space, because some symbol may have been AC_DEFINEd several times.
-dnl
-dnl The first use of ac_dA has a space prepended, so that the second
-dnl use does not match the initial 's' of $ac_dA.
-[
-uniq confdefs.h |
-  sed -n '
-       t rset
-       :rset
-       s/^[     ]*#[    ]*define[       ][      ]*//
-       t ok
-       d
-       :ok
-       s/[\\&,]/\\&/g
-       s/^\('"$ac_word_re"'\)\(([^()]*)\)[      ]*\(.*\)/ '"$ac_dA"'\1'"$ac_dB"'\2'"${ac_dC}"'\3'"$ac_dD"'/p
-       s/^\('"$ac_word_re"'\)[  ]*\(.*\)/'"$ac_dA"'\1'"$ac_dB$ac_dC"'\2'"$ac_dD"'/p
-  ' >>conftest.defines
-]
-# Remove the space that was appended to ease matching.
-# Then replace #undef with comments.  This is necessary, for
-# example, in the case of _POSIX_SOURCE, which is predefined and required
-# on some systems where configure will not decide to define it.
-# (The regexp can be short, since the line contains either #define or #undef.)
-echo 's/ $//
-[s,^[   #]*u.*,/* & */,]' >>conftest.defines
-
-# Break up conftest.defines:
-dnl If we cared only about not exceeding line count limits, we would use this:
-dnl ac_max_sed_lines=m4_eval(_AC_SED_CMD_LIMIT - 3)
-dnl But in practice this can generate scripts that contain too many bytes;
-dnl and this can cause obscure 'sed' failures, e.g.,
-dnl http://lists.gnu.org/archive/html/bug-coreutils/2006-05/msg00127.html
-dnl So instead, we use the following, which is about half the size we'd like:
-ac_max_sed_lines=50
-dnl In the future, let's use awk or sh instead of sed to do substitutions,
-dnl since we have so many problems with sed.
-
-# First sed command is:         sed -f defines.sed $ac_file_inputs >"$tmp/out1"
-# Second one is:        sed -f defines.sed "$tmp/out1" >"$tmp/out2"
-# Third one will be:    sed -f defines.sed "$tmp/out2" >"$tmp/out1"
-# et cetera.
-ac_in='$ac_file_inputs'
-ac_out='"$tmp/out1"'
-ac_nxt='"$tmp/out2"'
-
-while :
-do
-  # Write a here document:
-  dnl Quote, for the `[ ]' and `define'.
-[  cat >>$CONFIG_STATUS <<_ACEOF
-    # First, check the format of the line:
-    cat >"\$tmp/defines.sed" <<\\CEOF
-/^[     ]*#[    ]*undef[        ][      ]*$ac_word_re[  ]*\$/b def
-/^[     ]*#[    ]*define[       ][      ]*$ac_word_re[(         ]/b def
-b
-:def
-_ACEOF]
-  sed ${ac_max_sed_lines}q conftest.defines >>$CONFIG_STATUS
-  echo 'CEOF
-    sed -f "$tmp/defines.sed"' "$ac_in >$ac_out" >>$CONFIG_STATUS
-  ac_in=$ac_out; ac_out=$ac_nxt; ac_nxt=$ac_in
-  sed 1,${ac_max_sed_lines}d conftest.defines >conftest.tail
-  grep . conftest.tail >/dev/null || break
-  rm -f conftest.defines
-  mv conftest.tail conftest.defines
-done
-rm -f conftest.defines conftest.tail
-
-dnl Now back to your regularly scheduled config.status.
-echo "ac_result=$ac_in" >>$CONFIG_STATUS
-cat >>$CONFIG_STATUS <<\_ACEOF
   if test x"$ac_file" != x-; then
     AS_ECHO(["/* $configure_input  */"]) >"$tmp/config.h"
-    cat "$ac_result" >>"$tmp/config.h"
+    $AWK -f "$tmp/defines.awk" $ac_file_inputs >>"$tmp/config.h"
     if diff $ac_file "$tmp/config.h" >/dev/null 2>&1; then
       AC_MSG_NOTICE([$ac_file is unchanged])
     else
@@ -801,9 +848,8 @@ cat >>$CONFIG_STATUS <<\_ACEOF
     fi
   else
     AS_ECHO(["/* $configure_input  */"])
-    cat "$ac_result"
+    $AWK -f "$tmp/defines.awk" $ac_file_inputs
   fi
-  rm -f "$tmp/out[12]"
 dnl If running for Automake, be ready to perform additional
 dnl commands to set up the timestamp files.
 m4_ifdef([_AC_AM_CONFIG_HEADER_HOOK],
@@ -1513,6 +1559,7 @@ dnl The comment above AS_TMPDIR says at most 4 chars are allowed.
 AS_TMPDIR([conf], [.])
 
 m4_ifdef([_AC_SEEN_CONFIG(FILES)], [_AC_OUTPUT_FILES_PREPARE])[]dnl
+m4_ifdef([_AC_SEEN_CONFIG(HEADERS)], [_AC_OUTPUT_HEADERS_PREPARE])[]dnl
 
 for ac_tag in[]dnl
   m4_ifdef([_AC_SEEN_CONFIG(FILES)],    [:F $CONFIG_FILES])[]dnl
@@ -1611,6 +1658,12 @@ m4_define([AC_OUTPUT_MAKE_DEFS],
 # take arguments), then branch to the quote section.  Otherwise,
 # look for a macro that doesn't take arguments.
 ac_script='
+:mline
+/\\$/{
+ N
+ s,\\\n,,
+ b mline
+}
 t clear
 :clear
 s/^[    ]*#[    ]*define[       ][      ]*\([^  (][^    (]*([^)]*)\)[   ]*\(.*\)/-D\1=\2/g
diff --git a/tests/torture.at b/tests/torture.at
index 268c15b..1d601e0 100644
--- a/tests/torture.at
+++ b/tests/torture.at
@@ -333,6 +333,14 @@ AC_DEFINE(fubar, tutu)
 AC_DEFINE(a, A)
 AC_DEFINE(aaa, AAA)
 AC_DEFINE(aa, AA)
+
+# backslash-newline combinations
+AC_DEFINE([multiline], [line1\
+line2\
+line3 \
+line4])
+AC_DEFINE([multiline_args(ARG1, ARG2)], [ARG2 \
+ARG1])
 AC_CONFIG_FILES(defs)
 
 # Things included in confdefs.h, but which make no sense in
@@ -359,13 +367,22 @@ AT_DATA([config.hin],
 [[#define foo   0
 #  define bar bar
 #  define baz   "Archimedes was sinking in his baz"
-#  define fubar                                tutu
+  #  define fubar                              tutu
 #define a B
-#define aa BB
-#define aaa BBB
+ #define aa BB
+ #  define aaa BBB
 #undef a
-#undef aa
+ #  undef aa
 #undef aaa
+#define aaa(a, aa) aa a
+#define aaab
+#define aaac(a, aa) aa a
+#undef multiline
+#  undef multiline_args
+/* an ugly one: */
+#define str(define) \
+#define
+#define stringify(arg) str(arg)
 ]])
 
 AT_CHECK_AUTOCONF
@@ -376,19 +393,32 @@ AT_DATA([expout],
 #define foo toto
 #  define bar tata
 #  define baz titi
-#  define fubar tutu
+  #  define fubar tutu
 #define a A
-#define aa AA
-#define aaa AAA
+ #define aa AA
+ #  define aaa AAA
 #define a A
-#define aa AA
+ #  define aa AA
+#define aaa AAA
 #define aaa AAA
+#define aaab
+#define aaac(a, aa) aa a
+#define multiline line1\
+line2\
+line3 \
+line4
+#  define multiline_args(ARG1, ARG2) ARG2 \
+ARG1
+/* an ugly one: */
+#define str(define) \
+#define
+#define stringify(arg) str(arg)
 ]])
 AT_CHECK([cat config.h], 0, expout)
 
 # Check the value of DEFS.
 AT_DATA([expout],
-[[-DPACKAGE_NAME=\"\" -DPACKAGE_TARNAME=\"\" -DPACKAGE_VERSION=\"\" -DPACKAGE_STRING=\"\" -DPACKAGE_BUGREPORT=\"\" -Dfoo=toto -Dbar=tata -Dbaz=titi -Dfubar=tutu -Da=A -Daaa=AAA -Daa=AA
+[[-DPACKAGE_NAME=\"\" -DPACKAGE_TARNAME=\"\" -DPACKAGE_VERSION=\"\" -DPACKAGE_STRING=\"\" -DPACKAGE_BUGREPORT=\"\" -Dfoo=toto -Dbar=tata -Dbaz=titi -Dfubar=tutu -Da=A -Daaa=AAA -Daa=AA -Dmultiline=line1line2line3\ line4 -Dmultiline_args\(ARG1,\ ARG2\)=ARG2\ ARG1
 ]])
 
 # Because we strip trailing spaces in `testsuite' we can't leave one in
@@ -476,7 +506,7 @@ m4_dquote(m4_defn([AT_BIG_VALUE]))[)]]
 # used in the torture tests.
 m4_defun([AC_DEFUBST],
 [AC_DUMMY_VAR($1)="AC_BIG_VALUE"
-AC_DEFINE_UNQUOTED(AC_DUMMY_VAR($1), "$AC_DUMMY_VAR($1)",
+AC_DEFINE_UNQUOTED(AC_DUMMY_VAR($1), "$AC_DUMMY_VAR($1) $AC_DUMMY_VAR($1)",
                   AC_DESCRIPTION)
 AC_SUBST(AC_DUMMY_VAR($1))
 AC_SUBST_FILE([f]$1)
@@ -517,7 +547,7 @@ for awk_arg in FOO= AWK=awk; do
 m4_for(AT_Count, 1, 100, 1,
 [
 /* AT_DESCRIPTION */
-[#define] AT_DUMMY_VAR(AT_Count) "AT_BIG_VALUE"
+[#define] AT_DUMMY_VAR(AT_Count) "AT_BIG_VALUE AT_BIG_VALUE"
 ])])
 
   AT_CHECK([sed -n '4,$ p' config.h], 0, expout)
@@ -657,6 +687,7 @@ AC_SUBST([zardoz])
 file=File
 AC_SUBST_FILE([file])
 AC_DEFINE([foo], [[X*'[]+ ",& &`\($foo !]], [Awful value.])
+AC_DEFINE([bar], [[%!_!# X]], [Value that is used as special delimiter.])
 AC_PROG_AWK
 AC_CONFIG_FILES([Foo Zardoz])]])
 
@@ -679,7 +710,8 @@ address@hidden@
 @address@hidden
 ]])
   AT_CHECK([cmp allowed-chars Zardoz])
-  AT_CHECK_DEFINES([[#define foo X*'[]+ ",& &`\($foo !
+  AT_CHECK_DEFINES([[#define bar %!_!# X
+#define foo X*'[]+ ",& &`\($foo !
 ]])
 done
 AT_CLEANUP




reply via email to

[Prev in Thread] Current Thread [Next in Thread]