trans-coord-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] gnun-validate-html may break on ASCII art


From: Ineiev
Subject: Re: [PATCH] gnun-validate-html may break on ASCII art
Date: Fri, 19 Aug 2011 14:53:20 +0000
User-agent: Thunderbird 2.0.0.14 (X11/20080501)

On 08/18/2011 03:32 PM, Ineiev wrote:
It is suggested that the SSIs be expanded via a recursive
awk script. In addition, configure-time variables are now used
for the awk and sed programs, in the same way that the current
version of gnun-validate-html uses a variable for m4.

Next revision: also expand includes of the form '<!--#include file="...',
make relative paths of the included files work, and support
' and ` as well as " for quoting the file name.
Index: gnun-validate-html.in
===================================================================
RCS file: /sources/trans-coord/trans-coord/gnun/server/gnun/gnun-validate-html.in,v
retrieving revision 1.9
diff -U 2 -r1.9 gnun-validate-html.in
--- gnun-validate-html.in       7 Jan 2010 14:43:04 -0000       1.9
+++ gnun-validate-html.in       19 Aug 2011 14:45:18 -0000
@@ -1,5 +1,5 @@
 #!/bin/bash
 
-# Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+# Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
 
 # This file is part of GNUnited Nations.
@@ -21,5 +21,5 @@
 cat <<EOF
 gnun-validate-html (@PACKAGE_NAME@) @PACKAGE_VERSION@
-Copyright (C) 2010 Free Software Foundation, Inc.
+Copyright (C) 2011 Free Software Foundation, Inc.
 You may redistribute copies of @PACKAGE_NAME@
 under the terms of the GNU General Public License.
@@ -69,18 +69,59 @@
 TMP1=`mktemp -t gnun.1.XXXXXX`
 TMP2=`mktemp -t gnun.2.XXXXXX`
-TMP3=`mktemp -t gnun.3.XXXXXX`
-trap "rm -f $TMP1 $TMP2 $TMP3" EXIT
+trap "rm -f $TMP1 $TMP2" EXIT
 
 # Expand input file's #include directives and save the result in
 # $TMP1.
-cat $1 > $TMP1
+cat > $TMP2 <<"EOF"
+# Limitations: CGI includes would be expanded in a wrong way
+BEGIN {
+  relative_dir_name = ARGV[1];
+  sub ("[^/]*$", "", relative_dir_name);
+  sub ("/*$", "/", relative_dir_name);
+}
+
+/<!--#include +(virtual)|(file)=[\"`']/ {
+  n = split ($0, names, "<!--#include ");
+  printf ("%s", names[1]);
+  for (i = 2; i <= n; i++)
+    {
+      # Extract the next included file name
+      if (names[i] !~ /^ *(virtual)|(file)=[\"'`]/)
+        {
+          # Pass it unchanged: this is not an Apache include directive
+          printf ("<!--#include %s", names[i]);
+          continue;
+        }
+      m = index (names[i], "-->");
+      if (m == 0) # This shouldn't happen: the "-->" must be on the same line
+        m = length (names[i]) + 1;
+      name = substr (names[i], 1, m-1);
+      # Absolute paths are not allowed with "file=" type of includes,
+      # but we process them in the same way for simplicity
+      sub ("^ *((virtual)|(file))=", "", name);
+      quote_symbol = substr (name, 1, 1);
+      sub ("^.", "", name);
+      sub (quote_symbol "[^" quote_symbol "]*$", "", name);
+
+      # Construct the real path to the file
+      if (name ~ /^\//)
+        name = root name;
+      else
+        name = relative_dir_name name;
+
+      # Invoke the script recursively
+      system ("@AWK@ -v script_name='" script_name "' -v root='" \
+               root "' -f '" script_name "' " name);
+
+      # Output the part remaining after the include directive
+      print (substr (names[i], m + 3));
+    }
+  next;
+}
+
+{ print; }
+EOF
 
-while true; do
-    grep --quiet '<!--#include virtual' $TMP1 || break
-    sed --in-place \
-      "s/<\!--#include virtual=\"\/\?\(.*\)\" -->/m4_include(\`\1')/g" $TMP1
-    @M4@ -P -EE -I $ROOT $TMP1 > $TMP3
-    cp $TMP3 $TMP1
-done
+@AWK@ -v script_name=$TMP2 -v root="$ROOT" -f $TMP2 $1 > $TMP1
 
 # Execute xmllint on $TMP1 and save its output to $TMP2.
@@ -98,5 +139,5 @@
 # expanded #include directives) and the translator can not easily look
 # up for references in it.
-cat $TMP2 | sed '
+cat $TMP2 | @SED@ '
   /line [[:digit:]]\+/ {
      p

reply via email to

[Prev in Thread] Current Thread [Next in Thread]