bison-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Python Support: Patch to add automatic indenting


From: Dennis Heimbigner
Subject: Python Support: Patch to add automatic indenting
Date: Thu, 05 Sep 2013 11:46:04 -0600
User-agent: Thunderbird 2.0.0.24 (Windows/20100228)

From d8ddf3259dd99b42d3adbc1afce0f7860ae9a232 Mon Sep 17 00:00:00 2001
From: dmh <address@hidden>
Date: Thu, 5 Sep 2013 11:39:51 -0600
Subject: [PATCH] Python: Automatic indenting.

Modify previous python patches as follows:

* data/lalr1.py:  Automatically indent each line of code
                  for %code lexer {} and for actions in
                  grammar rules.
* data/python.m4: ditto
* doc/bison.texi: document automatic indentation
* src/getargs.c: revert to allow synclines.
* src/parse-gram.y: revert to allow synclines.
                    They are now inserted as proper
                    python comments.
* tests/python.at: modify to test out automatic indentation
---
 data/lalr1.py    |   2 +-
 data/python.m4   |  35 +++++++++------
 doc/bison.texi   |  29 +++++++------
 gnulib           |   2 +-
 src/getargs.c    |   4 --
 src/parse-gram.c |   4 +-
 src/parse-gram.h |   2 +-
 src/parse-gram.y |   7 +--
 tests/python.at  | 128 +++++++++++++++++++++++++++++++------------------------
 9 files changed, 119 insertions(+), 94 deletions(-)

diff --git a/data/lalr1.py b/data/lalr1.py
index 0aca5f3..09fb4fb 100644
--- a/data/lalr1.py
+++ b/data/lalr1.py
@@ -378,7 +378,7 @@ class Lexer :
 # If the user specifies %code lexer ...
 # Then insert it here
 ]b4_lex_param_if([[class YYLexer (Lexer) :
-]b4_percent_code_get([[lexer]])[
+]b4_python_indent([b4_percent_code_get([[lexer]])],2)[
 ]])[
 
 
diff --git a/data/python.m4 b/data/python.m4
index 09ac52d..518dd05 100644
--- a/data/python.m4
+++ b/data/python.m4
@@ -229,6 +229,21 @@ m4_define([b4_typed_parser_table_define],
 # left in. For python, remove these.
 m4_define([b4_debrace],[m4_translit($1,[{}])])
 
+# b4_python_indent([TEXT],[N])
+#-------------------------
+# Prefix each line in text by N blanks (0<N<=8).
+# Used to make sure %code blocks have necessary
+# minimum indentation.
+
+m4_define([b4_python_indent],
+[b4_python_nblanks([$2])m4_bpatsubst([$1],[
+],[
+b4_python_nblanks([$2])])])
+
+# b4_python_nblanks([N])
+#-------------------------
+# Helper for b4_python_indent
+m4_define([b4_python_nblanks],[m4_substr([         ],1,$1)])
 
 # b4_integral_parser_table_define(NAME, DATA, COMMENT)
 #-----------------------------------------------------
@@ -237,19 +252,15 @@ m4_define([b4_integral_parser_table_define],
 
 # b4-case(ID, CODE)
 # -----------------
-m4_define([b4_case], [    elif yyn == [$1] :
-m4_bpatsubst(m4_bpatsubst(m4_bpatsubst($2,
-                                        [^\([ \\t\\n]*\){],[\1]),
-                           [}[ \\t\\n]*$]),
-              [^],[      ])
-])
+# This is complicated for python because
+# each line of the body of the action 
+# must be indented more than the 4 blanks
+# that precede the elif line.
+# Before that, however, any braces must be
+# elided.
 
-m4_define([b4_casex], [    elif yyn == [$1] :
-      m4_bpatsubst(
-        m4_bpatsubst(
-          [}[ \\t\\n]*$]),
-      [\\n],[\\n          ])
-])
+m4_define([b4_case], [    elif yyn == [$1] :
+b4_python_indent(b4_debrace($2),6)])
 
 ## ------------------------- ##
 ## Assigning token numbers.  ##
diff --git a/doc/bison.texi b/doc/bison.texi
index 67db576..1932c48 100644
--- a/doc/bison.texi
+++ b/doc/bison.texi
@@ -12226,11 +12226,14 @@ of those notions.
 
 The most important difference is that line
 indentation has syntactic and semantic significance.
-This means a grammar file writer must be very
-careful about the indentation of inserted code.
-In practice, the writer may have to iterate on the
-indentation by looking at the bison produced python
-file in order to get it right.
+Effort has been expended to ensure that this is not
+a significant problem: if a piece of code is written
+and is legitimate python, then it should work correctly.
+It is possible, however, that something could be
+incorrectly indented. If indentation errors occur
+when compiling the parser, the grammar writer
+must be prepared to examine the generated python
+code and adjust the indentation in the bison .y file.
 
 There are, however, some heuristics that can help.
 @itemize
@@ -12238,8 +12241,8 @@ There are, however, some heuristics that can help.
 @item Assume that the code inside a @code{%code} or @code{%code qualifier}
 is at indent level zero (0).
 @item Single line actions (e.g. @address@hidden@}}) should cause no problems.
-@item Multiple line actions should assume an indentation level greater
-than four (4).
+@item Multiple line actions can be indented as desired, but note
+that every line of the action will automatically be indented by 6 blanks.
 @end itemize
 
 In python code, line breaks can also have syntactic and semantic
@@ -12344,12 +12347,14 @@ which means that the types you specify can be anything.
 @comment implementation of Bison, and may change in future releases.
 
 Python parsers do not support @code{%destructor}. This may eventually
-change to use the @code{del} statement, but until then, and as with any other
-python program, the programmer has to be careful to avoid circular references.
+change to use the python @code{del} statement, but until then, and as
+with any other python program, the programmer has to be careful to
+avoid circular references.
 
 In addition, python parsers do not support @code{%printer}, as
address@hidden()} can be used to print the semantic values.  This
-however may change (in a backwards-compatible way) in future versions.
address@hidden()} or the @code{str()} builtin function can be used to
+print the semantic values.  This however may change (in a
+backwards-compatible way) in future versions.
 
 @node Python Location Values
 @subsection Python Location Values
@@ -12457,7 +12462,7 @@ available with @samp{%define parse.error verbose}, which also turns on
 verbose error messages.
 @end deftypemethod
 
address@hidden {YYParser} {void} yyerror (@var{msg} address@hidden,location}])
+@deftypemethod {YYParser} {void} yyerror (@var{msg} [,@var{location}])
 Print an error message using the @code{yyerror} method defined
 in the Lexer instance given to the parser.
 The second argument is only defined if the @code{%locations}
diff --git a/gnulib b/gnulib
index 03e96cc..2521ded 160000
--- a/gnulib
+++ b/gnulib
@@ -1 +1 @@
-Subproject commit 03e96cc338b5237e15fce73e9423526969ee768a
+Subproject commit 2521dedb696298e87e8763a71613813d3fe9cd9f
diff --git a/src/getargs.c b/src/getargs.c
index 26e69da..b11b695 100644
--- a/src/getargs.c
+++ b/src/getargs.c
@@ -728,10 +728,6 @@ getargs (int argc, char *argv[])
       usage (EXIT_FAILURE);
     }
 
-  /* Python requires --no-lines */
-  if (c_strcasecmp ("python", language->language) == 0)
-    no_lines_flag = true;
-
   current_file = grammar_file = uniqstr_new (argv[optind]);
   MUSCLE_INSERT_C_STRING ("file_name", grammar_file);
 }
diff --git a/src/parse-gram.c b/src/parse-gram.c
index b48ed6f..3a34c16 100644
--- a/src/parse-gram.c
+++ b/src/parse-gram.c
@@ -1,4 +1,4 @@
-/* A Bison parser, made by GNU Bison 3.0.  */
+/* A Bison parser, made by GNU Bison 3.0.4-a728-dirty.  */
 
 /* Bison implementation for Yacc-like parsers in C
 
@@ -44,7 +44,7 @@
 #define YYBISON 1
 
 /* Bison version.  */
-#define YYBISON_VERSION "3.0"
+#define YYBISON_VERSION "3.0.4-a728-dirty"
 
 /* Skeleton name.  */
 #define YYSKELETON_NAME "yacc.c"
diff --git a/src/parse-gram.h b/src/parse-gram.h
index 0935896..364ed6d 100644
--- a/src/parse-gram.h
+++ b/src/parse-gram.h
@@ -1,4 +1,4 @@
-/* A Bison parser, made by GNU Bison 3.0.  */
+/* A Bison parser, made by GNU Bison 3.0.4-a728-dirty.  */
 
 /* Bison interface for Yacc-like parsers in C
 
diff --git a/src/parse-gram.y b/src/parse-gram.y
index e29331e..1ec4b4d 100644
--- a/src/parse-gram.y
+++ b/src/parse-gram.y
@@ -34,7 +34,6 @@
   #include "system.h"
 
   #include "c-ctype.h"
-  #include "c-strcase.h"
   #include "complain.h"
   #include "conflicts.h"
   #include "files.h"
@@ -317,11 +316,7 @@ prologue_declaration:
       muscle_code_grow ("initial_action", translate_code ($2, @2, false), @2);
       code_scanner_last_string_free ();
     }
-| "%language" STRING            { language_argmatch ($2, grammar_prio, @1);
-                                  /* Python requires --no-lines */
-                                  if (c_strcasecmp ("python", language->language) == 0)
-                                    no_lines_flag = true;
-                                }
+| "%language" STRING            { language_argmatch ($2, grammar_prio, @1); }
 | "%name-prefix" STRING         { spec_name_prefix = $2; }
 | "%no-lines"                   { no_lines_flag = true; }
 | "%nondeterministic-parser"    { nondeterministic_parser = true; }
diff --git a/tests/python.at b/tests/python.at
index 0949aaf..8b33ab6 100644
--- a/tests/python.at
+++ b/tests/python.at
@@ -25,6 +25,24 @@
 # General Support Utilities
 ##################################################
 
+
+# AT_PYTHON_INDENT([TEXT],[N])
+#-------------------------
+# Prefix each line in text by N blanks (0<N<=8).
+# Used to make sure %code blocks have necessary
+# minimum indentation.
+
+m4_define([AT_PYTHON_INDENT],
+[AT_PYTHON_NBLANKS([$2])m4_bpatsubst([$1],[
+],[
+]AT_PYTHON_NBLANKS([$2])[])])
+
+# AT_PYTHON_NBLANKS([N])
+#-------------------------
+# Helper for AT_PYTHON_INDENT
+m4_define([AT_PYTHON_NBLANKS],[m4_substr([         ],1,$1)])
+
+
 # AT_NORMALIZE([STRING])
 # Clean up string
 # -------------------------------------
@@ -167,11 +185,11 @@ line:
 exp:
   NUM                { [$]$ = [$]1}
 | exp '=' exp
-  {
-    if ([$]1 != [$]3) :
-      yyerror (AT_LOCATION_IF(address@hidden, ]])
-               "calc: error: " + str([$]1) + " != " + str([$]3));
-  }
+{
+if ([$]1 != [$]3) :
+  yyerror (AT_LOCATION_IF(address@hidden, ]])
+           "calc: error: " + str([$]1) + " != " + str([$]3));
+}
 | exp '+' exp
     { [$]$ = [$]1 + [$]3 }
 | exp '-' exp
@@ -192,10 +210,10 @@ exp:
        return YYERROR
        }
 | '-' error
-      {
-      [$]$ = (0)
-      return YYERROR
-      }
+    {
+        [$]$ = (0)
+        return YYERROR
+    }
 ;
 ])
 
@@ -257,13 +275,13 @@ def tokenizer(text) :
       number = c
       while True :
         index += 1
-        row += 1      
+        row += 1
         c = text[index]
         if string.find("0123456789",c) < 0 : break;
         number += c
       # end while
       index -= 1 # backup
-      row -= 1   
+      row -= 1
       tokens.append((token.NUMBER,number,(saveline,saverow),(line,row)))
     else :
       tokens.append((token.NAME,c,(line,row),(line,row+1)))
@@ -277,47 +295,47 @@ def tokenizer(text) :
 
 # WARNING: watch the indentation
 m4_define([AT_LEXER_BODY],[[
-  def __init__ (self) :
+def __init__ (self) :
 ]AT_LOCATION_IF([[
-    self.yypos = Position (1, 0)
+  self.yypos = Position (1, 0)
 ]])[
-    self.tokens = tokenizer(sys.stdin.read())
-    self.ntokens = len(self.tokens)
-    self.index = 0
-
-  def yyerror (self, ]AT_LOCATION_IF([[location, ]])[msg) :
-    s = msg
-    ]AT_LOCATION_IF([[
-    if location is not None :
-      s = str(location) + ": " + s]])[
-    sys.stderr.write(s+'\n')
-
-  def yylex (self) :
-    while (True) :
-      if (self.index >= self.ntokens) :
-        return (EOF, None)
-      type, text, start, end = self.tokens[self.index]
-      self.index += 1
-      ]AT_LOCATION_IF([[self.yypos = Position (start[0], start[1]+1)]])[
-      if type == token.NEWLINE :
-        return (ord('\n'), None)
-      elif type == token.NUMBER :
-        return (NUM, int(text))
-      elif type == token.OP :
-        return (ord(text[0]), None)
-      elif type == token.NAME : # Return the first character
-        return (ord(text[0]), None)          
-      elif type == token.ENDMARKER : # EOF
-        return (EOF,None)
-      else :
-       pass        
-  # end yylex
+  self.tokens = tokenizer(sys.stdin.read())
+  self.ntokens = len(self.tokens)
+  self.index = 0
+
+def yyerror (self, ]AT_LOCATION_IF([[location, ]])[msg) :
+  s = msg
+  ]AT_LOCATION_IF([[
+  if location is not None :
+    s = str(location) + ": " + s]])[
+  sys.stderr.write(s+'\n')
+
+def yylex (self) :
+  while (True) :
+    if (self.index >= self.ntokens) :
+      return (EOF, None)
+    type, text, start, end = self.tokens[[self.index]]
+    self.index += 1
+    ]AT_LOCATION_IF([[self.yypos = Position (start[[0]], start[[1]]+1)]])[
+    if type == token.NEWLINE :
+      return (ord('\n'), None)
+    elif type == token.NUMBER :
+      return (NUM, int(text))
+    elif type == token.OP :
+      return (ord(text[[0]]), None)
+    elif type == token.NAME : # Return the first character
+      return (ord(text[[0]]), None)
+    elif type == token.ENDMARKER : # EOF
+      return (EOF,None)
+    else :
+      pass
+# end yylex
 ]AT_LOCATION_IF([[
-  def getStartPos(self) :
-    return self.yypos
+def getStartPos(self) :
+  return self.yypos
 
-  def getEndPos(self) :
-     return self.yypos
+def getEndPos(self) :
+   return self.yypos
 ]])[
 ]])
 
@@ -406,7 +424,7 @@ AT_DATA([Calc.y],[[
 %code {
 ]AT_POSITION_CLASS[
 class CalcLexer (Lexer) :
-]AT_LEXER_BODY[
+]AT_PYTHON_INDENT([]AT_LEXER_BODY[],2)[
 }
 ]AT_PYTHON_CALC_BODY[
 %%
@@ -541,7 +559,7 @@ m4_define([AT_CALC_TEST],[[
 %code {
 ]AT_POSITION_CLASS[
 class CalcLexer (Lexer) :
-]AT_LEXER_BODY[
+]AT_PYTHON_INDENT(AT_LEXER_BODY,2)[
 }
 ]AT_PYTHON_CALC_BODY[
 %%
@@ -865,7 +883,7 @@ file2: FILE {$$=$1}
 ]])
 # This bison call should fail
 AT_BISON_CHECK([-o YYParser.py YYParser.y],[0],[stdout],[stderr])
-# This test must immediately follow the AT_BISON_CHECK test 
+# This test must immediately follow the AT_BISON_CHECK test
 AT_CHECK_PYTHON_GREP([stderr],
     [warning: type clash on default action: <object> != <File>])
 
@@ -1207,10 +1225,10 @@ AT_DATA([Calc.y],[[/* Infix notation calculator--calc */
 
 %%
 
-]AT_TOKENIZER[  
+]AT_TOKENIZER[
 
 class UserLexer(Lexer) :
-]AT_LEXER_BODY[
+]AT_PYTHON_INDENT([]AT_LEXER_BODY[],2)[
 
 def main() :
     lexer = UserLexer()
@@ -1450,10 +1468,10 @@ import string
 ]AT_PYTHON_CALC_BODY[
 
 %%
-]AT_TOKENIZER[  
+]AT_TOKENIZER[
 
 class YYLexer(Lexer) :
-]AT_LEXER_BODY[
+]AT_PYTHON_INDENT([]AT_LEXER_BODY[],2)[
 
 ]AT_POSITION_CLASS[
 
-- 
1.8.4.rc0.1.g8f6a3e5



reply via email to

[Prev in Thread] Current Thread [Next in Thread]