[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Python Support: Patch to add automatic indenting
From: |
Dennis Heimbigner |
Subject: |
Python Support: Patch to add automatic indenting |
Date: |
Thu, 05 Sep 2013 11:46:04 -0600 |
User-agent: |
Thunderbird 2.0.0.24 (Windows/20100228) |
>From d8ddf3259dd99b42d3adbc1afce0f7860ae9a232 Mon Sep 17 00:00:00 2001
From: dmh <address@hidden>
Date: Thu, 5 Sep 2013 11:39:51 -0600
Subject: [PATCH] Python: Automatic indenting.
Modify previous python patches as follows:
* data/lalr1.py: Automatically indent each line of code
for %code lexer {} and for actions in
grammar rules.
* data/python.m4: ditto
* doc/bison.texi: document automatic indentation
* src/getargs.c: revert to allow synclines.
* src/parse-gram.y: revert to allow synclines.
They are now inserted as proper
python comments.
* tests/python.at: modify to test out automatic indentation
---
data/lalr1.py | 2 +-
data/python.m4 | 35 +++++++++------
doc/bison.texi | 29 +++++++------
gnulib | 2 +-
src/getargs.c | 4 --
src/parse-gram.c | 4 +-
src/parse-gram.h | 2 +-
src/parse-gram.y | 7 +--
tests/python.at | 128 +++++++++++++++++++++++++++++++------------------------
9 files changed, 119 insertions(+), 94 deletions(-)
diff --git a/data/lalr1.py b/data/lalr1.py
index 0aca5f3..09fb4fb 100644
--- a/data/lalr1.py
+++ b/data/lalr1.py
@@ -378,7 +378,7 @@ class Lexer :
# If the user specifies %code lexer ...
# Then insert it here
]b4_lex_param_if([[class YYLexer (Lexer) :
-]b4_percent_code_get([[lexer]])[
+]b4_python_indent([b4_percent_code_get([[lexer]])],2)[
]])[
diff --git a/data/python.m4 b/data/python.m4
index 09ac52d..518dd05 100644
--- a/data/python.m4
+++ b/data/python.m4
@@ -229,6 +229,21 @@ m4_define([b4_typed_parser_table_define],
# left in. For python, remove these.
m4_define([b4_debrace],[m4_translit($1,[{}])])
+# b4_python_indent([TEXT],[N])
+#-------------------------
+# Prefix each line in text by N blanks (0<N<=8).
+# Used to make sure %code blocks have necessary
+# minimum indentation.
+
+m4_define([b4_python_indent],
+[b4_python_nblanks([$2])m4_bpatsubst([$1],[
+],[
+b4_python_nblanks([$2])])])
+
+# b4_python_nblanks([N])
+#-------------------------
+# Helper for b4_python_indent
+m4_define([b4_python_nblanks],[m4_substr([ ],1,$1)])
# b4_integral_parser_table_define(NAME, DATA, COMMENT)
#-----------------------------------------------------
@@ -237,19 +252,15 @@ m4_define([b4_integral_parser_table_define],
# b4-case(ID, CODE)
# -----------------
-m4_define([b4_case], [ elif yyn == [$1] :
-m4_bpatsubst(m4_bpatsubst(m4_bpatsubst($2,
- [^\([ \\t\\n]*\){],[\1]),
- [}[ \\t\\n]*$]),
- [^],[ ])
-])
+# This is complicated for python because
+# each line of the body of the action
+# must be indented more than the 4 blanks
+# that precede the elif line.
+# Before that, however, any braces must be
+# elided.
-m4_define([b4_casex], [ elif yyn == [$1] :
- m4_bpatsubst(
- m4_bpatsubst(
- [}[ \\t\\n]*$]),
- [\\n],[\\n ])
-])
+m4_define([b4_case], [ elif yyn == [$1] :
+b4_python_indent(b4_debrace($2),6)])
## ------------------------- ##
## Assigning token numbers. ##
diff --git a/doc/bison.texi b/doc/bison.texi
index 67db576..1932c48 100644
--- a/doc/bison.texi
+++ b/doc/bison.texi
@@ -12226,11 +12226,14 @@ of those notions.
The most important difference is that line
indentation has syntactic and semantic significance.
-This means a grammar file writer must be very
-careful about the indentation of inserted code.
-In practice, the writer may have to iterate on the
-indentation by looking at the bison produced python
-file in order to get it right.
+Effort has been expended to ensure that this is not
+a significant problem: if a piece of code is written
+and is legitimate python, then it should work correctly.
+It is possible, however, that something could be
+incorrectly indented. If indentation errors occur
+when compiling the parser, the grammar writer
+must be prepared to examine the generated python
+code and adjust the indentation in the bison .y file.
There are, however, some heuristics that can help.
@itemize
@@ -12238,8 +12241,8 @@ There are, however, some heuristics that can help.
@item Assume that the code inside a @code{%code} or @code{%code qualifier}
is at indent level zero (0).
@item Single line actions (e.g. @address@hidden@}}) should cause no problems.
address@hidden Multiple line actions should assume an indentation level greater
-than four (4).
address@hidden Multiple line actions can be indented as desired, but note
+that every line of the action will automatically be indented by 6 blanks.
@end itemize
In python code, line breaks can also have syntactic and semantic
@@ -12344,12 +12347,14 @@ which means that the types you specify can be
anything.
@comment implementation of Bison, and may change in future releases.
Python parsers do not support @code{%destructor}. This may eventually
-change to use the @code{del} statement, but until then, and as with any other
-python program, the programmer has to be careful to avoid circular references.
+change to use the python @code{del} statement, but until then, and as
+with any other python program, the programmer has to be careful to
+avoid circular references.
In addition, python parsers do not support @code{%printer}, as
address@hidden()} can be used to print the semantic values. This
-however may change (in a backwards-compatible way) in future versions.
address@hidden()} or the @code{str()} builtin function can be used to
+print the semantic values. This however may change (in a
+backwards-compatible way) in future versions.
@node Python Location Values
@subsection Python Location Values
@@ -12457,7 +12462,7 @@ available with @samp{%define parse.error verbose},
which also turns on
verbose error messages.
@end deftypemethod
address@hidden {YYParser} {void} yyerror (@var{msg} address@hidden,location}])
address@hidden {YYParser} {void} yyerror (@var{msg} [,@var{location}])
Print an error message using the @code{yyerror} method defined
in the Lexer instance given to the parser.
The second argument is only defined if the @code{%locations}
diff --git a/gnulib b/gnulib
index 03e96cc..2521ded 160000
--- a/gnulib
+++ b/gnulib
@@ -1 +1 @@
-Subproject commit 03e96cc338b5237e15fce73e9423526969ee768a
+Subproject commit 2521dedb696298e87e8763a71613813d3fe9cd9f
diff --git a/src/getargs.c b/src/getargs.c
index 26e69da..b11b695 100644
--- a/src/getargs.c
+++ b/src/getargs.c
@@ -728,10 +728,6 @@ getargs (int argc, char *argv[])
usage (EXIT_FAILURE);
}
- /* Python requires --no-lines */
- if (c_strcasecmp ("python", language->language) == 0)
- no_lines_flag = true;
-
current_file = grammar_file = uniqstr_new (argv[optind]);
MUSCLE_INSERT_C_STRING ("file_name", grammar_file);
}
diff --git a/src/parse-gram.c b/src/parse-gram.c
index b48ed6f..3a34c16 100644
--- a/src/parse-gram.c
+++ b/src/parse-gram.c
@@ -1,4 +1,4 @@
-/* A Bison parser, made by GNU Bison 3.0. */
+/* A Bison parser, made by GNU Bison 3.0.4-a728-dirty. */
/* Bison implementation for Yacc-like parsers in C
@@ -44,7 +44,7 @@
#define YYBISON 1
/* Bison version. */
-#define YYBISON_VERSION "3.0"
+#define YYBISON_VERSION "3.0.4-a728-dirty"
/* Skeleton name. */
#define YYSKELETON_NAME "yacc.c"
diff --git a/src/parse-gram.h b/src/parse-gram.h
index 0935896..364ed6d 100644
--- a/src/parse-gram.h
+++ b/src/parse-gram.h
@@ -1,4 +1,4 @@
-/* A Bison parser, made by GNU Bison 3.0. */
+/* A Bison parser, made by GNU Bison 3.0.4-a728-dirty. */
/* Bison interface for Yacc-like parsers in C
diff --git a/src/parse-gram.y b/src/parse-gram.y
index e29331e..1ec4b4d 100644
--- a/src/parse-gram.y
+++ b/src/parse-gram.y
@@ -34,7 +34,6 @@
#include "system.h"
#include "c-ctype.h"
- #include "c-strcase.h"
#include "complain.h"
#include "conflicts.h"
#include "files.h"
@@ -317,11 +316,7 @@ prologue_declaration:
muscle_code_grow ("initial_action", translate_code ($2, @2, false), @2);
code_scanner_last_string_free ();
}
-| "%language" STRING { language_argmatch ($2, grammar_prio, @1);
- /* Python requires --no-lines */
- if (c_strcasecmp ("python",
language->language) == 0)
- no_lines_flag = true;
- }
+| "%language" STRING { language_argmatch ($2, grammar_prio, @1); }
| "%name-prefix" STRING { spec_name_prefix = $2; }
| "%no-lines" { no_lines_flag = true; }
| "%nondeterministic-parser" { nondeterministic_parser = true; }
diff --git a/tests/python.at b/tests/python.at
index 0949aaf..8b33ab6 100644
--- a/tests/python.at
+++ b/tests/python.at
@@ -25,6 +25,24 @@
# General Support Utilities
##################################################
+
+# AT_PYTHON_INDENT([TEXT],[N])
+#-------------------------
+# Prefix each line in text by N blanks (0<N<=8).
+# Used to make sure %code blocks have necessary
+# minimum indentation.
+
+m4_define([AT_PYTHON_INDENT],
+[AT_PYTHON_NBLANKS([$2])m4_bpatsubst([$1],[
+],[
+]AT_PYTHON_NBLANKS([$2])[])])
+
+# AT_PYTHON_NBLANKS([N])
+#-------------------------
+# Helper for AT_PYTHON_INDENT
+m4_define([AT_PYTHON_NBLANKS],[m4_substr([ ],1,$1)])
+
+
# AT_NORMALIZE([STRING])
# Clean up string
# -------------------------------------
@@ -167,11 +185,11 @@ line:
exp:
NUM { [$]$ = [$]1}
| exp '=' exp
- {
- if ([$]1 != [$]3) :
- yyerror (AT_LOCATION_IF(address@hidden, ]])
- "calc: error: " + str([$]1) + " != " + str([$]3));
- }
+{
+if ([$]1 != [$]3) :
+ yyerror (AT_LOCATION_IF(address@hidden, ]])
+ "calc: error: " + str([$]1) + " != " + str([$]3));
+}
| exp '+' exp
{ [$]$ = [$]1 + [$]3 }
| exp '-' exp
@@ -192,10 +210,10 @@ exp:
return YYERROR
}
| '-' error
- {
- [$]$ = (0)
- return YYERROR
- }
+ {
+ [$]$ = (0)
+ return YYERROR
+ }
;
])
@@ -257,13 +275,13 @@ def tokenizer(text) :
number = c
while True :
index += 1
- row += 1
+ row += 1
c = text[index]
if string.find("0123456789",c) < 0 : break;
number += c
# end while
index -= 1 # backup
- row -= 1
+ row -= 1
tokens.append((token.NUMBER,number,(saveline,saverow),(line,row)))
else :
tokens.append((token.NAME,c,(line,row),(line,row+1)))
@@ -277,47 +295,47 @@ def tokenizer(text) :
# WARNING: watch the indentation
m4_define([AT_LEXER_BODY],[[
- def __init__ (self) :
+def __init__ (self) :
]AT_LOCATION_IF([[
- self.yypos = Position (1, 0)
+ self.yypos = Position (1, 0)
]])[
- self.tokens = tokenizer(sys.stdin.read())
- self.ntokens = len(self.tokens)
- self.index = 0
-
- def yyerror (self, ]AT_LOCATION_IF([[location, ]])[msg) :
- s = msg
- ]AT_LOCATION_IF([[
- if location is not None :
- s = str(location) + ": " + s]])[
- sys.stderr.write(s+'\n')
-
- def yylex (self) :
- while (True) :
- if (self.index >= self.ntokens) :
- return (EOF, None)
- type, text, start, end = self.tokens[self.index]
- self.index += 1
- ]AT_LOCATION_IF([[self.yypos = Position (start[0], start[1]+1)]])[
- if type == token.NEWLINE :
- return (ord('\n'), None)
- elif type == token.NUMBER :
- return (NUM, int(text))
- elif type == token.OP :
- return (ord(text[0]), None)
- elif type == token.NAME : # Return the first character
- return (ord(text[0]), None)
- elif type == token.ENDMARKER : # EOF
- return (EOF,None)
- else :
- pass
- # end yylex
+ self.tokens = tokenizer(sys.stdin.read())
+ self.ntokens = len(self.tokens)
+ self.index = 0
+
+def yyerror (self, ]AT_LOCATION_IF([[location, ]])[msg) :
+ s = msg
+ ]AT_LOCATION_IF([[
+ if location is not None :
+ s = str(location) + ": " + s]])[
+ sys.stderr.write(s+'\n')
+
+def yylex (self) :
+ while (True) :
+ if (self.index >= self.ntokens) :
+ return (EOF, None)
+ type, text, start, end = self.tokens[[self.index]]
+ self.index += 1
+ ]AT_LOCATION_IF([[self.yypos = Position (start[[0]], start[[1]]+1)]])[
+ if type == token.NEWLINE :
+ return (ord('\n'), None)
+ elif type == token.NUMBER :
+ return (NUM, int(text))
+ elif type == token.OP :
+ return (ord(text[[0]]), None)
+ elif type == token.NAME : # Return the first character
+ return (ord(text[[0]]), None)
+ elif type == token.ENDMARKER : # EOF
+ return (EOF,None)
+ else :
+ pass
+# end yylex
]AT_LOCATION_IF([[
- def getStartPos(self) :
- return self.yypos
+def getStartPos(self) :
+ return self.yypos
- def getEndPos(self) :
- return self.yypos
+def getEndPos(self) :
+ return self.yypos
]])[
]])
@@ -406,7 +424,7 @@ AT_DATA([Calc.y],[[
%code {
]AT_POSITION_CLASS[
class CalcLexer (Lexer) :
-]AT_LEXER_BODY[
+]AT_PYTHON_INDENT([]AT_LEXER_BODY[],2)[
}
]AT_PYTHON_CALC_BODY[
%%
@@ -541,7 +559,7 @@ m4_define([AT_CALC_TEST],[[
%code {
]AT_POSITION_CLASS[
class CalcLexer (Lexer) :
-]AT_LEXER_BODY[
+]AT_PYTHON_INDENT(AT_LEXER_BODY,2)[
}
]AT_PYTHON_CALC_BODY[
%%
@@ -865,7 +883,7 @@ file2: FILE {$$=$1}
]])
# This bison call should fail
AT_BISON_CHECK([-o YYParser.py YYParser.y],[0],[stdout],[stderr])
-# This test must immediately follow the AT_BISON_CHECK test
+# This test must immediately follow the AT_BISON_CHECK test
AT_CHECK_PYTHON_GREP([stderr],
[warning: type clash on default action: <object> != <File>])
@@ -1207,10 +1225,10 @@ AT_DATA([Calc.y],[[/* Infix notation calculator--calc */
%%
-]AT_TOKENIZER[
+]AT_TOKENIZER[
class UserLexer(Lexer) :
-]AT_LEXER_BODY[
+]AT_PYTHON_INDENT([]AT_LEXER_BODY[],2)[
def main() :
lexer = UserLexer()
@@ -1450,10 +1468,10 @@ import string
]AT_PYTHON_CALC_BODY[
%%
-]AT_TOKENIZER[
+]AT_TOKENIZER[
class YYLexer(Lexer) :
-]AT_LEXER_BODY[
+]AT_PYTHON_INDENT([]AT_LEXER_BODY[],2)[
]AT_POSITION_CLASS[
--
1.8.4.rc0.1.g8f6a3e5
- Python Support: Patch to add automatic indenting,
Dennis Heimbigner <=