[Top][All Lists]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: bison 2.0a token quoting change

From: Paul Eggert
Subject: Re: bison 2.0a token quoting change
Date: Thu, 21 Jul 2005 14:04:37 -0700
User-agent: Gnus/5.1007 (Gnus v5.10.7) Emacs/21.4 (gnu/linux)

In <http://lists.gnu.org/archive/html/bug-bison/2005-07/msg00043.html>,
Anthony Heading <address@hidden> writes:

> yytname is unchanged, but yytstring is a new array of size NTOKENS,
> containing the literal token string if one exists, or NULL if it
> doesn't.

I thought about this for a while, and looked at the documentation
(which doesn't match what the code does), and decided to revert the
change to yytname instead.  That way, yytname will be unambiguous and
code that relies on the Bison 2.0-and-earlier convention will continue
to work.

To solve the problem mentioned in [URL lost in archive]
and originally fixed in the 2005-04-17 patch [URL lost in archive],
I modified the skeletons to dequote the yytname entries if needed.
This undoes some (but not all) of the 2005-04-17 patch, but I hope
it accomplishes what Jan Nieuwenhuizen was asking for.

I installed this:

2005-07-21  Paul Eggert  <address@hidden>

        * data/glr.c (yytnamerr): New function.
        (yyreportSyntaxError): Use it to dequote most string literals.
        * data/lalr1.cc (yytname_): Renamed from yyname_, for compatibility
        with other skeletons.  All uses changed.
        (yytnamerr_): New function.
        (yyreport_syntax_error): Use it to dequote most string literals.
        * data/yacc.c (yytnamerr): New function.
        (yyerrlab): Use it to dequote most string literals.
        * doc/bison.texinfo (Decl Summary, Calling Convention):
        Clarify quoting convention of yytname.
        * src/output.c (prepare_symbols): Quote all names.  This undoes
        the 2005-04-17 change, which is now accomplished (mostly) via
        changes in the parsers as described above.
        * tests/regression.at (Token definitions, Web2c Actions):
        Undo most 2005-04-17 change here, too.

Index: data/glr.c
RCS file: /cvsroot/bison/bison/data/glr.c,v
retrieving revision 1.106
diff -p -u -r1.106 glr.c
--- data/glr.c  20 Jul 2005 21:43:32 -0000      1.106
+++ data/glr.c  21 Jul 2005 20:50:01 -0000
@@ -332,7 +332,7 @@ static const ]b4_int_type_for([b4_rline]
-/* YYTNME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
+/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
    First, the terminals, then, starting at YYNTOKENS, nonterminals. */
 static const char *const yytname[] =
@@ -600,6 +600,54 @@ yystpcpy (char *yydest, const char *yysr
 #  endif
 # endif
+# ifndef yytnamerr
+/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
+   quotes and backslashes, so that it's suitable for yyerror.  The
+   heuristic is that double-quoting is unnecessary unless the string
+   contains an apostrophe, a comma, or backslash (other than
+   backslash-backslash).  YYSTR is taken from yytname.  If YYRES is
+   null, do not copy; instead, return the length of what the result
+   would have been.  */
+static size_t
+yytnamerr (char *yyres, const char *yystr)
+  if (*yystr == '"')
+    {
+      size_t yyn = 0;
+      char const *yyp = yystr;
+      for (;;)
+       switch (*++yyp)
+         {
+         case '\'':
+         case ',':
+           goto do_not_strip_quotes;
+         case '\\':
+           if (*++yyp != '\\')
+             goto do_not_strip_quotes;
+           /* Fall through.  */
+         default:
+           if (yyres)
+             yyres[yyn] = *yyp;
+           yyn++;
+           break;
+         case '"':
+           if (yyres)
+             yyres[yyn] = '\0';
+           return yyn;
+         }
+    do_not_strip_quotes: ;
+    }
+  if (! yyres)
+    return strlen (yystr);
+  return yystpcpy (yyres, yystr) - yyres;
+# endif
 #endif /* !YYERROR_VERBOSE */
 /** State numbers, as in LALR(1) machine */
@@ -1736,7 +1784,7 @@ yyreportSyntaxError (yyGLRStack* yystack
       yyn = yypact[yystack->yytops.yystates[0]->yylrState];
       if (YYPACT_NINF < yyn && yyn < YYLAST)
-         size_t yysize0 = strlen (yytokenName (*yytokenp));
+         size_t yysize0 = yytnamerr (NULL, yytokenName (*yytokenp));
          size_t yysize = yysize0;
          size_t yysize1;
          yybool yysize_overflow = yyfalse;
@@ -1778,7 +1826,7 @@ yyreportSyntaxError (yyGLRStack* yystack
                yyarg[yycount++] = yytokenName (yyx);
-               yysize1 = yysize + strlen (yytokenName (yyx));
+               yysize1 = yysize + yytnamerr (NULL, yytokenName (yyx));
                yysize_overflow |= yysize1 < yysize;
                yysize = yysize1;
                yyfmt = yystpcpy (yyfmt, yyprefix);
@@ -1801,7 +1849,7 @@ yyreportSyntaxError (yyGLRStack* yystack
                  if (*yyp == '%' && yyf[1] == 's' && yyi < yycount)
-                     yyp = yystpcpy (yyp, yyarg[yyi++]);
+                     yyp += yytnamerr (yyp, yyarg[yyi++]);
                      yyf += 2;
Index: data/lalr1.cc
RCS file: /cvsroot/bison/bison/data/lalr1.cc,v
retrieving revision 1.89
diff -p -u -r1.89 lalr1.cc
--- data/lalr1.cc       19 Jul 2005 06:56:43 -0000      1.89
+++ data/lalr1.cc       21 Jul 2005 20:50:01 -0000
@@ -323,7 +323,12 @@ namespace yy
     /// For a symbol, its name in clear.
-    static const char* const yyname_[];
+    static const char* const yytname_[];
+    /// Convert the symbol name \a n to a form suitable for a diagnostic.
+    virtual std::string yytnamerr_ (const char *n);
@@ -471,6 +476,47 @@ do {                                       \
 #define YYABORT                goto yyabortlab
 #define YYERROR                goto yyerrorlab
+/* Return YYSTR after stripping away unnecessary quotes and
+   backslashes, so that it's suitable for yyerror.  The heuristic is
+   that double-quoting is unnecessary unless the string contains an
+   apostrophe, a comma, or backslash (other than backslash-backslash).
+   YYSTR is taken from yytname.  */
+yy::]b4_parser_class_name[::yytnamerr_ (const char *yystr)
+  if (*yystr == '"')
+    {
+      std::string yyr = "";
+      char const *yyp = yystr;
+      for (;;)
+       switch (*++yyp)
+         {
+         case '\'':
+         case ',':
+           goto do_not_strip_quotes;
+         case '\\':
+           if (*++yyp != '\\')
+             goto do_not_strip_quotes;
+           /* Fall through.  */
+         default:
+           yyr += *yyp;
+           break;
+         case '"':
+           return yyr;
+         }
+    do_not_strip_quotes: ;
+    }
+  return yystr;
 | Print this symbol on YYOUTPUT.  |
@@ -488,7 +534,7 @@ yy::]b4_parser_class_name[::yysymprint_ 
   (void) cdebug_;
   *yycdebug_ << (yytype < yyntokens_ ? "token" : "nterm")
-            << ' ' << yyname_[yytype] << " ("
+            << ' ' << yytname_[yytype] << " ("
              << *yylocationp << ": ";
   switch (yytype)
@@ -880,10 +926,10 @@ yy::]b4_parser_class_name[::yyreport_syn
          // "syntax error, unexpected %s or %s or %s"
          // Then, invoke YY_ on this string.
          // Finally, use the string as a format to output
-         // yyname_[yyilooka_], etc.
+         // yytname_[yyilooka_], etc.
          // Until this gets fixed, this message appears in English only.
          message = "syntax error, unexpected ";
-         message += yyname_[yyilooka_];
+         message += yytnamerr_ (yytname_[yyilooka_]);
           if (count < 5)
               count = 0;
@@ -891,7 +937,7 @@ yy::]b4_parser_class_name[::yyreport_syn
                 if (yycheck_[x + yyn_] == x && x != yyterror_)
                     message += (!count++) ? ", expecting " : " or ";
-                    message += yyname_[x];
+                   message += yytnamerr_ (yytname_[x]);
@@ -988,7 +1034,7 @@ yy::]b4_parser_class_name[::yyr2_[] =
 /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
    First, the terminals, then, starting at \a yyntokens_, nonterminals. */
 const char*
-const yy::]b4_parser_class_name[::yyname_[] =
+const yy::]b4_parser_class_name[::yytname_[] =
@@ -1038,8 +1084,8 @@ yy::]b4_parser_class_name[::yyreduce_pri
              << " (line " << yylno << "), ";
   for (]b4_int_type_for([b4_prhs])[ i = yyprhs_[yyn_];
        0 <= yyrhs_[i]; ++i)
-    *yycdebug_ << yyname_[yyrhs_[i]] << ' ';
-  *yycdebug_ << "-> " << yyname_[yyr1_[yyn_]] << std::endl;
+    *yycdebug_ << yytname_[yyrhs_[i]] << ' ';
+  *yycdebug_ << "-> " << yytname_[yyr1_[yyn_]] << std::endl;
 #endif // YYDEBUG
Index: data/yacc.c
RCS file: /cvsroot/bison/bison/data/yacc.c,v
retrieving revision 1.98
diff -p -u -r1.98 yacc.c
--- data/yacc.c 19 Jul 2005 06:56:43 -0000      1.98
+++ data/yacc.c 21 Jul 2005 20:50:01 -0000
@@ -416,7 +416,7 @@ static const ]b4_int_type_for([b4_rline]
-/* YYTNME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
+/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
    First, the terminals, then, starting at YYNTOKENS, nonterminals. */
 static const char *const yytname[] =
@@ -645,8 +645,8 @@ do {                                                        
              yyrule - 1, yylno);
   /* Print the symbols being reduced, and their result.  */
   for (yyi = yyprhs[yyrule]; 0 <= yyrhs[yyi]; yyi++)
-    YYFPRINTF (stderr, "%s ", yytname [yyrhs[yyi]]);
-  YYFPRINTF (stderr, "-> %s\n", yytname [yyr1[yyrule]]);
+    YYFPRINTF (stderr, "%s ", yytname[yyrhs[yyi]]);
+  YYFPRINTF (stderr, "-> %s\n", yytname[yyr1[yyrule]]);
 # define YY_REDUCE_PRINT(Rule)         \
@@ -735,7 +735,55 @@ yystpcpy (yydest, yysrc)
 #  endif
 # endif
-#endif /* !YYERROR_VERBOSE */
+# ifndef yytnamerr
+/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
+   quotes and backslashes, so that it's suitable for yyerror.  The
+   heuristic is that double-quoting is unnecessary unless the string
+   contains an apostrophe, a comma, or backslash (other than
+   backslash-backslash).  YYSTR is taken from yytname.  If YYRES is
+   null, do not copy; instead, return the length of what the result
+   would have been.  */
+static YYSIZE_T
+yytnamerr (char *yyres, const char *yystr)
+  if (*yystr == '"')
+    {
+      size_t yyn = 0;
+      char const *yyp = yystr;
+      for (;;)
+       switch (*++yyp)
+         {
+         case '\'':
+         case ',':
+           goto do_not_strip_quotes;
+         case '\\':
+           if (*++yyp != '\\')
+             goto do_not_strip_quotes;
+           /* Fall through.  */
+         default:
+           if (yyres)
+             yyres[yyn] = *yyp;
+           yyn++;
+           break;
+         case '"':
+           if (yyres)
+             yyres[yyn] = '\0';
+           return yyn;
+         }
+    do_not_strip_quotes: ;
+    }
+  if (! yyres)
+    return yystrlen (yystr);
+  return yystpcpy (yyres, yystr) - yyres;
+# endif
+#endif /* YYERROR_VERBOSE */
@@ -1111,7 +1159,7 @@ yyerrlab:
       if (YYPACT_NINF < yyn && yyn < YYLAST)
          int yytype = YYTRANSLATE (yychar);
-         YYSIZE_T yysize0 = yystrlen (yytname[yytype]);
+         YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]);
          YYSIZE_T yysize = yysize0;
          YYSIZE_T yysize1;
          int yysize_overflow = 0;
@@ -1163,7 +1211,7 @@ yyerrlab:
                yyarg[yycount++] = yytname[yyx];
-               yysize1 = yysize + yystrlen (yytname[yyx]);
+               yysize1 = yysize + yytnamerr (0, yytname[yyx]);
                yysize_overflow |= yysize1 < yysize;
                yysize = yysize1;
                yyfmt = yystpcpy (yyfmt, yyprefix);
@@ -1188,7 +1236,7 @@ yyerrlab:
                  if (*yyp == '%' && yyf[1] == 's' && yyi < yycount)
-                     yyp = yystpcpy (yyp, yyarg[yyi++]);
+                     yyp += yytnamerr (yyp, yyarg[yyi++]);
                      yyf += 2;
Index: doc/bison.texinfo
RCS file: /cvsroot/bison/bison/doc/bison.texinfo,v
retrieving revision 1.153
diff -p -u -r1.153 bison.texinfo
--- doc/bison.texinfo   19 Jul 2005 06:56:43 -0000      1.153
+++ doc/bison.texinfo   21 Jul 2005 20:50:01 -0000
@@ -4164,15 +4164,14 @@ three elements of @code{yytname} corresp
 @code{"error"}, and @code{"$undefined"}; after these come the symbols
 defined in the grammar file.
-For single-character literal tokens and literal string tokens, the name
-in the table includes the single-quote or double-quote characters: for
-example, @code{"'+'"} is a single-character literal and @code{"\"<=\""}
-is a literal string token.  All the characters of the literal string
-token appear verbatim in the string found in the table; even
-double-quote characters are not escaped.  For example, if the token
-consists of three characters @samp{*"*}, its string in @code{yytname}
-contains @samp{"*"*"}.  (In C, that would be written as
+The name in the table includes all the characters needed to represent
+the token in Bison.  For single-character literals and literal
+strings, this includes the surrounding quoting characters and any
+escape sequences.  For example, the Bison single-character literal
+@code{'+'} corresponds to a three-character name, represented in C as
+@code{"'+'"}; and the Bison two-character literal string @code{"\\/"}
+corresponds to a five-character name, represented in C as
+@code{"\"\\\\/\""}.
 When you specify @code{%token-table}, Bison also generates macro
 definitions for macros @code{YYNTOKENS}, @code{YYNNTS}, and
@@ -4413,11 +4412,13 @@ the grammar file has no effect on @code{
 table.  The index of the token in the table is the token type's code.
 The name of a multicharacter token is recorded in @code{yytname} with a
 double-quote, the token's characters, and another double-quote.  The
-token's characters are not escaped in any way; they appear verbatim in
-the contents of the string in the table.
+token's characters are escaped as necessary to be suitable as input
+to Bison.
-Here's code for looking up a token in @code{yytname}, assuming that the
-characters of the token are stored in @code{token_buffer}.
+Here's code for looking up a multicharacter token in @code{yytname},
+assuming that the characters of the token are stored in
+@code{token_buffer}, and assuming that the token does not contain any
+characters like @samp{"} that require escaping.
 for (i = 0; i < YYNTOKENS; i++)
Index: src/output.c
RCS file: /cvsroot/bison/bison/src/output.c,v
retrieving revision 1.232
diff -p -u -r1.232 output.c
--- src/output.c        14 May 2005 06:49:47 -0000      1.232
+++ src/output.c        21 Jul 2005 20:50:01 -0000
@@ -60,7 +60,7 @@ bool error_verbose = false;
 #define GENERATE_MUSCLE_INSERT_TABLE(Name, Type)                       \
 static void                                                            \
-Name (const char *name,                                                        
+Name (char const *name,                                                        
       Type *table_data,                                                        
       Type first,                                                      \
       int begin,                                                       \
@@ -162,10 +162,7 @@ prepare_symbols (void)
     int j = 2;
     for (i = 0; i < nsyms; i++)
-       char const *tag = symbols[i]->tag;
-       char const *cp = (*tag == '"'
-                         ? tag
-                         : quotearg_style (c_quoting_style, tag));
+       char const *cp = quotearg_style (c_quoting_style, symbols[i]->tag);
        /* Width of the next token, including the two quotes, the
           comma and the space.  */
        int width = strlen (cp) + 2;
Index: tests/regression.at
RCS file: /cvsroot/bison/bison/tests/regression.at,v
retrieving revision 1.91
diff -p -u -r1.91 regression.at
--- tests/regression.at 14 May 2005 06:49:48 -0000      1.91
+++ tests/regression.at 21 Jul 2005 20:50:02 -0000
@@ -324,9 +324,9 @@ int yylex (void);
 %token B_TOKEN "b"
 %token C_TOKEN 'c'
 %token 'd' D_TOKEN
-%token SPECIAL "\\\'\?\"\n\t??!"
+%token SPECIAL "\\\'\?\"\a\b\f\n\r\t\v\001\377\x001\x0000ff??!"
-exp: "a" "\\\'\?\"\n\t??!";
+exp: "a" "\\\'\?\"\a\b\f\n\r\t\v\001\377\x001\x0000ff??!";
 yyerror (char const *s)
@@ -350,8 +350,7 @@ main (void)
 AT_CHECK([bison -o input.c input.y])
 AT_PARSER_CHECK([./input], 1, [],
-[syntax error, unexpected \'?"
-       ??!, expecting a
+[syntax error, unexpected "\\'?\"\a\b\f\n\r\t\v\001\377\001\377?\?!", expecting a
@@ -635,8 +634,8 @@ static const unsigned char yyrline[] =
 static const char *const yytname[] =
-  "$end", "error", "$undefined", "if", "const", "then", "else", "$accept",
-  "statement", "struct_stat", "if", "else", 0
+  "$end", "error", "$undefined", "\"if\"", "\"const\"", "\"then\"",
+  "\"else\"", "$accept", "statement", "struct_stat", "if", "else", 0
 static const unsigned short int yytoknum[] =

reply via email to

[Prev in Thread] Current Thread [Next in Thread]