[PATCH 3/4] doc: promote yytoken_kind

bison-patches
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 3/4] doc: promote yytoken_kind_t, not yytokentype

From:	Akim Demaille
Subject:	[PATCH 3/4] doc: promote yytoken_kind_t, not yytokentype
Date:	Sun, 12 Apr 2020 19:27:07 +0200
* data/skeletons/c.m4 (yytoken_kind_t): New.
* data/skeletons/c++.m4, data/skeletons/lalr1.cc (token_kind_type):
New.
* examples/c/lexcalc/parse.y, examples/c/reccalc/parse.y,
* tests/regression.at:
Use them.
* doc/bison.texi: Replace "enum yytokentype" by "yytoken_kind_t".
(api.token.raw): Explain that it forces "yytoken_kind_t" to coincide
with "yysymbol_kind_t".
(Calling Convention): Mention YYEOF.
(Table of Symbols): Add entries for "yytoken_kind_t" and
"yysymbol_kind_t".
(Glossary): Add entries for "Kind", "Token kind" and "Symbol kind".
---
 TODO                       |  12 ----
 data/skeletons/bison.m4    |   4 +-
 data/skeletons/c++.m4      |  13 ++--
 data/skeletons/c.m4        |   7 +-
 data/skeletons/lalr1.cc    |   2 +-
 doc/bison.texi             | 138 ++++++++++++++++++++++++-------------
 examples/c/lexcalc/parse.y |   2 +-
 examples/c/reccalc/parse.y |   2 +-
 src/parse-gram.h           |   3 +-
 tests/regression.at        |   5 +-
 10 files changed, 111 insertions(+), 77 deletions(-)

diff --git a/TODO b/TODO
index 9ce2ae2f..7eb65198 100644
--- a/TODO
+++ b/TODO
@@ -1,7 +1,6 @@
 * Bison 3.6
 ** Documentation
 - yyexpected_tokens/expected_tokens/expectedTokens in all the languages.
-- YYENOMEM
 - YYERRCODE, YYUNDEF, YYEOF
 - i18n in Java
 - symbol.type_get should be kind_get, and it's not documented.
@@ -9,20 +8,9 @@
 - YYERRCODE and "end of file" and translation
 
 ** User token number, internal symbol number, external token number, etc.
-There is some confusion over these terms, which is even a problem for
-translators.  We need something clear, especially if we provide access to
-the symbol numbers (which would be useful for custom error messages).
-
 We could use "number" and "code".
 
-Update: the current best options would be "token kind" and "symbol kind",
-instead of "token type" and "symbol type".
-
-*** yytokentype
-Make an alias so that it is about "kind", not "type".
-
 *** The documentation
-
 You can explicitly specify the numeric code for a token type...
 
 The token numbered as 0.
diff --git a/data/skeletons/bison.m4 b/data/skeletons/bison.m4
index c1b04077..c1cd6606 100644
--- a/data/skeletons/bison.m4
+++ b/data/skeletons/bison.m4
@@ -534,8 +534,8 @@ m4_define([b4_symbol_map],
 
 # b4_token_visible_if(NUM, IF-TRUE, IF-FALSE)
 # -------------------------------------------
-# Whether NUM denotes a token that has an exported definition (i.e.,
-# shows in enum yytokentype).
+# Whether NUM denotes a token kind that has an exported definition
+# (i.e., shows in enum yytokentype).
 m4_define([b4_token_visible_if],
 [b4_symbol_if([$1], [is_token],
               [b4_symbol_if([$1], [has_id], [$2], [$3])],
diff --git a/data/skeletons/c++.m4 b/data/skeletons/c++.m4
index 1c77d57f..50881770 100644
--- a/data/skeletons/c++.m4
+++ b/data/skeletons/c++.m4
@@ -169,7 +169,7 @@ m4_bpatsubst(m4_dquote(m4_bpatsubst(m4_dquote(b4_namespace_ref[ ]),
 
 # b4_token_enums
 # --------------
-# Output the definition of the tokens as enums.
+# Output the definition of the token kinds.
 m4_define([b4_token_enums],
 [[enum yytokentype
       {
@@ -260,8 +260,11 @@ m4_define([b4_public_types_declare],
       ]b4_token_enums[
     };
 
-    /// (External) token kind, as returned by yylex.
-    typedef token::yytokentype token_type;
+    /// Token kind, as returned by yylex.
+    typedef token::yytokentype token_kind_type;
+
+    /// Backward compatibility alias.
+    typedef token_kind_type token_type;
 
     /// Symbol kinds.
     struct symbol_kind
@@ -385,7 +388,7 @@ m4_define([b4_symbol_type_define],
       by_type (const by_type& that);
 
       /// The symbol type as needed by the constructor.
-      typedef token_type kind_type;
+      typedef token_kind_type kind_type;
 
       /// Constructor from (external) token numbers.
       by_type (kind_type t);
@@ -493,7 +496,7 @@ m4_define([b4_public_types_define],
     : type (that.type)
   {}
 
-  ]b4_inline([$1])b4_parser_class[::by_type::by_type (token_type t)
+  ]b4_inline([$1])b4_parser_class[::by_type::by_type (token_kind_type t)
     : type (yytranslate_ (t))
   {}
 
diff --git a/data/skeletons/c.m4 b/data/skeletons/c.m4
index e517259d..d095e7a2 100644
--- a/data/skeletons/c.m4
+++ b/data/skeletons/c.m4
@@ -449,7 +449,7 @@ m4_define([b4_token_define],
 # ----------------
 # Output the definition of the tokens.
 m4_define([b4_token_defines],
-[b4_any_token_visible_if([/* Tokens.  */
+[b4_any_token_visible_if([/* Token kinds.  */
 m4_join([
 ], b4_symbol_map([b4_token_define]))
 ])])
@@ -470,15 +470,16 @@ m4_define([b4_token_enum],
 
 # b4_token_enums
 # --------------
-# The definition of the tokens (if there are) as enums.
+# The definition of the token kinds.
 m4_define([b4_token_enums],
-[b4_any_token_visible_if([[/* Token type.  */
+[b4_any_token_visible_if([[/* Token kinds.  */
 #ifndef ]b4_api_PREFIX[TOKENTYPE
 # define ]b4_api_PREFIX[TOKENTYPE
   enum ]b4_api_prefix[tokentype
   {
 ]b4_symbol_foreach([b4_token_enum])dnl
 [  };
+  typedef enum ]b4_api_prefix[tokentype ]b4_api_prefix[token_kind_t;
 #endif
 ]])])
 
diff --git a/data/skeletons/lalr1.cc b/data/skeletons/lalr1.cc
index 13fab5bf..5b6dbd3b 100644
--- a/data/skeletons/lalr1.cc
+++ b/data/skeletons/lalr1.cc
@@ -302,7 +302,7 @@ m4_define([b4_shared_declarations],
     static const ]b4_int_type(b4_table_ninf, b4_table_ninf)[ yytable_ninf_;
 
     /// Convert a scanner token kind \a t to a symbol kind.
-    /// In theory \a t should be a token_type, but character literals
+    /// In theory \a t should be a token_kind_type, but character literals
     /// are valid, yet not members of the token_type enum.
     static symbol_kind_type yytranslate_ (int t);
 ]b4_parse_error_bmatch([custom\|detailed], [[
diff --git a/doc/bison.texi b/doc/bison.texi
index 54929904..64859997 100644
--- a/doc/bison.texi
+++ b/doc/bison.texi
@@ -2940,7 +2940,7 @@ declaration.
 
 @group
 %@{
-  static void print_token (enum yytokentype token, YYSTYPE val);
+  static void print_token (yytoken_kind_t token, YYSTYPE val);
 %@}
 @end group
 
@@ -2989,7 +2989,7 @@ Look again at the example of the previous section:
 
 @group
 %@{
-  static void print_token (enum yytokentype token, YYSTYPE val);
+  static void print_token (yytoken_kind_t token, YYSTYPE val);
 %@}
 @end group
 
@@ -3004,16 +3004,16 @@ override Bison's default definition for @code{YYLTYPE}, in which
 write it in the first since Bison will insert that code into the parser
 implementation file @emph{before} the default @code{YYLTYPE} definition.  In
 which @var{Prologue} section should you prototype an internal function,
-@code{trace_token}, that accepts @code{YYLTYPE} and @code{yytokentype} as
+@code{trace_token}, that accepts @code{YYLTYPE} and @code{yytoken_kind_t} as
 arguments?  You should prototype it in the second since Bison will insert
-that code @emph{after} the @code{YYLTYPE} and @code{yytokentype}
+that code @emph{after} the @code{YYLTYPE} and @code{yytoken_kind_t}
 definitions.
 
 This distinction in functionality between the two @var{Prologue} sections is
 established by the appearance of the @code{%union} between them.  This
 behavior raises a few questions.  First, why should the position of a
 @code{%union} affect definitions related to @code{YYLTYPE} and
-@code{yytokentype}?  Second, what if there is no @code{%union}?  In that
+@code{yytoken_kind_t}?  Second, what if there is no @code{%union}?  In that
 case, the second kind of @var{Prologue} section is not available.  This
 behavior is not intuitive.
 
@@ -3051,8 +3051,8 @@ the same time:
 
 @group
 %code @{
-  static void print_token (enum yytokentype token, YYSTYPE val);
-  static void trace_token (enum yytokentype token, YYLTYPE loc);
+  static void print_token (yytoken_kind_t token, YYSTYPE val);
+  static void trace_token (yytoken_kind_t token, YYLTYPE loc);
 @}
 @end group
 
@@ -3116,8 +3116,8 @@ Thus, they belong in one or more @code{%code requires}:
 
 @group
 %code @{
-  static void print_token (enum yytokentype token, YYSTYPE val);
-  static void trace_token (enum yytokentype token, YYLTYPE loc);
+  static void print_token (yytoken_kind_t token, YYSTYPE val);
+  static void trace_token (yytoken_kind_t token, YYLTYPE loc);
 @}
 @end group
 
@@ -3149,7 +3149,7 @@ might wish for Bison to insert the prototype into both the parser header
 file and the parser implementation file.  Since this function is not a
 dependency required by @code{YYSTYPE} or @code{YYLTYPE}, it doesn't make
 sense to move its prototype to a @code{%code requires}.  More importantly,
-since it depends upon @code{YYLTYPE} and @code{yytokentype}, @code{%code
+since it depends upon @code{YYLTYPE} and @code{yytoken_kind_t}, @code{%code
 requires} is not sufficient.  Instead, move its prototype from the
 unqualified @code{%code} to a @code{%code provides}:
 
@@ -3189,7 +3189,7 @@ unqualified @code{%code} to a @code{%code provides}:
 
 @group
 %code provides @{
-  void trace_token (enum yytokentype token, YYLTYPE loc);
+  void trace_token (yytoken_kind_t token, YYLTYPE loc);
 @}
 @end group
 
@@ -3205,7 +3205,7 @@ unqualified @code{%code} to a @code{%code provides}:
 @noindent
 Bison will insert the @code{trace_token} prototype into both the parser
 header file and the parser implementation file after the definitions for
-@code{yytokentype}, @code{YYLTYPE}, and @code{YYSTYPE}.
+@code{yytoken_kind_t}, @code{YYLTYPE}, and @code{YYSTYPE}.
 
 The above examples are careful to write directives in an order that reflects
 the layout of the generated parser implementation and header files:
@@ -5755,7 +5755,7 @@ so on.
 
 Contrary to defining @code{api.prefix}, some symbols are @emph{not} renamed
 by @code{%name-prefix}, for instance @code{YYDEBUG}, @code{YYTOKENTYPE},
-@code{yytokentype}, @code{YYSTYPE}, @code{YYLTYPE}.
+@code{yytoken_kind_t}, @code{YYSTYPE}, @code{YYLTYPE}.
 @end deffn
 
 @ifset defaultprec
@@ -6296,18 +6296,19 @@ introduced in Bison 3.0.
 all
 
 @item Purpose:
-The output files normally define the tokens with Yacc-compatible token
-numbers: sequential numbers starting at 257 except for single character
-tokens which stand for themselves (e.g., in ASCII, @samp{'a'} is numbered
-65).  The parser however uses symbol numbers assigned sequentially starting
-at 3.  Therefore each time the scanner returns an (external) token number,
-it must be mapped to the (internal) symbol number.
-
-When @code{api.token.raw} is set, tokens are assigned their internal number,
-which saves one table lookup per token to map them from the external to the
-internal number, and also saves the generation of the mapping table.  The
-gain is typically moderate, but in extreme cases (very simple user actions),
-a 10% improvement can be observed.
+The output files normally define the enumeration of the @emph{token kinds}
+with Yacc-compatible token codes: sequential numbers starting at 257 except
+for single character tokens which stand for themselves (e.g., in ASCII,
+@samp{'a'} is numbered 65).  The parser however uses @emph{symbol kinds}
+which are assigned numbers sequentially starting at 0.  Therefore each time
+the scanner returns an (external) token kind, it must be mapped to the
+(internal) symbol kind.
+
+When @code{api.token.raw} is set, the codes of the token kinds are forced to
+coincide with the symbol kinds.  This saves one table lookup per token to map
+them from the token kind to the symbol kind, and also saves the generation
+of the mapping table.  The gain is typically moderate, but in extreme cases
+(very simple user actions), a 10% improvement can be observed.
 
 When @code{api.token.raw} is set, the grammar cannot use character literals
 (such as @samp{'a'}).
@@ -7138,13 +7139,14 @@ that need it.  @xref{Invocation}.
 @subsection Calling Convention for @code{yylex}
 
 The value that @code{yylex} returns must be the positive numeric code for
-the type of token it has just found; a zero or negative value signifies
+the kind of token it has just found; a zero or negative value signifies
 end-of-input.
 
-When a token is referred to in the grammar rules by a name, that name in the
-parser implementation file becomes a C macro whose definition is the proper
-numeric code for that token kind.  So @code{yylex} can use the name to
-indicate that type.  @xref{Symbols}.
+When a token kind is referred to in the grammar rules by a name, that name
+in the parser implementation file becomes an enumerator of the enum
+@code{yytoken_kind_t} whose definition is the proper numeric code for that
+token kind.  So @code{yylex} should use the name to indicate that kind.
+@xref{Symbols}.
 
 When a token is referred to in the grammar rules by a character literal, the
 numeric code for that character is also the code for the token kind.  So
@@ -7160,12 +7162,13 @@ yylex (void)
 @{
   @dots{}
   if (c == EOF)    /* Detect end-of-input. */
-    return 0;
+    return YYEOF;
   @dots{}
-  if (c == '+' || c == '-')
+  else if (c == '+' || c == '-')
     return c;      /* Assume token kind for '+' is '+'. */
   @dots{}
-  return INT;      /* Return the type of the token. */
+  else
+    return INT;    /* Return the kind of the token. */
   @dots{}
 @}
 @end example
@@ -7207,10 +7210,9 @@ The @code{yytname} table is generated only if you use the
 
 @vindex yylval
 In an ordinary (nonreentrant) parser, the semantic value of the token must
-be stored into the global variable @code{yylval}.  When you are using
-just one data type for semantic values, @code{yylval} has that type.
-Thus, if the type is @code{int} (the default), you might write this in
-@code{yylex}:
+be stored into the global variable @code{yylval}.  When you are using just
+one data type for semantic values, @code{yylval} has that type.  Thus, if
+the type is @code{int} (the default), you might write this in @code{yylex}:
 
 @example
 @group
@@ -10503,17 +10505,16 @@ calculator (@pxref{Mfcalc Declarations}):
 @dots{} %% @dots{} %% @dots{}
 
 static void
-print_token_value (FILE *file, int type, YYSTYPE value)
+print_token_value (FILE *file, yytoken_kind_t kind, YYSTYPE value)
 @{
-  if (type == VAR)
+  if (kind == VAR)
     fprintf (file, "%s", value.tptr->name);
-  else if (type == NUM)
+  else if (kind == NUM)
     fprintf (file, "%d", value.val);
 @}
 @end example
 
-@xref{Mfcalc Traces}, for the
-proper use of @code{%printer}.
+@xref{Mfcalc Traces}, for the proper use of @code{%printer}.
 
 @c ================================================= Invoking Bison
 
@@ -11545,8 +11546,8 @@ Values}.
 @end defcv
 
 @defcv {Type} {parser} {token}
-A structure that contains (only) the @code{yytokentype} enumeration, which
-defines the tokens.  To refer to the token @code{FOO}, use
+A structure that contains (only) the @code{yytoken_kind_t} enumeration,
+which defines the tokens.  To refer to the token @code{FOO}, use
 @code{yy::parser::token::FOO}.  The scanner can use @samp{typedef
 yy::parser::token token;} to ``import'' the token enumeration (@pxref{Calc++
 Scanner}).
@@ -12005,7 +12006,7 @@ The generated parser expects @code{yylex} to have the following prototype.
 
 @deftypefun {int} yylex (@code{semantic_type*} @var{yylval}, @code{location_type*} @var{yylloc}, @var{type1} @var{arg1}, @dots{})
 @deftypefunx {int} yylex (@code{semantic_type*} @var{yylval}, @var{type1} @var{arg1}, @dots{})
-Return the next token.  Its type is the return value, its semantic value and
+Return the next token.  Its kind is the return value, its semantic value and
 location (if enabled) being @var{yylval} and @var{yylloc}.  Invocations of
 @samp{%lex-param @{@var{type1} @var{arg1}@}} yield additional arguments.
 @end deftypefun
@@ -14580,6 +14581,22 @@ Data type of semantic values; @code{int} by default.
 @xref{Value Type}.
 @end deffn
 
+@deffn {Type} yysymbol_kind_t
+An enum that includes all the symbols, tokens and nonterminals, of the
+grammar.  @xref{Syntax Error Reporting Function}.  The symbol kinds are used
+internally by the parser, and should not be confused with the token kinds:
+the symbol kind of a terminal symbol is not equal to its token kind! (Unless
+@samp{%define api.token.raw} was used.)
+@end deffn
+
+@deffn {Type} yytoken_kind_t
+An enum that includes all the @dfn{token kinds} declared with
+@code{%token} (@pxref{Token Decl}).  These are the return values for
+@code{yylex}.  They should not be confused with the @emph{symbol kinds},
+used internally by the parser.
+@end deffn
+
+
 @node Glossary
 @appendix Glossary
 @cindex glossary
@@ -14662,6 +14679,21 @@ performs some operation.
 @item Input stream
 A continuous flow of data between devices or programs.
 
+@item Kind
+``Token'' and ``symbol'' are each overloaded to mean either a grammar symbol
+(kind) or all parse info (kind, value, location) associated with occurrences
+of that grammar symbol from the input.  To disambiguate, we use ``token
+kind'' and ``symbol kind'' to mean both grammar symbols and the types that
+represent them in a base programming language (C, C++, etc.). However, we
+use ``token'' and ``symbol'' without the word ``kind'' to mean parsed
+occurrences, and we append the word ``type'' to refer to the types that
+represent them in a base programming language.
+
+In summary: When you see ``kind'', interpret ``symbol'' or ``token'' to mean
+a @emph{grammar symbol}.  When you don't see ``kind'' (including when you
+see ``type''), interpret ``symbol'' or ``token'' to mean a @emph{parsed
+symbol}.
+
 @item LAC (Lookahead Correction)
 A parsing mechanism that fixes the problem of delayed syntax error
 detection, which is caused by LR state merging, default reductions, and the
@@ -14761,6 +14793,10 @@ the language being parsed.  The start symbol is usually listed as the
 first nonterminal symbol in a language specification.
 @xref{Start Decl}.
 
+@item Symbol kind
+A finite enumeration of all the possible grammar symbols, as processed by
+the parser.  @xref{Symbols}.
+
 @item Symbol table
 A data structure where symbol names and associated data are stored
 during parsing to allow for recognition and use of existing
@@ -14770,16 +14806,20 @@ information in repeated uses of a symbol.  @xref{Multi-function Calc}.
 An error encountered during parsing of an input stream due to invalid
 syntax.  @xref{Error Recovery}.
 
+@item Terminal symbol
+A grammar symbol that has no rules in the grammar and therefore is
+grammatically indivisible.  The piece of text it represents is a token.
+@xref{Language and Grammar}.
+
 @item Token
 A basic, grammatically indivisible unit of a language.  The symbol
 that describes a token in the grammar is a terminal symbol.
 The input of the Bison parser is a stream of tokens which comes from
 the lexical analyzer.  @xref{Symbols}.
 
-@item Terminal symbol
-A grammar symbol that has no rules in the grammar and therefore is
-grammatically indivisible.  The piece of text it represents is a token.
-@xref{Language and Grammar}.
+@item Token kind
+A finite enumeration of all the possible grammar terminals, as discriminated
+by the scanner.  @xref{Symbols}.
 
 @item Unreachable state
 A parser state to which there does not exist a sequence of transitions from
diff --git a/examples/c/lexcalc/parse.y b/examples/c/lexcalc/parse.y
index e8a560fe..41546cb3 100644
--- a/examples/c/lexcalc/parse.y
+++ b/examples/c/lexcalc/parse.y
@@ -6,7 +6,7 @@
 {
   // Tell Flex the expected prototype of yylex.
 #define YY_DECL                                 \
-  enum yytokentype yylex (YYSTYPE* yylval, YYLTYPE *yylloc, int *nerrs)
+  yytoken_kind_t yylex (YYSTYPE* yylval, YYLTYPE *yylloc, int *nerrs)
   YY_DECL;
 
   void yyerror (YYLTYPE *loc, int *nerrs, const char *msg);
diff --git a/examples/c/reccalc/parse.y b/examples/c/reccalc/parse.y
index ae80942c..bcea1b83 100644
--- a/examples/c/reccalc/parse.y
+++ b/examples/c/reccalc/parse.y
@@ -26,7 +26,7 @@
   // Tell Flex the expected prototype of yylex.
   // The scanner argument must be named yyscanner.
 #define YY_DECL                                                         \
-  enum yytokentype yylex (YYSTYPE* yylval, yyscan_t yyscanner, result *res)
+  yytoken_kind_t yylex (YYSTYPE* yylval, yyscan_t yyscanner, result *res)
   YY_DECL;
 
   void yyerror (yyscan_t scanner, result *res, const char *msg, ...);
diff --git a/src/parse-gram.h b/src/parse-gram.h
index b40347ba..32a81e9f 100644
--- a/src/parse-gram.h
+++ b/src/parse-gram.h
@@ -72,7 +72,7 @@ extern int gram_debug;
   } value_type;
 
 
-/* Token type.  */
+/* Token kinds.  */
 #ifndef GRAM_TOKENTYPE
 # define GRAM_TOKENTYPE
   enum gram_tokentype
@@ -139,6 +139,7 @@ extern int gram_debug;
     PERCENT_UNION = 59,            /* "%union"  */
     PERCENT_EMPTY = 60             /* "%empty"  */
   };
+  typedef enum gram_tokentype gram_token_kind_t;
 #endif
 
 /* Value type.  */
diff --git a/tests/regression.at b/tests/regression.at
index 67866d20..6014bc8d 100644
--- a/tests/regression.at
+++ b/tests/regression.at
@@ -146,8 +146,9 @@ void print_my_token (void);
 void
 print_my_token (void)
 {
-  enum yytokentype my_token = MY_TOKEN;
-  printf ("%d\n", my_token);
+  enum yytokentype tok1 = MY_TOKEN;
+  yytoken_kind_t   tok2 = MY_TOKEN;
+  printf ("%d, %d\n", tok1, tok2);
 }
 %}
 %token MY_TOKEN
-- 
2.26.0
[Prev in Thread]
Current Thread
[Next in Thread]
[PATCH 0/4] Doc: update, and document "kinds", Akim Demaille, 2020/04/12
- [PATCH 1/4] c: rename yyexpected_tokens as yypcontext_expected_tokens, Akim Demaille, 2020/04/12
- [PATCH 2/4] doc: document yypcontext_t, and api.symbol.prefix, Akim Demaille, 2020/04/12
- [PATCH 4/4] doc: use "code", not "number", for token (and symbol) kinds, Akim Demaille, 2020/04/12
- [PATCH 3/4] doc: promote yytoken_kind_t, not yytokentype, Akim Demaille <=
Prev by Date: [PATCH 4/4] doc: use "code", not "number", for token (and symbol) kinds
Next by Date: RE: Getting involved in Bison
Previous by thread: [PATCH 4/4] doc: use "code", not "number", for token (and symbol) kinds
Next by thread: RE: Getting involved in Bison
Index(es):
- Date
- Thread