[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 9/9] reports: the column width differs from the byte count
From: Akim Demaille
Subject: [PATCH 9/9] reports: the column width differs from the byte count
Date: Sat, 13 Jun 2020 17:23:17 +0200
From

    "number"          shift, and go to state 1
    "Ñùṃéℝô"  shift, and go to state 2

to

    "number"  shift, and go to state 1
    "Ñùṃéℝô"  shift, and go to state 2

* src/print.c: Use mbswidth, not strlen, to compute visual columns.
* tests/report.at: Adjust.
---
NEWS | 20 +++++++++++++++++++-
src/print.c | 13 +++++++------
tests/report.at | 27 ++++++++++++++++-----------
3 files changed, 42 insertions(+), 18 deletions(-)
diff --git a/NEWS b/NEWS
index 1cec2252..1584d49e 100644
--- a/NEWS
+++ b/NEWS
@@ -27,6 +27,23 @@ GNU Bison NEWS
header. This is disabled when the generated header is `y.tab.h`, to
comply with Automake's ylwrap.
+*** String aliases are faithfully propagated
+
+ Bison used to interpret user strings (i.e., decoding backslash escapes)
+ when reading them, and to escape them (i.e., issue non-printable
+ characters as backslash escapes, taking the locale into account) when
+ outputting them. As a consequence non-ASCII strings (say in UTF-8) ended
+ up "ciphered" as sequences of backslash escapes. This happened not only
+ in the generated sources (where the compiler will reinterpret them), but
+ also in all the generated reports (text, xml, html, dot, etc.). Reports
+ were therefore not readable when string aliases were not pure ASCII.
+ Worse yet: the output depended on the user's locale.
+
+ Now Bison faithfully treats the string aliases exactly the way the user
+ spelled them. This fixes all the aforementioned problems. However, now,
+ string aliases semantically equivalent but syntactically different (e.g.,
+ "A", "\x41", "\101") are considered to be different.
+
** New features
*** File prefix mapping
@@ -4205,7 +4222,8 @@ along with this program. If not, see
<http://www.gnu.org/licenses/>.
LocalWords: yysymbol yytnamerr yyreport ctx ARGMAX yysyntax stderr LPAREN
LocalWords: symrec yypcontext TOKENMAX yyexpected YYEMPTY yypstate YYEOF
LocalWords: autocompletion bistromathic submessages Cayuela lexcalc hoc
- LocalWords: yytoken YYUNDEF YYerror
+ LocalWords: yytoken YYUNDEF YYerror basename Automake's UTF ifdef ffile
+ LocalWords: gotos readline
Local Variables:
ispell-dictionary: "american"
diff --git a/src/print.c b/src/print.c
index 1da0f9dd..91b44cb3 100644
--- a/src/print.c
+++ b/src/print.c
@@ -22,6 +22,7 @@
#include "system.h"
#include <bitset.h>
+#include <mbswidth.h>
#include "closure.h"
#include "conflicts.h"
@@ -49,7 +50,7 @@ static bitset no_reduce_set;
static void
max_length (size_t *width, const char *str)
{
- size_t len = strlen (str);
+ size_t len = mbswidth (str, 0);
if (len > *width)
*width = len;
}
@@ -130,7 +131,7 @@ print_transitions (state *s, FILE *out, bool
display_transitions_p)
state *s1 = trans->states[i];
fprintf (out, " %s", tag);
- for (int j = width - strlen (tag); j > 0; --j)
+ for (int j = width - mbswidth (tag, 0); j > 0; --j)
fputc (' ', out);
if (display_transitions_p)
fprintf (out, _("shift, and go to state %d\n"), s1->number);
@@ -168,7 +169,7 @@ print_errs (FILE *out, state *s)
{
const char *tag = errp->symbols[i]->tag;
fprintf (out, " %s", tag);
- for (int j = width - strlen (tag); j > 0; --j)
+ for (int j = width - mbswidth (tag, 0); j > 0; --j)
fputc (' ', out);
fputs (_("error (nonassociative)\n"), out);
}
@@ -187,7 +188,7 @@ print_reduction (FILE *out, size_t width,
rule *r, bool enabled)
{
fprintf (out, " %s", lookahead_token);
- for (int j = width - strlen (lookahead_token); j > 0; --j)
+ for (int j = width - mbswidth (lookahead_token, 0); j > 0; --j)
fputc (' ', out);
if (!enabled)
fputc ('[', out);
@@ -232,7 +233,7 @@ print_reductions (FILE *out, state *s)
/* Compute the width of the lookahead token column. */
size_t width = 0;
if (default_reduction)
- width = strlen (_("$default"));
+ width = mbswidth (_("$default"), 0);
if (reds->lookahead_tokens)
for (int i = 0; i < ntokens; i++)
@@ -404,7 +405,7 @@ print_nonterminal_symbols (FILE *out)
break;
}
- int column = 4 + strlen (tag);
+ int column = 4 + mbswidth (tag, 0);
fprintf (out, "%4s%s", "", tag);
if (symbols[i]->content->type_name)
column += fprintf (out, " <%s>",
diff --git a/tests/report.at b/tests/report.at
index 27d4e7b8..21708070 100644
--- a/tests/report.at
+++ b/tests/report.at
@@ -1150,6 +1150,11 @@ AT_SETUP([Reports with conflicts])
AT_KEYWORDS([report])
+# We need UTF-8 support for correct screen-width computation of UTF-8
+# characters. Skip the test if not available.
+locale=`locale -a | $EGREP '^en_US\.(UTF-8|utf8)$' | sed 1q`
+AT_SKIP_IF([test x == x"$locale"])
+
AT_BISON_OPTION_PUSHDEFS
AT_DATA([input.y],
[[%left "+"
@@ -1162,7 +1167,7 @@ exp
| "Ñùṃéℝô"
]])
-AT_BISON_CHECK([-o input.cc -rall --graph=input.gv --xml input.y], [], [],
+AT_CHECK([LC_ALL="$locale" $5 bison -fno-caret -o input.cc -rall
--graph=input.gv --xml input.y], [], [],
[[input.y: warning: 3 shift/reduce conflicts [-Wconflicts-sr]
input.y: warning: 3 reduce/reduce conflicts [-Wconflicts-rr]
input.y: warning: rerun with option '-Wcounterexamples' to generate conflict
counterexamples [-Wother]
@@ -1219,7 +1224,7 @@ State 0
4 | . "number"
5 | . "Ñùṃéℝô"
- "number" shift, and go to state 1
+ "number" shift, and go to state 1
"Ñùṃéℝô" shift, and go to state 2
exp go to state 3
@@ -1246,9 +1251,9 @@ State 3
2 | exp . "+" exp
3 | exp . "+" exp
- $end shift, and go to state 4
- "+" shift, and go to state 5
- "⊕" shift, and go to state 6
+ $end shift, and go to state 4
+ "+" shift, and go to state 5
+ "⊕" shift, and go to state 6
State 4
@@ -1268,7 +1273,7 @@ State 5
4 | . "number"
5 | . "Ñùṃéℝô"
- "number" shift, and go to state 1
+ "number" shift, and go to state 1
"Ñùṃéℝô" shift, and go to state 2
exp go to state 7
@@ -1283,7 +1288,7 @@ State 6
4 | . "number"
5 | . "Ñùṃéℝô"
- "number" shift, and go to state 1
+ "number" shift, and go to state 1
"Ñùṃéℝô" shift, and go to state 2
exp go to state 8
@@ -1303,8 +1308,8 @@ State 7
$end [reduce using rule 3 (exp)]
"+" reduce using rule 2 (exp)
"+" [reduce using rule 3 (exp)]
- "⊕" [reduce using rule 2 (exp)]
- "⊕" [reduce using rule 3 (exp)]
+ "⊕" [reduce using rule 2 (exp)]
+ "⊕" [reduce using rule 3 (exp)]
$default reduce using rule 2 (exp)
Conflict between rule 2 and token "+" resolved as reduce (%left "+").
@@ -1317,11 +1322,11 @@ State 8
2 | exp . "+" exp
3 | exp . "+" exp
- "+" shift, and go to state 5
+ "+" shift, and go to state 5
"⊕" shift, and go to state 6
"+" [reduce using rule 1 (exp)]
- "⊕" [reduce using rule 1 (exp)]
+ "⊕" [reduce using rule 1 (exp)]
$default reduce using rule 1 (exp)
]])
--
2.27.0
- [PATCH 0/9] Fix reports, Akim Demaille, 2020/06/13
- [PATCH 1/9] style: prefer 'FOO ()' to 'FOO' for function-like macros, Akim Demaille, 2020/06/13
- [PATCH 2/9] style: reduce scopes, Akim Demaille, 2020/06/13
- [PATCH 3/9] style: introduce & use STRING_1GROW, Akim Demaille, 2020/06/13
- [PATCH 4/9] style: factor common bits about string scanning, Akim Demaille, 2020/06/13
- [PATCH 5/9] tests: check reports with conflicts and UTF-8, Akim Demaille, 2020/06/13
- [PATCH 6/9] parser: keep string aliases as the user wrote it, Akim Demaille, 2020/06/13
- [PATCH 7/9] regen, Akim Demaille, 2020/06/13
- [PATCH 8/9] reports: don't escape the labels, Akim Demaille, 2020/06/13
- [PATCH 9/9] reports: the column width differs from the byte count,
Akim Demaille <=