[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 06/11] diagnostics: learn how to count column number with multibyte chars
From: |
Akim Demaille |
Subject: |
[PATCH 06/11] diagnostics: learn how to count column number with multibyte chars |
Date: |
Sat, 21 Sep 2019 11:59:59 +0200 |
So far diagnostics were cheating: in addition to the 'column' field of
locations (based on actual screen width per multibyte characters and
on tabulation expansion), the scanner sets the 'byte' field.
Diagnostics used this byte count to decide where to insert (color)
style.
We want to be able to truncate the quoted lines when they are too
wide to fit the screen. This requires that the diagnostics learn how
to count columns: the byte-in-boundary trick no longer works.
Bytes are still used for fix-its.
* bootstrap.conf: We need mbfile for mbf_getc.
* src/location.c (caret_info): We need an mbfile.
(caret_set_file): Initialize it.
(caret_getc): Convert to mbfile.
(location_caret): Instead of relying on the byte position to decide
where to insert the color style, count the current column using
boundary_compute.
---
bootstrap.conf | 2 +-
lib/.gitignore | 5 ++++
m4/.gitignore | 3 +++
src/location.c | 62 ++++++++++++++++++++++++++++----------------------
4 files changed, 44 insertions(+), 28 deletions(-)
diff --git a/bootstrap.conf b/bootstrap.conf
index 44469581..756d798d 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -30,7 +30,7 @@ gnulib_modules='
ldexpl
libtextstyle-optional
malloc-gnu
- mbswidth
+ mbfile mbswidth
non-recursive-gnulib-prefix-hack
obstack
obstack-printf
diff --git a/lib/.gitignore b/lib/.gitignore
index 70f13cd6..533d5dbc 100644
--- a/lib/.gitignore
+++ b/lib/.gitignore
@@ -132,6 +132,7 @@
/isnanf.c
/isnanl-nolibm.h
/isnanl.c
+/iswblank.c
/itold.c
/ldexpl.c
/libc-config.h
@@ -148,6 +149,10 @@
/math.c
/math.h
/math.in.h
+/mbchar.c
+/mbchar.h
+/mbfile.c
+/mbfile.h
/mbrtowc.c
/mbsinit.c
/mbswidth.c
diff --git a/m4/.gitignore b/m4/.gitignore
index 0d5b7472..286437d0 100644
--- a/m4/.gitignore
+++ b/m4/.gitignore
@@ -67,6 +67,7 @@
/isnand.m4
/isnanf.m4
/isnanl.m4
+/iswblank.m4
/javacomp.m4
/javaexec.m4
/largefile.m4
@@ -91,6 +92,8 @@
/malloc.m4
/malloca.m4
/math_h.m4
+/mbchar.m4
+/mbfile.m4
/mbrtowc.m4
/mbsinit.m4
/mbstate_t.m4
diff --git a/src/location.c b/src/location.c
index ca2d0b7f..66361fc0 100644
--- a/src/location.c
+++ b/src/location.c
@@ -21,6 +21,7 @@
#include <config.h>
#include "system.h"
+#include <mbfile.h>
#include <mbswidth.h>
#include <quotearg.h>
#include <stdio.h> /* fileno */
@@ -167,6 +168,8 @@ static struct
{
/* Raw input file. */
FILE *file;
+ /* Input file as a stream of multibyte characters. */
+ mb_file_t mbfile;
/* The position within the last file we quoted. If POS.FILE is non
NULL, but FILE is NULL, it means this file is special and should
not be quoted. */
@@ -201,7 +204,7 @@ caret_set_file (const char *file)
&& buf.st_mode & S_IFREG)
{
caret_info.pos.line = 1;
- caret_info.offset = 0;
+ mbf_init (caret_info.mbfile, caret_info.file);
}
else
caret_free ();
@@ -221,22 +224,23 @@ caret_free (void)
}
/* Getc, but smash \r\n as \n. */
-static int
-caret_getc (void)
+static void
+caret_getc_internal (mbchar_t *res)
{
- FILE *f = caret_info.file;
- int res = getc (f);
- if (res == '\r')
+ mbf_getc (*res, caret_info.mbfile);
+ if (mb_iseq (*res, '\r'))
{
- int c = getc (f);
- if (c == '\n')
- res = c;
+ mbchar_t c;
+ mbf_getc (c, caret_info.mbfile);
+ if (mb_iseq (c, '\n'))
+ mb_copy (res, &c);
else
- ungetc (c, f);
+ mbf_ungetc (c, caret_info.mbfile);
}
- return res;
}
+#define caret_getc(Var) caret_getc_internal(&Var)
+
void
location_caret (location loc, const char *style, FILE *out)
{
@@ -259,19 +263,25 @@ location_caret (location loc, const char *style, FILE *out)
/* Advance to the line's position, keeping track of the offset. */
while (caret_info.pos.line < loc.start.line)
{
- int c = caret_getc ();
- if (c == EOF)
+ mbchar_t c;
+ caret_getc (c);
+ if (mb_iseof (c))
/* Something is wrong, that line number does not exist. */
return;
- caret_info.pos.line += c == '\n';
+ caret_info.pos.line += mb_iseq (c, '\n');
}
caret_info.offset = ftell (caret_info.file);
+ caret_info.pos.column = 1;
+ /* Reset mbf's internal state.
+ FIXME: should be done in mbfile. */
+ caret_info.mbfile.eof_seen = 0;
/* Read the actual line. Don't update the offset, so that we keep a pointer
to the start of the line. */
{
- int c = caret_getc ();
- if (c != EOF)
+ mbchar_t c;
+ caret_getc (c);
+ if (!mb_iseof (c))
{
bool single_line = loc.start.line == loc.end.line;
/* Quote the file (at most the first line in the case of
@@ -280,29 +290,27 @@ location_caret (location loc, const char *style, FILE *out)
fprintf (out, "%5d | ", loc.start.line);
/* Consider that single point location (with equal boundaries)
actually denote the character that they follow. */
- int byte_end = loc.end.byte +
- (single_line && loc.start.byte == loc.end.byte);
- /* Byte number. */
- int byte = 1;
+ int col_end = loc.end.column +
+ (single_line && loc.start.column == loc.end.column);
/* Whether we opened the style. If the line is not as
expected (maybe the file was changed since the scanner
ran), we might reach the end before we actually saw the
opening column. */
bool opened = false;
- while (c != EOF && c != '\n')
+ while (!mb_iseof (c) && !mb_iseq (c, '\n'))
{
- if (byte == loc.start.byte)
+ if (caret_info.pos.column == loc.start.column)
{
begin_use_class (style, out);
opened = true;
}
- fputc (c, out);
- c = caret_getc ();
- ++byte;
+ mb_putc (c, out);
+ boundary_compute (&caret_info.pos, mb_ptr (c), mb_len (c));
+ caret_getc (c);
if (opened
&& (single_line
- ? byte == byte_end
- : c == '\n' || c == EOF))
+ ? caret_info.pos.column == col_end
+ : mb_iseq (c, '\n') || mb_iseof (c)))
end_use_class (style, out);
}
putc ('\n', out);
--
2.23.0
- [PATCH 00/11] diagnostics: truncate quoted sources to fit the screen width, Akim Demaille, 2019/09/21
- [PATCH 01/11] diagnostics: style: minor changes, Akim Demaille, 2019/09/21
- [PATCH 04/11] diagnostics: style: use a boundary to track the caret_info, Akim Demaille, 2019/09/21
- [PATCH 02/11] diagnostics: style: add caret_set_file, Akim Demaille, 2019/09/21
- [PATCH 07/11] diagnostics: truncate quoted sources to fit the screen, Akim Demaille, 2019/09/21
- [PATCH 03/11] diagnostics: extract boundary_compute from location_compute, Akim Demaille, 2019/09/21
- [PATCH 06/11] diagnostics: learn how to count column number with multibyte chars,
Akim Demaille <=
- [PATCH 05/11] diagnostics: style: rename member for clariy, Akim Demaille, 2019/09/21
- [PATCH 09/11] diagnostics: also show truncation at the end of line with "...", Akim Demaille, 2019/09/21
- [PATCH 10/11] diagnostics: don't print ellipsis on the caret line, Akim Demaille, 2019/09/21
- [PATCH 11/11] diagnostics: get the screen width from the terminal, Akim Demaille, 2019/09/21
- [PATCH 08/11] diagnostics: check that quoted lines are truncated, Akim Demaille, 2019/09/21
- [PATCH 11/11] diagnotics: get the screen width from the terminal, Akim Demaille, 2019/09/21