bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#7960: [PATCH] fmt: fix formatting multibyte text (bug #7372)


From: Kostya Stopani
Subject: bug#7960: [PATCH] fmt: fix formatting multibyte text (bug #7372)
Date: Wed, 2 Feb 2011 17:17:12 +0300
User-agent: Mutt/1.5.20 (2009-06-14)

>From b118695b7b614f5f0e371cad885a01306f527d9e Mon Sep 17 00:00:00 2001
From: Kostya Stopani <address@hidden>
Date: Wed, 2 Feb 2011 17:10:05 +0300
Subject: [PATCH] fmt: fix formatting multibyte text (bug #7372)

* src/fmt.c (guess_screen_width): Add function to compute screen width
of a possibly multibyte word to correctly format international
text. If it's not multibyte fall back to byte length.

* src/fmt.c (mbsnrtowcs): Stub function partly implementing a GNU
extension function of the same name for non-GNU platforms.

* src/fmt.c (struct Word): Add a new field "nchar" to hold byte-length
of "text".

* src/fmt.c (get_line, check_punctuation, put_word): Use Word.length
as screen width of a word and Word.nchar as byte-length.
---
 src/fmt.c |   91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/src/fmt.c b/src/fmt.c
index 7d5aee3..1dcbaaf 100644
--- a/src/fmt.c
+++ b/src/fmt.c
@@ -20,6 +20,10 @@
 #include <stdio.h>
 #include <sys/types.h>
 #include <getopt.h>
+#include <wchar.h>
+#include <string.h>
+#include <errno.h>
+
 
 /* Redefine.  Otherwise, systems (Unicos for one) with headers that define
    it to be a type get syntax errors for the variable declaration below.  */
@@ -135,6 +139,7 @@ struct Word
 
     const char *text;          /* the text of the word */
     int length;                        /* length of this word */
+    int nchar;                 /* number of char entries in text array */
     int space;                 /* the size of the following space */
     unsigned int paren:1;      /* starts with open paren */
     unsigned int period:1;     /* ends in [.?!])* */
@@ -167,6 +172,11 @@ static void put_paragraph (WORD *finish);
 static void put_line (WORD *w, int indent);
 static void put_word (WORD *w);
 static void put_space (int space);
+static unsigned int guess_screen_width (const char *text, size_t b);
+#ifndef __GNU_LIBRARY__
+static size_t mbsnrtowcs (wchar_t *DST, const char **SRC, size_t NMC, size_t LEN,
+                         mbstate_t *restrict PS);
+#endif /* __GNU_LIBRARY__ */
 
 /* Option values.  */
 
@@ -670,7 +680,10 @@ get_line (FILE *f, int c)
           c = getc (f);
         }
       while (c != EOF && !isspace (c));
-      in_column += word_limit->length = wptr - word_limit->text;
+      word_limit->nchar = wptr - word_limit->text;
+      word_limit->length = guess_screen_width (word_limit->text, word_limit->nchar);
+      in_column += word_limit->length;
+      
       check_punctuation (word_limit);
 
       /* Scan inter-word space.  */
@@ -751,7 +764,7 @@ static void
 check_punctuation (WORD *w)
 {
   char const *start = w->text;
-  char const *finish = start + (w->length - 1);
+  char const *finish = start + (w->nchar - 1);
   unsigned char fin = *finish;
 
   w->paren = isopen (*start);
@@ -982,7 +995,7 @@ put_word (WORD *w)
   int n;
 
   s = w->text;
-  for (n = w->length; n != 0; n--)
+  for (n = w->nchar; n != 0; n--)
     putchar (*s++);
   out_column += w->length;
 }
@@ -1011,3 +1024,75 @@ put_space (int space)
       out_column++;
     }
 }
+
+/* Try to convert the multibyte text to wide characters and use the resulting
+   character count as its screen width.  Return B (the byte count) on failure. */
+
+static unsigned int
+guess_screen_width (const char *text, size_t b)
+{
+  size_t c;
+  mbstate_t state;
+
+  memset (&state, 0, sizeof (state));
+
+  /* Try conversion */
+  c = mbsnrtowcs (NULL, &text, b, b, &state);
+  if (c > 0 && errno != EILSEQ)
+    return c;
+  else
+    return b;
+}
+
+
+#ifndef __GNU_LIBRARY__
+
+#define INITBUFSIZE 1024
+#define MAXBUFSIZE 1024*1024
+
+/* Stub mbsnrtowcs to be used when GNU extensions are unavailable. */
+
+size_t mbsnrtowcs (wchar_t *DST, const char **SRC, size_t NMC, size_t LEN,
+                     mbstate_t *restrict PS)
+{
+  static char *buf = NULL;
+  static size_t buf_size = INITBUFSIZE;
+  char *new_buf;
+  size_t new_buf_size, c;
+  mbstate_t state;
+
+  if (!buf)
+    {
+      buf = malloc (buf_size * sizeof (char));
+      if (!buf) return NMC;
+    }
+
+  memset (&state, 0, sizeof (state));
+
+  if (buf_size < NMC + 1)
+    {
+      /* Try to resize the buffer. */
+      new_buf_size = NMC + 1;
+      if (new_buf_size <= MAXBUFSIZE)
+       {
+         new_buf = realloc (buf, new_buf_size * sizeof (char));
+         if (!new_buf) return NMC;
+         buf = new_buf;
+         buf_size = new_buf_size;
+       }
+      else
+       {
+         return NMC;
+       }
+    }
+
+  strncpy (buf, *SRC, NMC);
+  buf[NMC] = '\0';
+  new_buf = buf;
+  c = mbsrtowcs (NULL, (const char **restrict) &new_buf, NMC, &state);
+  if (c > 0 && errno != EILSEQ)
+    return c;
+  else
+    return NMC;
+}
+#endif /* __GNU_LIBRARY__ */
-- 
1.7.0.4






reply via email to

[Prev in Thread] Current Thread [Next in Thread]