[SCM] gawk branch, master, updated. gawk-4.1.0-3851-g6d1580b

gawk-diffs
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[SCM] gawk branch, master, updated. gawk-4.1.0-3851-g6d1580b

From:	Arnold Robbins
Subject:	[SCM] gawk branch, master, updated. gawk-4.1.0-3851-g6d1580b
Date:	Mon, 16 Dec 2019 14:37:29 -0500 (EST)
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, master has been updated
       via  6d1580bfd328fbbb04f4b5627032602dd8dfe98c (commit)
      from  fb48abe6ca16de5887b15f7c7774cd6c2e402176 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=6d1580bfd328fbbb04f4b5627032602dd8dfe98c

commit 6d1580bfd328fbbb04f4b5627032602dd8dfe98c
Author: Arnold D. Robbins <address@hidden>
Date:   Mon Dec 16 21:37:04 2019 +0200

    Sync localeinfo and partially sync dfa from GNULIB.

diff --git a/support/ChangeLog b/support/ChangeLog
index bb323e9..d3213ee 100644
--- a/support/ChangeLog
+++ b/support/ChangeLog
@@ -1,6 +1,11 @@
+2019-12-16         Arnold D. Robbins     <address@hidden>
+
+       * localeinfo.h, localeinfo.c: Updated from GNULIB.
+       * dfa.c: Partial sync with GNULIB for localeinfo related stuff.
+
 2019-12-13         Arnold D. Robbins     <address@hidden>
 
-       * dfah, dfa.c: Updated from GNULIB.
+       * dfa.h, dfa.c: Updated from GNULIB.
 
 2019-11-21         Arnold D. Robbins     <address@hidden>
 
diff --git a/support/dfa.c b/support/dfa.c
index cfa5421..9e7c8a8 100644
--- a/support/dfa.c
+++ b/support/dfa.c
@@ -40,9 +40,6 @@
 #include <stdlib.h>
 #include <limits.h>
 #include <string.h>
-#if HAVE_SETLOCALE
-#include <locale.h>
-#endif
 
 #include "dfa.h"       // gets stdbool.h for us
 
@@ -58,11 +55,6 @@ isasciidigit (char c)
   return '0' <= c && c <= '9';
 }
 
-/* Gawk doesn't use Gnulib, so don't assume that setlocale is present.  */
-#ifndef LC_ALL
-# define setlocale(category, locale) NULL
-#endif
-
 #include "gettext.h"
 #define _(str) gettext (str)
 
@@ -597,12 +589,6 @@ struct dfa
   char *(*dfaexec) (struct dfa *, char const *, char *,
                     bool, size_t *, bool *);
 
-  /* The locale is simple, like the C locale.  These locales can be
-     processed more efficiently, as they are single-byte, their native
-     character set is in collating-sequence order, and they do not
-     have multi-character collating elements.  */
-  bool simple_locale;
-
   /* Other cached information derived from the locale.  */
   struct localeinfo localeinfo;
 };
@@ -921,7 +907,6 @@ void
 dfacopysyntax (struct dfa *to, const struct dfa *from)
 {
   to->dfaexec = from->dfaexec;
-  to->simple_locale = from->simple_locale;
   to->localeinfo = from->localeinfo;
 
   to->fast = from->fast;
@@ -958,38 +943,6 @@ setbit_case_fold_c (int b, charclass *c)
       setbit (i, c);
 }
 
-/* Return true if the locale compatible with the C locale.  */
-
-static bool
-using_simple_locale (bool multibyte)
-{
-  /* The native character set is known to be compatible with
-     the C locale.  The following test isn't perfect, but it's good
-     enough in practice, as only ASCII and EBCDIC are in common use
-     and this test correctly accepts ASCII and rejects EBCDIC.  */
-  enum { native_c_charset =
-    ('\b' == 8 && '\t' == 9 && '\n' == 10 && '\v' == 11 && '\f' == 12
-     && '\r' == 13 && ' ' == 32 && '!' == 33 && '"' == 34 && '#' == 35
-     && '%' == 37 && '&' == 38 && '\'' == 39 && '(' == 40 && ')' == 41
-     && '*' == 42 && '+' == 43 && ',' == 44 && '-' == 45 && '.' == 46
-     && '/' == 47 && '0' == 48 && '9' == 57 && ':' == 58 && ';' == 59
-     && '<' == 60 && '=' == 61 && '>' == 62 && '?' == 63 && 'A' == 65
-     && 'Z' == 90 && '[' == 91 && '\\' == 92 && ']' == 93 && '^' == 94
-     && '_' == 95 && 'a' == 97 && 'z' == 122 && '{' == 123 && '|' == 124
-     && '}' == 125 && '~' == 126)
-  };
-
-  if (!native_c_charset || multibyte)
-    return false;
-  else
-    {
-      /* Treat C and POSIX locales as being compatible.  Also, treat
-         errors as compatible, as these are invariably from stubs.  */
-      char const *loc = setlocale (LC_ALL, NULL);
-      return !loc || streq (loc, "C") || streq (loc, "POSIX");
-    }
-}
-
 /* Fetch the next lexical input character from the pattern.  There
    must at least one byte of pattern input.  Set DFA->lex.wctok to the
    value of the character or to WEOF depending on whether the input is
@@ -1080,7 +1033,7 @@ parse_bracket_exp (struct dfa *dfa)
   if (invert)
     {
       c = bracket_fetch_wc (dfa);
-      known_bracket_exp = dfa->simple_locale;
+      known_bracket_exp = dfa->localeinfo.simple;
     }
   wint_t wc = dfa->lex.wctok;
   int c1;
@@ -1210,7 +1163,7 @@ parse_bracket_exp (struct dfa *dfa)
               /* Treat [x-y] as a range if x != y.  */
               if (wc != wc2 || wc == WEOF)
                 {
-                  if (dfa->simple_locale
+                  if (dfa->localeinfo.simple
                       || (isasciidigit (c) & isasciidigit (c2)))
                     {
                       for (int ci = c; ci <= c2; ci++)
@@ -3389,7 +3342,7 @@ skip_remains_mb (struct dfa *d, unsigned char const *p,
     - [[:alpha:]] etc. in multibyte locale (except [[:digit:]] works OK)
     - back-reference: (.)\1
     - word-delimiter in multibyte locale: \<, \>, \b, \B
-   See using_simple_locale for the definition of "simple locale".  */
+   See struct localeinfo.simple for the definition of "simple locale".  */
 
 static inline char *
 dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
@@ -4352,7 +4305,6 @@ dfasyntax (struct dfa *dfa, struct localeinfo const *linfo,
 {
   memset (dfa, 0, offsetof (struct dfa, dfaexec));
   dfa->dfaexec = linfo->multibyte ? dfaexec_mb : dfaexec_sb;
-  dfa->simple_locale = using_simple_locale (linfo->multibyte);
   dfa->localeinfo = *linfo;
 
   dfa->fast = !dfa->localeinfo.multibyte;
diff --git a/support/localeinfo.c b/support/localeinfo.c
index 5c38849..694735e 100644
--- a/support/localeinfo.c
+++ b/support/localeinfo.c
@@ -48,17 +48,55 @@ is_using_utf8 (void)
   return mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
 }
 
+/* Return true if the locale is compatible enough with the C locale so
+   that the locale is single-byte, bytes are in collating-sequence
+   order, and there are no multi-character collating elements.  */
+
+static bool
+using_simple_locale (bool multibyte)
+{
+  /* The native character set is known to be compatible with
+     the C locale.  The following test isn't perfect, but it's good
+     enough in practice, as only ASCII and EBCDIC are in common use
+     and this test correctly accepts ASCII and rejects EBCDIC.  */
+  enum { native_c_charset =
+    ('\b' == 8 && '\t' == 9 && '\n' == 10 && '\v' == 11 && '\f' == 12
+     && '\r' == 13 && ' ' == 32 && '!' == 33 && '"' == 34 && '#' == 35
+     && '%' == 37 && '&' == 38 && '\'' == 39 && '(' == 40 && ')' == 41
+     && '*' == 42 && '+' == 43 && ',' == 44 && '-' == 45 && '.' == 46
+     && '/' == 47 && '0' == 48 && '9' == 57 && ':' == 58 && ';' == 59
+     && '<' == 60 && '=' == 61 && '>' == 62 && '?' == 63 && 'A' == 65
+     && 'Z' == 90 && '[' == 91 && '\\' == 92 && ']' == 93 && '^' == 94
+     && '_' == 95 && 'a' == 97 && 'z' == 122 && '{' == 123 && '|' == 124
+     && '}' == 125 && '~' == 126)
+  };
+
+  if (!native_c_charset || multibyte)
+    return false;
+
+  /* As a heuristic, use strcoll to compare native character order:
+     byte I must collate strictly before byte I + 1, so any result
+     >= 0 means the locale is not simple.  This works for all known
+     practical locales, although it would be invalid for artificial
+     locales where the native order is the collating-sequence order
+     but there are multi-character collating elements.  */
+  for (int i = 0; i < UCHAR_MAX; i++)
+    if (0 <= strcoll (((char []) {i, 0}), ((char []) {i + 1, 0})))
+      return false;
+
+  return true;
+}
+
 /* Initialize *LOCALEINFO from the current locale.  */
 
 void
 init_localeinfo (struct localeinfo *localeinfo)
 {
-  int i;
-
   localeinfo->multibyte = MB_CUR_MAX > 1;
+  localeinfo->simple = using_simple_locale (localeinfo->multibyte);
   localeinfo->using_utf8 = is_using_utf8 ();
 
-  for (i = CHAR_MIN; i <= CHAR_MAX; i++)
+  for (int i = CHAR_MIN; i <= CHAR_MAX; i++)
     {
       char c = i;
       unsigned char uc = i;
diff --git a/support/localeinfo.h b/support/localeinfo.h
index a514016..c827a2b 100644
--- a/support/localeinfo.h
+++ b/support/localeinfo.h
@@ -28,6 +28,12 @@ struct localeinfo
   /* MB_CUR_MAX > 1.  */
   bool multibyte;
 
+  /* The locale is simple, like the C locale.  These locales can be
+     processed more efficiently, as they are single-byte, their native
+     character set is in collating-sequence order, and they do not
+     have multi-character collating elements.  */
+  bool simple;
+
   /* The locale uses UTF-8.  */
   bool using_utf8;
 

-----------------------------------------------------------------------

Summary of changes:
 support/ChangeLog    |  7 ++++++-
 support/dfa.c        | 54 +++-------------------------------------------------
 support/localeinfo.c | 44 +++++++++++++++++++++++++++++++++++++++---
 support/localeinfo.h |  6 ++++++
 4 files changed, 56 insertions(+), 55 deletions(-)


hooks/post-receive
-- 
gawk
[Prev in Thread]
Current Thread
[Next in Thread]
[SCM] gawk branch, master, updated. gawk-4.1.0-3851-g6d1580b, Arnold Robbins <=
Prev by Date: [SCM] gawk branch, feature/memory-work, updated. gawk-4.1.0-4373-gb8904d5
Next by Date: [SCM] gawk branch, porting, updated. gawk-4.1.0-3919-g4e0c297
Previous by thread: [SCM] gawk branch, feature/memory-work, updated. gawk-4.1.0-4373-gb8904d5
Next by thread: [SCM] gawk branch, porting, updated. gawk-4.1.0-3919-g4e0c297
Index(es):
- Date
- Thread