gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gawk-diffs] [SCM] gawk branch, master, updated. 61e6d1bdd8bb6518d6293ddf2da845c4195d8535


From: Arnold Robbins
Subject: [gawk-diffs] [SCM] gawk branch, master, updated. 61e6d1bdd8bb6518d6293ddf2da845c4195d8535
Date: Fri, 17 Jun 2011 08:04:08 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, master has been updated
       via  61e6d1bdd8bb6518d6293ddf2da845c4195d8535 (commit)
      from  0479a809ad3a0a0437ce16f889d7b07a09c39323 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=61e6d1bdd8bb6518d6293ddf2da845c4195d8535

commit 61e6d1bdd8bb6518d6293ddf2da845c4195d8535
Author: Arnold D. Robbins <address@hidden>
Date:   Fri Jun 17 11:03:41 2011 +0300

    Put RRI into code.

diff --git a/ChangeLog b/ChangeLog
index 8507b98..6e65583 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+Fri Jun 17 10:55:27 2011  Arnold D. Robbins  <address@hidden>
+
+       Implement Rational Range Interpretation (RRI) directly in code.
+
+       * regex.h [RE_RANGES_IGNORE_LOCALES]: Remove macro and its use.
+       * dfa.c (parse_bracket_exp): Remove use of RE_RANGES_IGNORE_LOCALES
+       and just do it in code.
+       (hard-locale.h): Remove include.
+       (hard_LC_COLLATE): Remove variable and its uses.
+       * re.c (resetup): Remove use of RE_RANGES_IGNORE_LOCALES.
+       * regcomp.c (build_range_exp): Remove use of RE_RANGES_IGNORE_LOCALES
+       and just do it in code. Remove cmp_buf array; it's no longer needed.
+       * Makefile.am (base_sources): Remove hard_locale.h and hard_locale.c.
+       * hard_locale.h, hard_locale.c: Removed from dist.
+
 Sun Jun 12 23:43:06 2011  Arnold D. Robbins  <address@hidden>
 
        * re.c (resetup): Always turn on RE_RANGES_IGNORE_LOCALES.
diff --git a/Makefile.am b/Makefile.am
index c89f642..d08ff17 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -96,8 +96,6 @@ base_sources = \
        getopt1.c \
        getopt_int.h \
        gettext.h \
-       hard-locale.h \
-       hard-locale.c \
        io.c \
        mbsupport.h \
        main.c \
diff --git a/Makefile.in b/Makefile.in
index 6348d56..4feab74 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -90,10 +90,9 @@ PROGRAMS = $(bin_PROGRAMS)
 am__objects_1 = array.$(OBJEXT) awkgram.$(OBJEXT) builtin.$(OBJEXT) \
        dfa.$(OBJEXT) ext.$(OBJEXT) field.$(OBJEXT) \
        floatcomp.$(OBJEXT) gawkmisc.$(OBJEXT) getopt.$(OBJEXT) \
-       getopt1.$(OBJEXT) hard-locale.$(OBJEXT) io.$(OBJEXT) \
-       main.$(OBJEXT) msg.$(OBJEXT) node.$(OBJEXT) random.$(OBJEXT) \
-       re.$(OBJEXT) regex.$(OBJEXT) replace.$(OBJEXT) \
-       version.$(OBJEXT)
+       getopt1.$(OBJEXT) io.$(OBJEXT) main.$(OBJEXT) msg.$(OBJEXT) \
+       node.$(OBJEXT) random.$(OBJEXT) re.$(OBJEXT) regex.$(OBJEXT) \
+       replace.$(OBJEXT) version.$(OBJEXT)
 am_dgawk_OBJECTS = $(am__objects_1) eval_d.$(OBJEXT) profile.$(OBJEXT) \
        command.$(OBJEXT) debug.$(OBJEXT)
 dgawk_OBJECTS = $(am_dgawk_OBJECTS)
@@ -372,8 +371,6 @@ base_sources = \
        getopt1.c \
        getopt_int.h \
        gettext.h \
-       hard-locale.h \
-       hard-locale.c \
        io.c \
        mbsupport.h \
        main.c \
@@ -530,7 +527,6 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
-@AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
diff --git a/dfa.c b/dfa.c
index 2042dc3..f155742 100644
--- a/dfa.c
+++ b/dfa.c
@@ -64,7 +64,6 @@
 #endif
 #endif
 
-/* need this before include of hard-locale.h */
 #ifdef GAWK
 #define bool int
 #define true (1)
@@ -73,7 +72,6 @@
 
 #include "regex.h"
 #include "dfa.h"
-#include "hard-locale.h"
 #include "xalloc.h"
 
 #ifdef GAWK
@@ -650,7 +648,6 @@ static int laststart;               /* True if we're separated from beginning or (, |
                                    only by zero-width characters. */
 static int parens;             /* Count of outstanding left parens. */
 static int minrep, maxrep;     /* Repeat counts for {m,n}. */
-static int hard_LC_COLLATE;    /* Nonzero if LC_COLLATE is hard.  */
 
 static int cur_mb_len = 1;     /* Length of the multibyte representation of
                                    wctok.  */
@@ -1007,29 +1004,8 @@ parse_bracket_exp (void)
                   c1 = tolower (c1);
                   c2 = tolower (c2);
                 }
-              if (!hard_LC_COLLATE
-                  || (syntax_bits & RE_RANGES_IGNORE_LOCALES))
-                for (c = c1; c <= c2; c++)
-                  setbit_case_fold_c (c, ccl);
-              else
-                {
-                  /* Defer to the system regex library about the meaning
-                     of range expressions.  */
-                  regex_t re;
-                  char pattern[6] = { '[', 0, '-', 0, ']', 0 };
-                  char subject[2] = { 0, 0 };
-                 pattern[1] = c1;
-                 pattern[3] = c2;
-                  regcomp (&re, pattern, REG_NOSUB);
-                  for (c = 0; c < NOTCHAR; ++c)
-                    {
-                      subject[0] = c;
-                      if (!(case_fold && isupper (c))
-                          && regexec (&re, subject, 0, NULL, 0) != REG_NOMATCH)
-                        setbit_case_fold_c (c, ccl);
-                    }
-                  regfree (&re);
-                }
+              for (c = c1; c <= c2; c++)
+                setbit_case_fold_c (c, ccl);
             }
 
           colon_warning_state |= 8;
@@ -1821,9 +1797,6 @@ dfaparse (char const *s, size_t len, struct dfa *d)
   lasttok = END;
   laststart = 1;
   parens = 0;
-#ifdef LC_COLLATE
-  hard_LC_COLLATE = hard_locale (LC_COLLATE);
-#endif
 #if MBS_SUPPORT
   if (MB_CUR_MAX > 1)
     {
diff --git a/hard-locale.c b/hard-locale.c
deleted file mode 100644
index 8b7353b..0000000
--- a/hard-locale.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/* hard-locale.c -- Determine whether a locale is hard.
-
-   Copyright (C) 1997, 1998, 1999, 2002, 2003, 2004, 2006, 2007, 2009, 2010,
-   2011,
-   Free Software Foundation, Inc.
-
-   This program is free software: you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
-
-#include <config.h>
-
-#ifdef GAWK
-#define bool int
-#define true (1)
-#define false (0)
-#endif
-
-#include "hard-locale.h"
-
-#ifdef HAVE_LOCALE_H
-#include <locale.h>
-#endif
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef __GLIBC__
-# define GLIBC_VERSION __GLIBC__
-#else
-# define GLIBC_VERSION 0
-#endif
-
-/* Return true if the current CATEGORY locale is hard, i.e. if you
-   can't get away with assuming traditional C or POSIX behavior.  */
-bool
-hard_locale (int category)
-{
-#if ! (defined ENABLE_NLS && HAVE_SETLOCALE)
-  return 0;
-#else
-  bool hard = true;
-  char const *p = setlocale (category, NULL);
-
-  if (p)
-    {
-      if (2 <= GLIBC_VERSION)
-        {
-          if (strcmp (p, "C") == 0 || strcmp (p, "POSIX") == 0)
-            hard = false;
-        }
-      else
-        {
-          char *locale = strdup (p);
-          if (locale)
-            {
-              /* Temporarily set the locale to the "C" and "POSIX" locales
-                 to find their names, so that we can determine whether one
-                 or the other is the caller's locale.  */
-              if (((p = setlocale (category, "C"))
-                   && strcmp (p, locale) == 0)
-                  || ((p = setlocale (category, "POSIX"))
-                      && strcmp (p, locale) == 0))
-                hard = false;
-
-              /* Restore the caller's locale.  */
-              setlocale (category, locale);
-              free (locale);
-            }
-        }
-    }
-
-  return hard;
-#endif
-}
diff --git a/hard-locale.h b/hard-locale.h
deleted file mode 100644
index 160d544..0000000
--- a/hard-locale.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* Determine whether a locale is hard.
-
-   Copyright (C) 1999, 2003, 2004, 2009, 2010, 2011 Free Software Foundation, Inc.
-
-   This program is free software: you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
-
-#ifndef HARD_LOCALE_H_
-# define HARD_LOCALE_H_ 1
-
-#ifndef GAWK
-# include <stdbool.h>
-#endif
-
-bool hard_locale (int);
-
-#endif /* HARD_LOCALE_H_ */
diff --git a/re.c b/re.c
index 2e1a37e..234384b 100644
--- a/re.c
+++ b/re.c
@@ -388,20 +388,6 @@ resetup()
                syn = RE_SYNTAX_GNU_AWK;        /* POSIX re's + GNU ops */
 
        /*
-        * As of POSIX 1003.1-2008 (see rule 7 of 
-        * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05
-        * and the rationale, at http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap09.html#tag_21_09_03_05)
-        * POSIX changed ranges outside the POSIX locale from requiring
-        * Collation Element Order to being "undefined". This gives an
-        * implementation, like gawk, the freedom to do ranges as it
-        * pleases.
-        *
-        * We very much please to always use numeric ordering, as
-        * the Good Lord intended.
-        */
-       syn |= RE_RANGES_IGNORE_LOCALES;
-
-       /*
         * Interval expressions are now on by default, as POSIX is
         * wide-spread enough that people want it. The do_intervals
         * variable remains for use with --traditional.
diff --git a/regcomp.c b/regcomp.c
index 22c79cd..a181d63 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2651,7 +2651,6 @@ build_range_exp (reg_syntax_t syntax, bitset_t sbcset,
 # endif /* not RE_ENABLE_I18N */
 {
   unsigned int start_ch, end_ch;
-  int ignore_locales = (syntax & RE_RANGES_IGNORE_LOCALES) != 0;
 
   /* Equivalence Classes and Character Classes can't be a range start/end.  */
   if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
@@ -2672,7 +2671,6 @@ build_range_exp (reg_syntax_t syntax, bitset_t sbcset,
     wchar_t wc;
     wint_t start_wc;
     wint_t end_wc;
-    wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
 
     start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
                : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
@@ -2698,12 +2696,7 @@ build_range_exp (reg_syntax_t syntax, bitset_t sbcset,
 #endif
     if (start_wc == WEOF || end_wc == WEOF)
       return REG_ECOLLATE;
-    cmp_buf[0] = start_wc;
-    cmp_buf[4] = end_wc;
-    if (ignore_locales && start_wc > end_wc)
-      return REG_ERANGE;
-    else if ((syntax & RE_NO_EMPTY_RANGES)
-             && wcscoll (cmp_buf, cmp_buf + 4) > 0)
+    else if ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc)
       return REG_ERANGE;
 
     /* Got valid collation sequence values, add them as a new entry.
@@ -2742,23 +2735,10 @@ build_range_exp (reg_syntax_t syntax, bitset_t sbcset,
       }
 
     /* Build the table for single byte characters.  */
-    if (ignore_locales)
-      {
-        for (wc = 0; wc < SBC_MAX; ++wc)
-          {
-            if (start_wc <= wc && wc <= end_wc)
-              bitset_set (sbcset, wc);
-          }
-      }
-    else
+    for (wc = 0; wc < SBC_MAX; ++wc)
       {
-        for (wc = 0; wc < SBC_MAX; ++wc)
-          {
-            cmp_buf[2] = wc;
-            if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
-                && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
-              bitset_set (sbcset, wc);
-          }
+         if (start_wc <= wc && wc <= end_wc)
+           bitset_set (sbcset, wc);
       }
   }
 # else /* not RE_ENABLE_I18N */
diff --git a/regex.h b/regex.h
index a2d120f..6bc503b 100644
--- a/regex.h
+++ b/regex.h
@@ -184,10 +184,6 @@ typedef unsigned long int reg_syntax_t;
 /* If this bit is set, then no_sub will be set to 1 during
    re_compile_pattern.  */
 # define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
-
-/* If this bit is set, then ranges act like they are in
-   the "C" locale.  */
-# define RE_RANGES_IGNORE_LOCALES (RE_NO_SUB << 1)
 #endif
 
 /* This global variable defines the particular regexp syntax to use (for
@@ -213,7 +209,6 @@ extern reg_syntax_t re_syntax_options;
 
 #define RE_SYNTAX_GNU_AWK                                              \
   ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS            \
-    | RE_RANGES_IGNORE_LOCALES                                          \
     | RE_INVALID_INTERVAL_ORD)                                         \
    & ~(RE_DOT_NOT_NULL | RE_CONTEXT_INDEP_OPS                          \
       | RE_CONTEXT_INVALID_OPS ))

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog     |   15 ++++++++++
 Makefile.am   |    2 -
 Makefile.in   |   10 ++-----
 dfa.c         |   31 +--------------------
 hard-locale.c |   83 ---------------------------------------------------------
 hard-locale.h |   27 ------------------
 re.c          |   14 ---------
 regcomp.c     |   28 +++----------------
 regex.h       |    5 ---
 9 files changed, 24 insertions(+), 191 deletions(-)
 delete mode 100644 hard-locale.c
 delete mode 100644 hard-locale.h


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]