gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] gawk branch, feature/minrx, updated. gawk-4.1.0-5659-g5aa09a55


From: Arnold Robbins
Subject: [SCM] gawk branch, feature/minrx, updated. gawk-4.1.0-5659-g5aa09a55
Date: Sat, 20 Jul 2024 15:24:51 -0400 (EDT)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, feature/minrx has been updated
       via  5aa09a55106ecb72d3145e51b025c9fffe9796e5 (commit)
      from  da96ad23e44068f5904121e2881531251bb45777 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=5aa09a55106ecb72d3145e51b025c9fffe9796e5

commit 5aa09a55106ecb72d3145e51b025c9fffe9796e5
Author: Arnold D. Robbins <arnold@skeeve.com>
Date:   Sat Jul 20 22:23:41 2024 +0300

    Include minrx.{h,cpp} and some doc updates.

diff --git a/ChangeLog b/ChangeLog
index 9bd71428..f7888c52 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2024-07-20         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * version.c: Add "-minrx" to the version, for now.
+
 2024-07-02         Arnold D. Robbins     <arnold@skeeve.com>
 
        * re.c (make_regexp): \u escapes also now treated literally
diff --git a/README_d/ChangeLog b/README_d/ChangeLog
index 0fd6b5b6..a815dc00 100644
--- a/README_d/ChangeLog
+++ b/README_d/ChangeLog
@@ -1,3 +1,7 @@
+2024-07-20         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * README.matchers: Updated.
+
 2024-03-10         Arnold D. Robbins     <arnold@skeeve.com>
 
        * README.matchers: New file.
diff --git a/README_d/README.matchers b/README_d/README.matchers
index 80278993..50a726e3 100644
--- a/README_d/README.matchers
+++ b/README_d/README.matchers
@@ -1,11 +1,11 @@
-Sun 10 Mar 2024 10:39:42 IST
-============================
+Sat Jul 20 10:18:28 PM IDT 2024
+===============================
 
                * I * M * P * O * R * T * A * N * T *
 
 This release includes a new regular expression matcher, MinRX, written
 by Mike Haertel, the original author of GNU grep. It's available from
-GITHUB URL HERE.
+https://github.com/mikehaertel/minrx.
 
 This matcher is fully POSIX compliant, which the current GNU matchers
 are not. In particular it follows POSIX rules for finding the longest
diff --git a/support/ChangeLog b/support/ChangeLog
index 346c2ee2..415829be 100644
--- a/support/ChangeLog
+++ b/support/ChangeLog
@@ -1,3 +1,7 @@
+2024-07-20         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * minrx.h, minrx.cpp: New files.
+
 2024-07-16         Arnold D. Robbins     <arnold@skeeve.com>
 
        * Makefile.am (CXXFLAGS): Added for new version of minrx.
diff --git a/support/minrx.cpp b/support/minrx.cpp
new file mode 100644
index 00000000..ceb4b78b
--- /dev/null
+++ b/support/minrx.cpp
@@ -0,0 +1,1176 @@
+//
+// MinRX: a minimal matcher for POSIX Extended Regular Expressions.
+// Copyright (C) 2023, 2024 Michael J. Haertel.
+//
+// This file is part of MinRX.
+//
+// MinRX is free software; you can redistribute it and/or modify it
+// under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation; either version 3 of the License, or
+// (at your option) any later version.
+//
+// MinRX is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// See the GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#include <cctype>
+#include <climits>
+#include <clocale>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <cwchar>
+#include <cwctype>
+#include <algorithm>
+#include <deque>
+#include <map>
+#include <mutex>
+#include <optional>
+#include <set>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "minrx.h"
+
+namespace MinRX {
+
+template <typename UINT> inline auto ctz(UINT x) { return __builtin_ctz(x); }
+template <> inline auto ctz(unsigned long x) { return __builtin_ctzl(x); }
+template <> inline auto ctz(unsigned long long x) { return __builtin_ctzll(x); 
}
+
+template <typename TYPE, TYPE INIT = 0>
+struct COWVec {
+       struct Storage;
+       struct Allocator {
+               std::size_t length;
+               Storage *freelist = nullptr;
+               Allocator(std::size_t length)
+               : length(length)
+               {}
+               ~Allocator() {
+                       for (Storage *s = freelist, *sfreelink = nullptr; s != 
nullptr; s = sfreelink) {
+                               sfreelink = s->u.freelink;
+                               ::operator delete(s);
+                       }
+               }
+               Storage *alloc() {
+                       Storage *r;
+                       if (freelist) {
+                               r = freelist;
+                               freelist = r->u.freelink;
+                               r->u.allocator = this;
+                               r->refcnt = 1;
+                       } else {
+                               void *p = ::operator new(sizeof (Storage) + 
(length - 1) * sizeof (TYPE));
+                               r = new (p) Storage(*this);
+                       }
+                       for (std::size_t i = 0; i != length; ++i)
+                               (*r)[i] = INIT;
+                       return r;
+               }
+               void dealloc(Storage *s) {
+                       s->u.freelink = freelist;
+                       freelist = s;
+               }
+       };
+       struct Storage {
+               union {
+                       Allocator *allocator;
+                       Storage *freelink;
+               } u;
+               std::size_t refcnt;
+               TYPE hack[1];
+               const TYPE &operator[](std::size_t i) const { return 
(&hack[0])[i]; }
+               TYPE &operator[](std::size_t i) { return (&hack[0])[i]; }
+               Storage *clone() {
+                       auto s = u.allocator->alloc();
+                       for (std::size_t i = 0; i != u.allocator->length; ++i)
+                               (*s)[i] = (*this)[i];
+                       return s;
+               }
+               Storage(Allocator &a)
+               : u({&a})
+               , refcnt(1)
+               {}
+       };
+       Storage *storage;
+       COWVec(): storage(nullptr) {}
+       COWVec(Allocator &a): storage(a.alloc()) {}
+       COWVec(const COWVec &cv): storage(cv.storage) { ++storage->refcnt; }
+       COWVec(COWVec &&cv): storage(cv.storage) { cv.storage = nullptr; }
+       COWVec &operator=(const COWVec &cv) {
+               ++cv.storage->refcnt;
+               if (storage && --storage->refcnt == 0)
+                       storage->u.allocator->dealloc(storage);
+               storage = cv.storage;
+               return *this;
+       }
+       COWVec &operator=(COWVec &&cv) {
+               if (storage && --storage->refcnt == 0)
+                       storage->u.allocator->dealloc(storage);
+               storage = cv.storage;
+               cv.storage = nullptr;
+               return *this;
+       }
+       ~COWVec() { if (storage && --storage->refcnt == 0) 
storage->u.allocator->dealloc(storage); }
+       bool cmpgt(const COWVec &other, std::size_t limit) const {
+               std::size_t i = 0;
+               TYPE *xv = &(*storage)[0];
+               TYPE *yv = &(*other.storage)[0];
+               while (i < limit && xv[i] == yv[i])
+                       ++i;
+               if (i == limit)
+                       return false;
+               return xv[i] > yv[i];
+       }
+       const TYPE &get(std::size_t idx) const { return (*storage)[idx]; }
+       COWVec &put(std::size_t idx, TYPE val) {
+               if ((*storage)[idx] == val)
+                       return *this;
+               if (storage->refcnt > 1) {
+                       --storage->refcnt;
+                       storage = storage->clone();
+                       storage->refcnt = 1;
+               }
+               (*storage)[idx] = val;
+               return *this;
+       }
+       COWVec &sub(std::size_t idx, TYPE val) {
+               if (storage->refcnt > 1) {
+                       --storage->refcnt;
+                       storage = storage->clone();
+                       storage->refcnt = 1;
+               }
+               (*storage)[idx] -= val;
+               return *this;
+       }
+};
+
+template <typename UINT>
+struct QSet {
+       std::uint64_t *bits[10];
+       unsigned int depth = 0;
+       QSet(UINT limit) {
+               std::size_t s[10], t = 0;
+               do
+                       t += (limit = s[depth++] = (limit + 63u) / 64u);
+               while (limit > 1);
+               bits[0] = (std::uint64_t *) ::operator new(t * sizeof 
(std::uint64_t));
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+               for (unsigned int i = 1; i < depth; ++i)
+                       bits[i] = bits[i - 1] + s[i - 1];
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+               bits[depth - 1][0] = 0;
+       }
+       ~QSet() { ::operator delete(bits[0]); }
+       bool contains(UINT k) const {
+               for (auto d = depth; d > 0; ) {
+                       std::size_t idx = k >> 6 * d;
+                       unsigned int bit = (k >> 6 * --d) & 0x3f;
+                       if ((bits[d][idx] & ((std::uint64_t) 1 << bit)) == 0)
+                               return false;
+               }
+               return true;
+       }
+       bool empty() const { return !bits[depth - 1][0]; }
+       UINT remove() {
+               UINT k = 0;
+               auto d = depth;
+               do {
+                       auto m = bits[--d][k];
+                       k = (k << 6) | ctz(m);
+               } while (d != 0);
+               UINT r = k;
+               for (; d < depth && !(bits[d][k >> 6] &= ~((std::uint64_t) 1 << 
(k & 0x3f))); k >>= 6, ++d)
+                       ;
+               return r;
+       }
+       bool insert(UINT k) {
+               bool r = false;
+               for (auto d = depth; d-- != 0; ) {
+                       auto bp = bits[d] + ((k >> 6 * d) >> 6);
+                       std::uint64_t m = (std::uint64_t) 1 << ((k >> 6 * d) & 
0x3f);
+                       if ((*bp & m) == 0) {
+                               if (d)
+                                       bits[d - 1][k >> (6 * d)] = 0;
+                               else
+                                       r = true;
+                       }
+                       *bp |= m;
+               }
+               return r;
+       }
+};
+
+template <typename UINT, typename DATA>
+struct QVec {
+       QSet<UINT> qset;
+       DATA *storage;
+       QVec(UINT l): qset(l), storage(static_cast<DATA *>(::operator new(l * 
sizeof (DATA)))) {}
+       ~QVec() {
+               clear();
+               ::operator delete(storage);
+       }
+       void clear() {
+               while (!qset.empty()) {
+                       auto i = qset.remove();
+                       storage[i].~DATA();
+               }
+       }
+       bool contains(UINT k) const { return qset.contains(k); }
+       bool empty() const { return qset.empty(); }
+       std::tuple<bool, DATA&> insert(UINT k, const DATA& v) {
+               bool r = qset.insert(k);
+               if (r)
+                       new (storage + k) DATA(v);
+               return {r, storage[k]};
+       }
+       DATA &lookup(UINT k) { return storage[k]; }
+       const DATA &lookup(UINT k) const { return storage[k]; }
+       std::tuple<UINT, DATA> remove() {
+               auto k = qset.remove();
+               return {k, std::move(storage[k])};
+       }
+};
+
+typedef int32_t WChar;                 // because wchar_t may not be 32 bits
+constexpr int32_t WCharMax = 0x10FFFF; // maximum code point: valid for 
Unicode and (FIXME!) blithely assumed for non-Unicode
+struct WConv {
+       enum { End = -1 };
+       const char *const bp;
+       const char *const ep;
+       const char *cp;
+       std::mbstate_t mbs;
+       WChar wch = End;
+       int len = 0;
+       WConv(const WConv &) = default;
+       WConv(const char *bp, const char *ep): bp(bp), ep(ep), cp(bp) { 
std::memset(&mbs, 0, sizeof mbs); }
+       auto look() const { return wch; }
+       auto lookahead(WConv &(WConv::*next)()) const { return 
(WConv(*this).*next)().look(); }
+       WConv &nextchr() {
+               wchar_t wct = L'\0';
+               if ((cp += len) != ep) {
+                       auto n = mbrtowc(&wct, cp, ep - cp, &mbs);
+                       if (n == 0 || n == (std::size_t) -1 || n == 
(std::size_t) -2)
+                               len = 1;
+                       else
+                               len = n;
+                       wch = wct;
+               } else {
+                       wch = End, len = 0;
+               }
+               return *this;
+       }
+       auto save() { return std::make_tuple(cp, wch, len); }
+       void restore(std::tuple<const char *, WChar, int> t) { std::tie(cp, 
wch, len) = t; }
+};
+
+struct CSet {
+       struct Range {
+               Range(WChar x, WChar y): min(std::min(x, y)), max(std::max(x, 
y)) {}
+               WChar min, max;
+               int operator<=>(const Range &r) const {
+                       return (min > r.max) - (max < r.min);
+               }
+       };
+       std::set<Range> ranges;
+       bool inverted = false;
+       CSet &operator|=(const CSet &cs) {
+               for (const auto &e : cs.ranges)
+                       set(e.min, e.max);
+               return *this;
+       }
+       CSet &invert() { inverted = true; return *this; }
+       CSet &set(WChar wclo, WChar wchi) {
+               auto e = Range(wclo - (wclo != -2147483648), wchi + (wchi != 
2147483647));
+               auto [x, y] = ranges.equal_range(e);
+               if (x == y) {
+                       ranges.insert(Range(wclo, wchi));
+               } else {
+                       if (x->max >= e.min)
+                               wclo = std::min(wclo, x->min);
+                       auto z = y;
+                       --z;
+                       if (z->min <= e.max)
+                               wchi = std::max(wchi, z->max);
+                       auto i = ranges.erase(x, y);
+                       ranges.insert(i, Range(wclo, wchi));
+               }
+               return *this;
+       }
+       CSet &set(WChar wc) { return set(wc, wc); }
+       bool test(WChar wc) const {
+               auto i = ranges.lower_bound(Range(wc, wc));
+               return inverted ^ (i != ranges.end() && wc >= i->min && wc <= 
i->max);
+       }
+};
+
+typedef std::size_t NInt;
+
+struct Node {
+       enum Type {
+               // character-matching node types
+               /* char <= uchar max */ // no args
+               CSet = WCharMax + 1,    // args = index in Regexp::csets vector
+               // epsilon-matching node types
+               Exit,                   // no args
+               Fork,                   // args = priority delta, offset to next
+               Goto,                   // args = offset to join, offset to next
+               Join,                   // args = offset to fork
+               Loop,                   // args = offset to next, optional flag
+               Next,                   // args = offset to loop, infinite flag
+               SubL,                   // args = minimum and maximum contained 
subexpression numbers
+               SubR,                   // args = minimum and maximum contained 
subexpression numbers
+               ZBOB,                   // no args - match empty string at 
beginning of buffer
+               ZEOB,                   // no args - match empty string at end 
of buffer
+               ZBOL,                   // no args - match empty string at 
beginning of buffer or following \n
+               ZEOL,                   // no args - match empty string at end 
of buffer or preceding \n
+               ZBOW,                   // no args - match empty string at 
beginning of word
+               ZEOW,                   // no args - match empty string at end 
of word
+               ZXOW,                   // no args - match empty string at 
either end of word
+               ZNWB                    // no args - match empty string at 
non-word-boundary
+       };
+       NInt type;
+       NInt args[2];
+       NInt nstk;
+};
+
+struct Regexp {
+       const std::vector<CSet> csets;
+       const std::vector<Node> nodes;
+       std::size_t nstk;
+       std::size_t nsub;
+       minrx_result_t err;
+};
+
+struct Compile {
+       const minrx_regcomp_flags_t flags;
+       WConv wconv;
+       std::vector<CSet> csets;
+       std::optional<std::size_t> dot;
+       std::map<WChar, unsigned int> icmap;
+       NInt nsub = 0;
+       Compile(const char *bp, const char *ep, minrx_regcomp_flags_t flags): 
flags(flags), wconv(bp, ep) { wconv.nextchr(); }
+       static std::map<std::string, CSet> cclmemo;
+       static std::mutex cclmutex;
+       bool cclass(CSet &cs, const std::string &name) {
+               auto wct = std::wctype(name.c_str());
+               if (wct) {
+                       std::string key = name + ":" + std::setlocale(LC_CTYPE, 
NULL) + ":" + ((flags & MINRX_REG_ICASE) != 0 ? "1" : "0");
+                       std::lock_guard<std::mutex> lock(cclmutex);
+                       auto i = cclmemo.find(key);
+                       if (i == cclmemo.end()) {
+                               CSet cs;
+                               for (WChar wc = 0; wc <= WCharMax; ++wc) {
+                                       if (iswctype(wc, wct)) {
+                                               cs.set(wc);
+                                               if ((flags & MINRX_REG_ICASE) 
!= 0) {
+                                                       
cs.set(std::towlower(wc));
+                                                       
cs.set(std::towupper(wc));
+                                               }
+                                       }
+                               }
+                               cclmemo.emplace(key, cs);
+                               i = cclmemo.find(key);
+                       }
+                       cs |= i->second; // N.B. could probably be safely 
outside the critical section, since cclmemo entries are never deleted
+                       return true;
+               }
+               return false;
+       }
+       bool num(NInt &n) {
+               auto satmul = [](NInt x, NInt y) -> NInt {
+                       return (x == 0 || y == 0) ? 0 : ((x * y / x == y) ? x * 
y : -1);
+               };
+               auto wc = wconv.look();
+               if (wc < L'0' || wc > L'9')
+                       return false;
+               NInt v = 0;
+               do {
+                       v = satmul(v, 10);
+                       if (v == (NInt) -1 || (v += wc - L'0') < (NInt) wc - 
L'0') {
+                               do
+                                       wc = wconv.nextchr().look();
+                               while (wc >= L'0' && wc <= L'9');
+                               n = -1;
+                               return true;
+                       }
+                       wc = wconv.nextchr().look();
+               } while (wc >= L'0' && wc <= L'9');
+               n = v;
+               return true;
+       }
+       typedef std::tuple<std::deque<Node>, std::size_t, minrx_result_t> 
Subexp;
+       Subexp alt(bool nested, NInt nstk) {
+               auto [lhs, lhmaxstk, err] = cat(nested, nstk);
+               if (err)
+                       return {lhs, lhmaxstk, err};
+               if (wconv.look() == L'|') {
+                       for (auto &l : lhs)
+                               l.nstk += 2;
+                       std::vector<Subexp> alts;
+                       while (wconv.look() == L'|') {
+                               wconv.nextchr();
+                               alts.push_back(cat(nested, nstk + 1));
+                       }
+                       auto [rhs, rhmaxstk, err] = alts.back();
+                       if (err)
+                               return {rhs, rhmaxstk, err};
+                       rhs.push_front({Node::Goto, {rhs.size(), rhs.size()}, 
nstk + 1});
+                       alts.pop_back();
+                       while (!alts.empty()) {
+                               auto [mhs, mhmaxstk, _] = alts.back();
+                               alts.pop_back();
+                               rhs.insert(rhs.begin(), mhs.begin(), mhs.end());
+                               rhmaxstk = std::max(mhmaxstk, rhmaxstk);
+                               rhs.push_front({Node::Goto, {rhs.size(), 
mhs.size()}, nstk + 1});
+                       }
+                       lhs.push_front({Node::Fork, {(NInt) -1, lhs.size()}, 
nstk});
+                       lhs.insert(lhs.end(), rhs.begin(), rhs.end());
+                       lhmaxstk = std::max(lhmaxstk, rhmaxstk);
+                       lhs.push_back({Node::Join, {lhs.size() - 1, 0}, nstk + 
1});
+               }
+               return {lhs, lhmaxstk, MINRX_REG_SUCCESS};
+       }
+       Subexp cat(bool nested, NInt nstk) {
+               auto [lhs, lhmaxstk, err] = rep(nested, nstk);
+               if (err)
+                       return {lhs, lhmaxstk, err};
+               auto wc = wconv.look();
+               while (wc != WConv::End && wc != L'|' && (wc != L')' || 
!nested)) {
+                       auto [rhs, rhmaxstk, err] = rep(nested, nstk);
+                       if (err)
+                               return {rhs, rhmaxstk, err};
+                       lhs.insert(lhs.end(), rhs.begin(), rhs.end());
+                       lhmaxstk = std::max(lhmaxstk, rhmaxstk);
+                       wc = wconv.look();
+               }
+               return {lhs, lhmaxstk, MINRX_REG_SUCCESS};
+       }
+       Subexp mkrep(const Subexp &lh, bool optional, bool infinite, NInt nstk) 
{
+               auto [lhs, lhmaxstk, _] = lh;
+               if (optional && !infinite) {
+                       for (auto &l : lhs) l.nstk += 1;
+                       auto lhsize = lhs.size();
+                       lhs.push_front({Node::Fork, {(NInt) 1, lhsize}, nstk});
+                       lhs.push_back({Node::Join, {lhsize, 0}, nstk + 1});
+                       return {lhs, lhmaxstk + 1, MINRX_REG_SUCCESS};
+               } else {
+                       for (auto &l : lhs) l.nstk += 3;
+                       auto lhsize = lhs.size();
+                       lhs.push_front({Node::Loop, {lhsize, (NInt) optional}, 
nstk});
+                       lhs.push_back({Node::Next, {lhsize, (NInt) infinite}, 
nstk + 3});
+                       return {lhs, lhmaxstk + 3, MINRX_REG_SUCCESS};
+               }
+       }
+       Subexp mkrep(const Subexp &lh, NInt m, NInt n, NInt nstk) {
+               if ((m != (NInt) -1 && m > RE_DUP_MAX) || (n != (NInt) -1 && n 
> RE_DUP_MAX) || m > n)
+                       return {{}, 0, MINRX_REG_BADBR};
+               if (n == 0)
+                       return {{}, 0, MINRX_REG_SUCCESS};
+               if (m == 0 && n == 1)
+                       return mkrep(lh, true, false, nstk);
+               if (m == 0 && n == (NInt) -1)
+                       return mkrep(lh, true, true, nstk);
+               if (m == 1 && n == 1)
+                       return lh;
+               if (m == 1 && n == (NInt) -1)
+                       return mkrep(lh, false, true, nstk);
+               auto [lhs, lhmaxstk, _] = lh;
+               auto [rhs, rhmaxstk, __] = lh;
+               NInt k;
+               for (k = 1; k < m; ++k)
+                       lhs.insert(lhs.end(), rhs.begin(), rhs.end());
+               if (n != (NInt) -1 && k < n) {
+                       lhmaxstk += 1;
+                       rhmaxstk += 1;
+                       for (auto &r : rhs)
+                               r.nstk += 1;
+                       auto rhsize = rhs.size();
+                       rhs.push_front({Node::Fork, {1, rhsize}, nstk});
+                       rhs.push_back({Node::Join, {rhsize, 0}, nstk + 1});
+                       for (; k < n; ++k)
+                               lhs.insert(lhs.end(), rhs.begin(), rhs.end());
+               }
+               if (n == (NInt) -1) {
+                       lhmaxstk += 3;
+                       rhmaxstk += 3;
+                       for (auto &r : rhs)
+                               r.nstk += 3;
+                       auto rhsize = rhs.size();
+                       rhs.push_front({Node::Loop, {rhsize, 0}, nstk});
+                       rhs.push_back({Node::Next, {rhsize, 1}, nstk + 3});
+                       lhs.insert(lhs.end(), rhs.begin(), rhs.end());
+               }
+               if (m == 0)
+                       return mkrep({lhs, rhmaxstk, MINRX_REG_SUCCESS}, true, 
false, nstk);
+               else
+                       return {lhs, rhmaxstk, MINRX_REG_SUCCESS};
+       }
+       Subexp rep(bool nested, NInt nstk) {
+               auto lh = chr(nested, nstk);
+               if (std::get<minrx_result_t>(lh))
+                       return lh;
+               bool optional = false, infinite = false;
+               for (;;) {
+                       auto wc = wconv.look();
+                       switch (wc) {
+                       case L'?':
+                               optional = true;
+                               wconv.nextchr();
+                               continue;
+                       case L'*':
+                               optional = infinite = true;
+                               wconv.nextchr();
+                               continue;
+                       case L'+':
+                               infinite = true;
+                               wconv.nextchr();
+                               continue;
+                       case L'{':
+                               if ((flags & MINRX_REG_BRACE_COMPAT) == 0 || 
std::isdigit(wconv.lookahead(&WConv::nextchr))) {
+                                       if (optional || infinite) {
+                                               lh = mkrep(lh, optional, 
infinite, nstk);
+                                               optional = infinite = false;
+                                       }
+                                       wc = wconv.nextchr().look();
+                                       if (wc == WConv::End)
+                                               return {{}, 0, 
MINRX_REG_EBRACE};
+                                       NInt m, n;
+                                       if (!num(m))
+                                               return {{}, 0, MINRX_REG_BADBR};
+                                       wc = wconv.look();
+                                       if (wc == L'}') {
+                                               lh = mkrep(lh, m, m, nstk);
+                                               wconv.nextchr();
+                                               continue;
+                                       }
+                                       if (wc == WConv::End)
+                                               return {{}, 0, 
MINRX_REG_EBRACE};
+                                       if (wc != L',')
+                                               return {{}, 0, MINRX_REG_BADBR};
+                                       wc = wconv.nextchr().look();
+                                       if (wc == L'}') {
+                                               lh = mkrep(lh, m, -1, nstk);
+                                               wconv.nextchr();
+                                               continue;
+                                       }
+                                       if (!num(n))
+                                               return {{}, 0, MINRX_REG_BADBR};
+                                       wc = wconv.look();
+                                       if (wc == WConv::End)
+                                               return {{}, 0, 
MINRX_REG_EBRACE};
+                                       if (wc != L'}')
+                                               return {{}, 0, MINRX_REG_BADBR};
+                                       lh = mkrep(lh, m, n, nstk);
+                                       wconv.nextchr();
+                                       continue;
+                               }
+                               // fall through
+                       default:
+                               if (optional || infinite) {
+                                       lh = mkrep(lh, optional, infinite, 
nstk);
+                                       optional = infinite = false;
+                               }
+                               return lh;
+                       }
+               }
+       }
+       Subexp chr(bool nested, NInt nstk) {
+               std::deque<Node> lhs;
+               std::size_t lhmaxstk;
+               auto wc = wconv.look();
+               switch (wc) {
+               default:
+               normal:
+                       lhmaxstk = nstk;
+                       if ((flags & MINRX_REG_ICASE) == 0) {
+                               lhs.push_back({(NInt) wc, {0, 0}, nstk});
+                       } else {
+                               WChar wcl = std::towlower(wc), wcu = 
std::towupper(wc);
+                               auto key = std::min(wc, std::min(wcl, wcu));
+                               if (icmap.find(key) == icmap.end()) {
+                                       icmap.emplace(key, csets.size());
+                                       csets.emplace_back();
+                                       csets.back().set(wc);
+                                       csets.back().set(wcl);
+                                       csets.back().set(wcu);
+                               }
+                               lhs.push_back({Node::CSet, {icmap[key], 0}, 
nstk});
+                       }
+                       wconv.nextchr();
+                       break;
+               case L'{':
+                       if ((flags & MINRX_REG_BRACE_COMPAT) != 0 && 
!std::isdigit(wconv.lookahead(&WConv::nextchr)))
+                               goto normal;
+                       // fall through
+               case L'*':
+               case L'+':
+               case L'?':
+                       return {{}, 0, MINRX_REG_BADRPT};
+               case L'[':
+                       {
+                               lhmaxstk = nstk;
+                               wc = wconv.nextchr().look();
+                               bool invert = wc == L'^';
+                               if (invert)
+                                       wc = wconv.nextchr().look();
+                               CSet cs;
+                               for (bool first = true;; first = false) {
+                                       auto wclo = wc, wchi = wc;
+                                       if (wclo == WConv::End)
+                                               return {{}, 0, 
MINRX_REG_EBRACK};
+                                       wc = wconv.nextchr().look();
+                                       if (wclo == L']' && !first)
+                                               break;
+                                       if (wclo == L'\\' && (flags & 
MINRX_REG_BRACK_ESCAPE) != 0) {
+                                               if (wc != WConv::End) {
+                                                       wclo = wchi = wc;
+                                                       wc = 
wconv.nextchr().look();
+                                               } else {
+                                                       return {{}, 0, 
MINRX_REG_EESCAPE};
+                                               }
+                                       } else if (wclo == L'[') {
+                                               if (wc == L'.') {
+                                                       wc = 
wconv.nextchr().look();
+                                                       wclo = wchi = wc;
+                                                       wc = 
wconv.nextchr().look();
+                                                       if (wc != L'.' || (wc = 
wconv.nextchr().look() != L']'))
+                                                               return {{}, 0, 
MINRX_REG_ECOLLATE};
+                                               } else if (wc == L':') {
+                                                       wconv.nextchr();
+                                                       // FIXME: search for 
the matching :] using wconv.nextchr() rather than memchr()
+                                                       const char *colon = 
(const char *) std::memchr(wconv.cp, ':', wconv.ep - wconv.cp);
+                                                       if (colon && wconv.ep - 
colon >= 2 && colon[1] == ']') {
+                                                               auto cclname = 
std::string(wconv.cp, colon);
+                                                               if (cclass(cs, 
cclname)) {
+                                                                       
wconv.cp = colon + 2;
+                                                                       
wconv.len = 0;
+                                                                       wc = 
wconv.nextchr().look();
+                                                                       
continue; // can't be range endpoint
+                                                               } else {
+                                                                       return 
{{}, 0, MINRX_REG_ECTYPE};
+                                                               }
+                                                       } else {
+                                                               return {{}, 0, 
MINRX_REG_ECTYPE};
+                                                       }
+                                               } else if (wc == L'=') {
+                                                       // FIXME: recognize 
some equivalence classes.
+                                                       return {{}, 0, 
MINRX_REG_ECOLLATE};
+                                               }
+                                       }
+                                       bool range = false;
+                                       if (wc == L'-') {
+                                               auto save = wconv.save();
+                                               wc = wconv.nextchr().look();
+                                               if (wc == WConv::End)
+                                                       return {{}, 0, 
MINRX_REG_EBRACK};
+                                               if (wc != L']') {
+                                                       wchi = wc;
+                                                       wc = 
wconv.nextchr().look();
+                                                       if (wchi == L'\\' && 
(flags & MINRX_REG_BRACK_ESCAPE) != 0) {
+                                                               if (wc != 
WConv::End) {
+                                                                       wchi = 
wc;
+                                                                       wc = 
wconv.nextchr().look();
+                                                               } else {
+                                                                       return 
{{}, 0, MINRX_REG_EESCAPE};
+                                                               }
+                                                       } else if (wchi == 
L'[') {
+                                                               if (wc == L'.') 
{
+                                                                       wchi = 
wconv.nextchr().look();
+                                                                       wc = 
wconv.nextchr().look();
+                                                                       if (wc 
!= L'.' || (wc = wconv.nextchr().look()) != L']')
+                                                                               
return {{}, 0, MINRX_REG_ECOLLATE};
+                                                                       wc = 
wconv.nextchr().look();
+                                                               } else if (wc 
== L':' || wc == L'=') {
+                                                                       return 
{{}, 0, MINRX_REG_ERANGE}; // can't be range endpoint
+                                                               }
+                                                       }
+                                                       range = true;
+                                               } else {
+                                                       wconv.restore(save);
+                                                       wc = L'-';
+                                               }
+                                       }
+                                       if (wclo > wchi)
+                                               return {{}, 0, 
MINRX_REG_ERANGE};
+                                       cs.set(wclo, wchi);
+                                       if ((flags & MINRX_REG_ICASE) != 0)
+                                               for (auto wc = wclo; wc <= 
wchi; ++wc) {
+                                                       
cs.set(std::tolower(wc));
+                                                       
cs.set(std::toupper(wc));
+                                               }
+                                       if (range && wc == L'-' && 
wconv.lookahead(&WConv::nextchr) != L']')
+                                               return {{}, 0, 
MINRX_REG_ERANGE};
+                               }
+                               lhs.push_back({Node::CSet, {csets.size(), 0}, 
nstk});
+                               if (invert) {
+                                       if ((flags & MINRX_REG_NEWLINE) != 0)
+                                               cs.set(L'\n');
+                                       cs.invert();
+                               }
+                               csets.emplace_back(cs);
+                       }
+                       break;
+               case L'.':
+                       if (!dot.has_value()) {
+                               dot = csets.size();
+                               csets.emplace_back();
+                               if ((flags & MINRX_REG_NEWLINE) != 0)
+                                       csets.back().set(L'\n');
+                               csets.back().invert();
+                       }
+                       lhmaxstk = nstk;
+                       lhs.push_back({Node::CSet, {*dot, 0}, nstk});
+                       wconv.nextchr();
+                       break;
+               case L'^':
+                       lhmaxstk = nstk;
+                       lhs.push_back({(flags & MINRX_REG_NEWLINE) == 0 ? 
Node::ZBOB : Node::ZBOL, {0, 0}, nstk});
+                       wconv.nextchr();
+                       break;
+               case L'$':
+                       lhmaxstk = nstk;
+                       lhs.push_back({(flags & MINRX_REG_NEWLINE) == 0 ? 
Node::ZEOB : Node::ZEOL, {0, 0}, nstk});
+                       wconv.nextchr();
+                       break;
+               case L'\\':
+                       lhmaxstk = nstk;
+                       wc = wconv.nextchr().look();
+                       switch (wc) {
+                       case L'<':
+                               if ((flags & MINRX_REG_EXTENSIONS_BSD) == 0)
+                                       goto normal;
+                               lhs.push_back({Node::ZBOW, {0, 0}, nstk});
+                               break;
+                       case L'>':
+                               if ((flags & MINRX_REG_EXTENSIONS_BSD) == 0)
+                                       goto normal;
+                               lhs.push_back({Node::ZEOW, {0, 0}, nstk});
+                               break;
+                       case L'`':
+                               if ((flags & MINRX_REG_EXTENSIONS_GNU) == 0)
+                                       goto normal;
+                               lhs.push_back({Node::ZBOB, {0, 0}, nstk});
+                               break;
+                       case L'\'':
+                               if ((flags & MINRX_REG_EXTENSIONS_GNU) == 0)
+                                       goto normal;
+                               lhs.push_back({Node::ZEOB, {0, 0}, nstk});
+                               break;
+                       case L'b':
+                               if ((flags & MINRX_REG_EXTENSIONS_GNU) == 0)
+                                       goto normal;
+                               lhs.push_back({Node::ZXOW, {0, 0}, nstk});
+                               break;
+                       case L'B':
+                               if ((flags & MINRX_REG_EXTENSIONS_GNU) == 0)
+                                       goto normal;
+                               lhs.push_back({Node::ZNWB, {0, 0}, nstk});
+                               break;
+                       case L's':
+                               if ((flags & MINRX_REG_EXTENSIONS_GNU) == 0)
+                                       goto normal;
+                               lhs.push_back({Node::CSet, {csets.size(), 0}, 
nstk});
+                               csets.emplace_back();
+                               cclass(csets.back(), "space");
+                               break;
+                       case L'S':
+                               if ((flags & MINRX_REG_EXTENSIONS_GNU) == 0)
+                                       goto normal;
+                               lhs.push_back({Node::CSet, {csets.size(), 0}, 
nstk});
+                               csets.emplace_back();
+                               cclass(csets.back(), "space");
+                               csets.back().invert();
+                               break;
+                       case L'w':
+                               if ((flags & MINRX_REG_EXTENSIONS_GNU) == 0)
+                                       goto normal;
+                               lhs.push_back({Node::CSet, {csets.size(), 0}, 
nstk});
+                               csets.emplace_back();
+                               cclass(csets.back(), "alnum");
+                               csets.back().set(L'_');
+                               break;
+                       case L'W':
+                               if ((flags & MINRX_REG_EXTENSIONS_GNU) == 0)
+                                       goto normal;
+                               lhs.push_back({Node::CSet, {csets.size(), 0}, 
nstk});
+                               csets.emplace_back();
+                               cclass(csets.back(), "alnum");
+                               csets.back().set(L'_');
+                               csets.back().invert();
+                               break;
+                       case WConv::End:
+                               return {{}, 0, MINRX_REG_EESCAPE};
+                       default:
+                               goto normal;
+                       }
+                       wconv.nextchr();
+                       break;
+               case L'(':
+                       {
+                               NInt n = ++nsub;
+                               wconv.nextchr();
+                               minrx_result_t err;
+                               std::tie(lhs, lhmaxstk, err) = alt(true, nstk + 
1);
+                               if (err)
+                                       return {lhs, lhmaxstk, err};
+                               if (wconv.look() != L')')
+                                       return {{}, 0, MINRX_REG_EPAREN};
+                               lhs.push_front({Node::SubL, {n, nsub}, nstk});
+                               lhs.push_back({Node::SubR, {n, nsub}, nstk + 
1});
+                               wconv.nextchr();
+                       }
+                       break;
+               case L')':
+                       if (!nested)
+                               goto normal;
+                       // fall through
+               case L'|':
+               case WConv::End:
+                       lhmaxstk = nstk;
+                       break;
+               }
+               return {lhs, lhmaxstk, MINRX_REG_SUCCESS};
+       }
+       Regexp *compile() {
+               auto [lhs, nstk, err] = alt(false, 0);
+               if (err) {
+                       csets.clear();
+                       lhs.clear();
+                       nstk = 0;
+                       nsub = 0;
+               } else {
+                       lhs.push_back({Node::Exit, {0, 0}, 0});
+               }
+               return new Regexp{ std::move(csets), {lhs.begin(), lhs.end()}, 
nstk, nsub + 1, err };
+       }
+};
+
+std::map<std::string, CSet> Compile::cclmemo;
+std::mutex Compile::cclmutex;
+
+struct Execute {
+       typedef COWVec<std::size_t, (std::size_t) -1> Vec;
+       struct NState {
+               const char *bp = nullptr;
+               Vec substack;
+               NState() {}
+               NState(Vec::Allocator &allocator): substack(allocator) {}
+               bool cmpgt(const NState &ns, std::size_t nstk) const {
+                       return bp != ns.bp ? bp < ns.bp : 
substack.cmpgt(ns.substack, nstk);
+               }
+       };
+       const Regexp &r;
+       const minrx_regexec_flags_t flags;
+       WConv wconv;
+       WChar lookback = WConv::End;
+       Vec::Allocator allocator { r.nstk + 2 * r.nsub };
+       std::optional<COWVec<std::size_t, (std::size_t) -1>> best;
+       QSet<NInt> epsq { r.nodes.size() };
+       QVec<NInt, NState> epsv { r.nodes.size() };
+       Execute(const Regexp &r, minrx_regexec_flags_t flags, const char *bp, 
const char *ep) : r(r), flags(flags), wconv(bp, ep) {}
+       void add(QVec<NInt, NState> &ncsv, NInt n, const NState &ns) {
+               if (r.nodes[n].type <= Node::CSet) {
+                       auto [newly, oldns] = ncsv.insert(n, ns);
+                       if (!newly && ns.cmpgt(oldns, r.nodes[n].nstk))
+                               oldns = ns;
+               } else {
+                       auto [newly, oldns] = epsv.insert(n, ns);
+                       if (newly || (ns.cmpgt(oldns, r.nodes[n].nstk) && 
(oldns = ns, true)))
+                               epsq.insert(n);
+               }
+       }
+       bool is_word(int wc) { return wc == L'_' || std::iswalnum(wc); }
+       void epsclosure(QVec<NInt, NState> &ncsv) {
+               auto nodes = r.nodes;
+               do {
+                       NInt k = epsq.remove();
+                       NState &ns = epsv.lookup(k);
+                       if (best.has_value() && (std::size_t) (ns.bp - 
wconv.bp) > best->get(r.nstk + 0))
+                               continue;
+                       const auto &n = nodes[k];
+                       switch (n.type) {
+                       case Node::Exit:
+                               {
+                                       std::size_t b = ns.bp - wconv.bp, e = 
wconv.cp - wconv.bp;
+                                       if (!best.has_value()
+                                           || b < best->get(r.nstk + 0)
+                                           || (b == best->get(r.nstk + 0) && e 
>= best->get(r.nstk + 1)))
+                                       {
+                                               best = ns.substack;
+                                               best->put(r.nstk + 0, b);
+                                               best->put(r.nstk + 1, e);
+                                       }
+                               }
+                               break;
+                       case Node::Fork:
+                               {
+                                       NState nscopy = ns;
+                                       NInt pridelta = n.args[0];
+                                       NInt priority = 0;
+                                       do {
+                                               nscopy.substack.put(n.nstk, 
priority += pridelta);
+                                               add(ncsv, k + 1, nscopy);
+                                               k = k + 1 + nodes[k].args[1];
+                                       } while (nodes[k].type != Node::Join);
+                                       if (priority == pridelta) {
+                                               nscopy.substack.put(n.nstk, 
priority += pridelta);
+                                               add(ncsv, k, nscopy);
+                                       }
+                               }
+                               break;
+                       case Node::Goto:
+                               add(ncsv, k + 1 + n.args[1], ns);
+                               break;
+                       case Node::Join:
+                               add(ncsv, k + 1, ns);
+                               break;
+                       case Node::Loop:
+                               {
+                                       NState nscopy = ns;
+                                       nscopy.substack.put(n.nstk, wconv.cp - 
wconv.bp);
+                                       nscopy.substack.put(n.nstk + 1, -1);
+                                       nscopy.substack.put(n.nstk + 2, 
wconv.cp - wconv.bp);
+                                       add(ncsv, k + 1, nscopy);
+                                       if (n.args[1]) {
+                                               nscopy.substack.put(n.nstk + 1, 
0);
+                                               add(ncsv, k + 1 + n.args[0], 
nscopy);
+                                       }
+                               }
+                               break;
+                       case Node::Next:
+                               {
+                                       add(ncsv, k + 1, ns);
+                                       if (n.args[1] && (std::size_t) 
(wconv.cp - wconv.bp) > ns.substack.get(n.nstk - 1)) {
+                                               NState nscopy = ns;
+                                               nscopy.substack.sub(n.nstk - 2, 
1);
+                                               nscopy.substack.put(n.nstk - 1, 
wconv.cp - wconv.bp);
+                                               add(ncsv, k - n.args[0], 
nscopy);
+                                       }
+                               }
+                               break;
+                       case Node::SubL:
+                               {
+                                       NState nscopy = ns;
+                                       nscopy.substack.put(n.nstk, wconv.cp - 
wconv.bp);
+                                       if (n.args[0] != (NInt) -1)
+                                               for (auto i = n.args[0]; i <= 
n.args[1]; ++i) {
+                                                       
nscopy.substack.put(r.nstk + i * 2, -1);
+                                                       
nscopy.substack.put(r.nstk + i * 2 + 1, -1);
+                                               }
+                                       add(ncsv, k + 1, nscopy);
+                               }
+                               break;
+                       case Node::SubR:
+                               if (n.args[0] != (NInt) -1) {
+                                       NState nscopy = ns;
+                                       nscopy.substack.put(r.nstk + n.args[0] 
* 2 + 0, ns.substack.get(n.nstk - 1));
+                                       nscopy.substack.put(r.nstk + n.args[0] 
* 2 + 1, wconv.cp - wconv.bp);
+                                       add(ncsv, k + 1, nscopy);
+                               } else {
+                                       add(ncsv, k + 1, ns);
+                               }
+                               break;
+                       case Node::ZBOB:
+                               if (wconv.cp == wconv.bp && (flags & 
MINRX_REG_NOTBOL) == 0)
+                                       add(ncsv, k + 1, ns);
+                               break;
+                       case Node::ZEOB:
+                               if (wconv.cp == wconv.ep && (flags & 
MINRX_REG_NOTEOL) == 0)
+                                       add(ncsv, k + 1, ns);
+                               break;
+                       case Node::ZBOL:
+                               if (((wconv.cp == wconv.bp && (flags & 
MINRX_REG_NOTBOL) == 0)) || lookback == L'\n')
+                                       add(ncsv, k + 1, ns);
+                               break;
+                       case Node::ZEOL:
+                               if (((wconv.cp == wconv.ep && (flags & 
MINRX_REG_NOTEOL) == 0)) || wconv.look() == L'\n')
+                                       add(ncsv, k + 1, ns);
+                               break;
+                       case Node::ZBOW:
+                               if ((wconv.cp == wconv.bp || 
!is_word(lookback)) && (wconv.cp != wconv.ep && is_word(wconv.look())))
+                                       add(ncsv, k + 1, ns);
+                               break;
+                       case Node::ZEOW:
+                               if ((wconv.cp != wconv.bp && is_word(lookback)) 
&& (wconv.cp == wconv.ep || !is_word(wconv.look())))
+                                       add(ncsv, k + 1, ns);
+                               break;
+                       case Node::ZXOW:
+                               if (   ((wconv.cp == wconv.bp || 
!is_word(lookback)) && (wconv.cp != wconv.ep && is_word(wconv.look())))
+                                   || ((wconv.cp != wconv.bp && 
is_word(lookback)) && (wconv.cp == wconv.ep || !is_word(wconv.look()))))
+                                       add(ncsv, k + 1, ns);
+                               break;
+                       case Node::ZNWB:
+                               if (   (wconv.cp == wconv.bp && wconv.cp == 
wconv.ep)
+                                   || (wconv.cp == wconv.bp && wconv.cp != 
wconv.ep && !is_word(wconv.look()))
+                                   || (wconv.cp != wconv.bp && wconv.cp == 
wconv.ep && !is_word(lookback))
+                                   || (wconv.cp != wconv.bp && wconv.cp != 
wconv.ep && is_word(lookback) == is_word(wconv.look())))
+                                       add(ncsv, k + 1, ns);
+                               break;
+                       default:
+                               abort();
+                               break;
+                       }
+               } while (!epsq.empty());
+       }
+       int execute(std::size_t nm, minrx_regmatch_t *rm) {
+               QVec<NInt, NState> mcsvs[2] { r.nodes.size(), r.nodes.size() };
+               auto nodes = &r.nodes[0];
+               wconv.nextchr();
+               if ((flags & MINRX_REG_RESUME) != 0 && rm && rm[0].rm_eo > 0)
+                       while (wconv.cp != wconv.ep && wconv.cp - wconv.bp != 
rm[0].rm_eo)
+                               lookback = wconv.look(), wconv.nextchr();
+               NState nsinit(allocator);
+               nsinit.bp = wconv.cp;
+               add(mcsvs[0], 0, nsinit);
+               if (!epsq.empty())
+                       epsclosure(mcsvs[0]);
+               auto wc = wconv.look();
+               for (;;) { // unrolled to ping-pong roles of mcsvs[0]/[1]
+                       if (wc == WConv::End)
+                               break;
+                       epsv.clear();
+                       while (!mcsvs[0].empty()) {
+                               auto [n, ns] = mcsvs[0].remove();
+                               auto t = nodes[n].type;
+                               if (t <= WCharMax) {
+                                       if (wc != t)
+                                               continue;
+                               } else {
+                                       if (!r.csets[nodes[n].args[0]].test(wc))
+                                               continue;
+                               }
+                               add(mcsvs[1], n + 1, ns);
+                       }
+                       wconv.nextchr(), lookback = wc, wc = wconv.look();
+                       if (!best.has_value()) {
+                               nsinit.bp = wconv.cp;
+                               add(mcsvs[1], 0, nsinit);
+                       }
+                       if (!epsq.empty())
+                               epsclosure(mcsvs[1]);
+                       if (best.has_value() && mcsvs[1].empty())
+                               break;
+                       if (wc == WConv::End)
+                               break;
+                       epsv.clear();
+                       while (!mcsvs[1].empty()) {
+                               auto [n, ns] = mcsvs[1].remove();
+                               auto t = nodes[n].type;
+                               if (t <= WCharMax) {
+                                       if (wc != t)
+                                               continue;
+                               } else {
+                                       if (!r.csets[nodes[n].args[0]].test(wc))
+                                               continue;
+                               }
+                               add(mcsvs[0], n + 1, ns);
+                       }
+                       wconv.nextchr(), lookback = wc, wc = wconv.look();
+                       if (!best.has_value()) {
+                               nsinit.bp = wconv.cp;
+                               add(mcsvs[0], 0, nsinit);
+                       }
+                       if (!epsq.empty())
+                               epsclosure(mcsvs[0]);
+                       if (best.has_value() && mcsvs[0].empty())
+                               break;
+               }
+               if (best.has_value()) {
+                       if (rm) {
+                               std::size_t nsub = std::min(nm, r.nsub);
+                               std::size_t i;
+                               for (i = 0; i < nsub; ++i) {
+                                       rm[i].rm_so = (*best->storage)[r.nstk + 
i * 2];
+                                       rm[i].rm_eo = (*best->storage)[r.nstk + 
i * 2 + 1];
+                               }
+                               for (; i < nm; ++i)
+                                       rm[i].rm_so = rm[i].rm_eo = -1;
+                       }
+                       return 0;
+               } else {
+                       if (rm)
+                               for (std::size_t i = 0; i < nm; ++i)
+                                       rm[i].rm_so = rm[i].rm_eo = -1;
+                       return MINRX_REG_NOMATCH;
+               }
+       }
+};
+
+}
+
+int
+minrx_regcomp(minrx_regex_t *rx, const char *s, int flags)
+{
+       return minrx_regncomp(rx, strlen(s), s, flags);
+}
+
+int
+minrx_regexec(minrx_regex_t *rx, const char *s, std::size_t nm, 
minrx_regmatch_t *rm, int flags)
+{
+       return minrx_regnexec(rx, strlen(s), s, nm, rm, flags);
+}
+
+int
+minrx_regncomp(minrx_regex_t *rx, std::size_t ns, const char *s, int flags)
+{
+       auto r = MinRX::Compile(s, s + ns, (minrx_regcomp_flags_t) 
flags).compile();
+       rx->re_regexp = r;
+       rx->re_nsub = r->nsub;
+       rx->re_compflags = (minrx_regcomp_flags_t) flags;
+       return r->err;
+}
+
+int
+minrx_regnexec(minrx_regex_t *rx, std::size_t ns, const char *s, std::size_t 
nm, minrx_regmatch_t *rm, int flags)
+{
+       MinRX::Regexp *r = reinterpret_cast<MinRX::Regexp *>(rx->re_regexp);
+       return MinRX::Execute(*r, (minrx_regexec_flags_t) flags, s, s + 
ns).execute(nm, rm);
+}
+
+void
+minrx_regfree(minrx_regex_t *rx)
+{
+       delete reinterpret_cast<MinRX::Regexp *>(rx->re_regexp);
+       rx->re_regexp = nullptr;
+}
+
+size_t
+minrx_regerror(int errcode, const minrx_regex_t *, char *errbuf, size_t 
errsize)
+{
+       static const char *const messages[] = {
+               "success",
+               "bad pattern",
+               "invalid contents of {}",
+               "? * + or {interval} not preceded by valid subpattern",
+               "unbalanced {",
+               "unbalanced [",
+               "invalid collating element",
+               "invalid character class name",
+               "invalid trailing backslash",
+               "unbalanced (",
+               "invalid range endpoint",
+               "memory allocation failed",
+               "invalid \\digit",
+               "match not found",
+               "unknown error code",
+       };
+       if (errcode < 0 || errcode > MINRX_REG_UNKNOWN)
+               errcode = MINRX_REG_UNKNOWN;
+       size_t size = snprintf(errbuf, errsize, "%s", messages[errcode]);
+       if (errsize != 0 && size == errsize)
+               errbuf[errsize - 1] = '\0';
+       return size + 1;
+}
diff --git a/support/minrx.h b/support/minrx.h
new file mode 100644
index 00000000..ecae246e
--- /dev/null
+++ b/support/minrx.h
@@ -0,0 +1,94 @@
+//
+// MinRX: a minimal matcher for POSIX Extended Regular Expressions.
+// Copyright (C) 2023, 2024 Michael J. Haertel.
+//
+// This file is part of MinRX.
+//
+// MinRX is free software; you can redistribute it and/or modify it
+// under the terms of the GNU Lesser General Public License as published
+// by the Free Software Foundation; either version 3 of the License, or
+// (at your option) any later version.
+//
+// MinRX is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// See the GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+
+#ifndef _MINRX_H
+#define _MINRX_H
+
+#include <stddef.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Names and types in this file have been chosen so that if the minrx_ and 
MINRX_ prefixes
+ * are erased throughout the result will be usable as a POSIX-conforming 
regex.h.
+ */
+
+typedef enum {                         /* Flags for minrx_reg*comp() */
+       MINRX_REG_EXTENDED = 1,         /* required in current version of MinRX 
*/
+       MINRX_REG_ICASE = 2,            /* ignore case in pattern and search */
+       MINRX_REG_NEWLINE = 4,          /* exclude \n from . and [^...] and 
treat as boundary for ^ and $ */
+       MINRX_REG_NOSUB = 8,            /* yes/no result only; no regmatch_t 
substrings results */
+       MINRX_REG_BRACE_COMPAT = 16,    /* { begins interval expression only 
when followed by digit */
+       MINRX_REG_BRACK_ESCAPE = 32,    /* bracket expressions [...] allow 
backslash escapes */
+       MINRX_REG_EXTENSIONS_BSD = 64,  /* enable BSD extensions \< and \> */
+       MINRX_REG_EXTENSIONS_GNU = 128  /* enable GNU extensions \b \B \s \S \w 
\W \y */
+} minrx_regcomp_flags_t;
+
+typedef enum {                         /* Flags for minrx_reg*exec() */
+       MINRX_REG_NOTBOL = 1,           /* don't match ^ at beginning of string 
*/
+       MINRX_REG_NOTEOL = 2,           /* don't match $ at end of string */
+       MINRX_REG_RESUME = 4,           /* MinRX extension: resume search from 
rm[0].rm_eo */
+} minrx_regexec_flags_t;
+
+typedef enum {                         /* Return values from minrx_reg*comp() 
and minrx_reg*exec() */
+       MINRX_REG_SUCCESS = 0,          /* regcomp: compiled successfully; 
regexec: match found */
+       MINRX_REG_BADPAT,               /* regcomp: bad pattern (generic) */
+       MINRX_REG_BADBR,                /* regcomp: invalid contents of {} */
+       MINRX_REG_BADRPT,               /* regcomp: ? * + or {interval} not 
preceded by valid subpattern */
+       MINRX_REG_EBRACE,               /* regcomp: unbalanced { */
+       MINRX_REG_EBRACK,               /* regcomp: unbalanced [ */
+       MINRX_REG_ECOLLATE,             /* regcomp: invalid collating element */
+       MINRX_REG_ECTYPE,               /* regcomp: invalid character class 
name */
+       MINRX_REG_EESCAPE,              /* regcomp: invalid backslash */
+       MINRX_REG_EPAREN,               /* regcomp: unbalanced ( */
+       MINRX_REG_ERANGE,               /* regcomp: invalid range endpoint */
+       MINRX_REG_ESPACE,               /* regcomp: memory allocation failed */
+       MINRX_REG_ESUBREG,              /* regcomp: invalid \digit */
+       MINRX_REG_NOMATCH,              /* regexec: match not found */
+       MINRX_REG_UNKNOWN               /* unknown error code */
+} minrx_result_t;
+
+typedef struct {
+       void *re_regexp;
+       size_t re_nsub;
+       minrx_regcomp_flags_t re_compflags;
+} minrx_regex_t;
+
+typedef struct {
+       ptrdiff_t rm_so;
+       ptrdiff_t rm_eo;
+} minrx_regmatch_t;
+
+/* Compile from user syntax to internal regex_t; returns minrx_result_t (as 
integer) */
+int minrx_regcomp(minrx_regex_t *, const char * /* regexp (NUL terminated) */, 
int /* minrx_regcomp_flags_t */);
+int minrx_regncomp(minrx_regex_t *, size_t /* nregexp */, const char * /* 
regexp[nregexp] */, int /* minrx_regcomp_flags_t */);
+
+/* Search for previously-compiled regexp; return minrx_result_t (as integer) */
+int minrx_regexec(minrx_regex_t *, const char * /* string NUL-terminated */, 
size_t /* nrm */, minrx_regmatch_t * /* rm[nrm] */, int /* 
minrx_regexec_flags_t */);
+int minrx_regnexec(minrx_regex_t *, size_t /* nstring */, const char * /* 
string[nstring] */, size_t /* nrm */, minrx_regmatch_t * /* rm[nrm] */, int /* 
minrx_regexec_flags_t */);
+
+size_t minrx_regerror(int /* minrx_result_t */, const minrx_regex_t *, char * 
/* errbuf[nerrbuf] */, size_t /* nerrbuf */);
+void minrx_regfree(minrx_regex_t *);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+#endif /* MINRX_H */
diff --git a/version.c b/version.c
index f5cf6ac0..2e45635a 100644
--- a/version.c
+++ b/version.c
@@ -1,3 +1,3 @@
 #include "config.h"
 
-const char *version_string = PACKAGE_STRING;
+const char *version_string = PACKAGE_STRING "-minrx";

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                |    4 +
 README_d/ChangeLog       |    4 +
 README_d/README.matchers |    6 +-
 support/ChangeLog        |    4 +
 support/minrx.cpp        | 1176 ++++++++++++++++++++++++++++++++++++++++++++++
 support/minrx.h          |   94 ++++
 version.c                |    2 +-
 7 files changed, 1286 insertions(+), 4 deletions(-)
 create mode 100644 support/minrx.cpp
 create mode 100644 support/minrx.h


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]