gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] gawk branch, feature/minrx, updated. gawk-4.1.0-5937-g1cf4fd4e


From: Arnold Robbins
Subject: [SCM] gawk branch, feature/minrx, updated. gawk-4.1.0-5937-g1cf4fd4e
Date: Thu, 6 Feb 2025 09:05:55 -0500 (EST)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, feature/minrx has been updated
       via  1cf4fd4ee8131096a1347c948205b689820b11bb (commit)
      from  9689093e068ccdea3c545f886984f09174232a0c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=1cf4fd4ee8131096a1347c948205b689820b11bb

commit 1cf4fd4ee8131096a1347c948205b689820b11bb
Author: Arnold D. Robbins <arnold@skeeve.com>
Date:   Thu Feb 6 16:05:35 2025 +0200

    Update minrx.cpp.

diff --git a/support/ChangeLog b/support/ChangeLog
index 1a628b50..d055ae9e 100644
--- a/support/ChangeLog
+++ b/support/ChangeLog
@@ -1,3 +1,7 @@
+2025-02-06         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * minrx.cpp: Update again. More speedups.
+
 2025-02-03         Arnold D. Robbins     <arnold@skeeve.com>
 
        * minrx.cpp: Update again. More speedups.
diff --git a/support/minrx.cpp b/support/minrx.cpp
index 866b757a..af54e9ea 100644
--- a/support/minrx.cpp
+++ b/support/minrx.cpp
@@ -727,11 +727,12 @@ struct Node {
                CSet = WCharMax + 1,    // args = index in Regexp::csets vector
                // epsilon-matching node types
                Exit,                   // no args
-               Fork,                   // args = priority delta, offset to next
-               Goto,                   // args = offset to join, offset to next
-               Join,                   // args = offset to fork
+               Fork,                   // args = offset to first goto
+               Goto,                   // args = offset to next goto, offset 
to just after join
+               Join,                   // args = none (but supplies incoming 
stack depth for next node)
                Loop,                   // args = offset to next, optional flag
                Next,                   // args = offset to loop, infinite flag
+               Skip,                   // args = offset over skipped nodes
                SubL,                   // args = minimum and maximum contained 
subexpression numbers
                SubR,                   // args = minimum and maximum contained 
subexpression numbers
                ZBOB,                   // no args - match empty string at 
beginning of buffer
@@ -811,16 +812,16 @@ struct Compile {
                        auto [rhs, rhmaxstk, err] = alts.back();
                        if (err)
                                return {rhs, rhmaxstk, err};
-                       rhs.push_front({Node::Goto, {rhs.size(), rhs.size()}, 
nstk + 1});
+                       rhs.push_front({Node::Goto, {rhs.size(), rhs.size() + 
1}, nstk + 1});
                        alts.pop_back();
                        while (!alts.empty()) {
                                auto [mhs, mhmaxstk, _] = alts.back();
                                alts.pop_back();
                                rhs.insert(rhs.begin(), mhs.begin(), mhs.end());
                                rhmaxstk = std::max(mhmaxstk, rhmaxstk);
-                               rhs.push_front({Node::Goto, {rhs.size(), 
mhs.size()}, nstk + 1});
+                               rhs.push_front({Node::Goto, {mhs.size(), 
rhs.size() + 1}, nstk + 1});
                        }
-                       lhs.push_front({Node::Fork, {(NInt) -1, lhs.size()}, 
nstk + 1});
+                       lhs.push_front({Node::Fork, { lhs.size(), 0 }, nstk + 
1});
                        lhs.insert(lhs.end(), rhs.begin(), rhs.end());
                        lhmaxstk = std::max(lhmaxstk, rhmaxstk);
                        lhs.push_back({Node::Join, {lhs.size() - 1, 0}, nstk + 
1});
@@ -847,9 +848,7 @@ struct Compile {
                if (optional && !infinite) {
                        for (auto &l : lhs) l.nstk += 1;
                        auto lhsize = lhs.size();
-                       lhs.push_front({Node::Fork, {(NInt) 1, lhsize}, nstk + 
1});
-                       // NOTE: Restore the following if someday we need a 
reversible automaton.
-                       // lhs.push_back({Node::Join, {lhsize, 0}, nstk + 1});
+                       lhs.push_front({Node::Skip, {lhsize, 0}, nstk + 1});
                        return {lhs, lhmaxstk + 1, MINRX_REG_SUCCESS};
                } else {
                        for (auto &l : lhs) l.nstk += 3;
@@ -883,9 +882,7 @@ struct Compile {
                        for (auto &r : rhs)
                                r.nstk += 1;
                        auto rhsize = rhs.size();
-                       rhs.push_front({Node::Fork, {1, rhsize}, nstk + 1});
-                       // NOTE: Restore the following if someday we need a 
reversible automaton.
-                       // rhs.push_back({Node::Join, {rhsize, 0}, nstk + 1});
+                       rhs.push_front({Node::Skip, {rhsize, 0}, nstk + 1});
                        for (; k < n; ++k)
                                lhs.insert(lhs.end(), rhs.begin(), rhs.end());
                }
@@ -1166,23 +1163,23 @@ struct Compile {
                                case Node::Exit:
                                        return {};
                                case Node::Fork:
-                                       if (n.args[0] != 1) { // x|y|...
-                                               do
-                                                       add(k + 1), k = k + 1 + 
nodes[k].args[1];
-                                               while (nodes[k].type == 
Node::Goto);
-                                       } else {
+                                       do {
                                                add(k + 1);
-                                               add(k + 1 + nodes[k].args[1]);
-                                       }
+                                               k = k + 1 + nodes[k].args[0];
+                                       } while (nodes[k].type != Node::Join);
                                        break;
                                case Node::Goto:
-                                       add(k + 1 + n.args[1]);
+                                       add(k + 1 + n.args[0]);
                                        break;
                                case Node::Loop:
                                        add(k + 1);
                                        if (n.args[1])
                                                add(k + 1 + n.args[0]);
                                        break;
+                               case Node::Skip:
+                                       add(k + 1);
+                                       add(k + 1 + n.args[0]);
+                                       break;
                                default:
                                        add(k + 1);
                                        break;
@@ -1225,17 +1222,23 @@ struct Compile {
 struct Execute {
        typedef COWVec<std::size_t, (std::size_t) -1> Vec;
        struct NState {
+               std::size_t gen = 0;
                std::size_t boff;
                Vec substack;
                NState() {}
                NState(Vec::Allocator &allocator): substack(allocator) {}
                template <typename... XArgs>
-               bool cmpgt(const NState &ns, std::size_t nstk, XArgs... xargs) 
const {
-                       return boff != ns.boff ? boff < ns.boff : 
substack.cmpgt(ns.substack, nstk, xargs...);
+               bool cmpgt(const NState &ns, std::size_t gen, std::size_t nstk, 
XArgs... xargs) const {
+                       if (gen != ns.gen)
+                               return gen > ns.gen;
+                       if (boff != ns.boff)
+                               return boff < ns.boff;
+                       return substack.cmpgt(ns.substack, nstk, xargs...);
                }
        };
        const Regexp &r;
        const minrx_regexec_flags_t flags;
+       std::size_t gen = 0;
        WConv wconv;
        WChar wcprev = WConv::End;
        Vec::Allocator allocator { r.nstk + 2 * r.nsub };
@@ -1253,10 +1256,11 @@ struct Execute {
                                auto [newly, newns] = ncsv.insert(k, ns);
                                if (newly)
                                        new (&newns) NState(ns);
-                               else if (ns.cmpgt(newns, nstk, xargs...))
+                               else if (ns.cmpgt(newns, gen, nstk, xargs...))
                                        newns = ns;
                                else
                                        return;
+                               newns.gen = gen;
                                if constexpr (sizeof... (XArgs) > 0) {
                                        auto i = nstk - sizeof...(XArgs);
                                        (newns.substack.put(i++, xargs), ...);
@@ -1266,10 +1270,11 @@ struct Execute {
                        auto [newly, newns] = epsv.insert(k, ns);
                        if (newly)
                                new (&newns) NState(ns);
-                       else if (ns.cmpgt(newns, nstk, xargs...))
+                       else if (ns.cmpgt(newns, gen, nstk, xargs...))
                                newns = ns;
                        else
                                return;
+                       newns.gen = gen;
                        if constexpr (sizeof... (XArgs) > 0) {
                                auto i = nstk - sizeof...(XArgs);
                                (newns.substack.put(i++, xargs), ...);
@@ -1303,19 +1308,16 @@ struct Execute {
                                }
                                break;
                        case Node::Fork:
-                               if (auto pridelta = n.args[0]; pridelta != 1) { 
// x|y|...
-                                       NInt priority = 0;
+                               {
+                                       NInt priority = (NInt) -1;
                                        do {
-                                               add(ncsv, k + 1, nstk, ns, 
wcnext, priority += pridelta);
-                                               k = k + 1 + nodes[k].args[1];
+                                               add(ncsv, k + 1, nstk, ns, 
wcnext, priority--);
+                                               k = k + 1 + nodes[k].args[0];
                                        } while (nodes[k].type != Node::Join);
-                               } else { // x?
-                                       add(ncsv, k + 1, nstk, ns, wcnext, 
(NInt) 0);
-                                       add(ncsv, k + 1 + n.args[1], nstk, ns, 
wcnext, (NInt) 1);
                                }
                                break;
                        case Node::Goto:
-                               add(ncsv, k + 1 + n.args[0] + 1, nstk, ns, 
wcnext);
+                               add(ncsv, k + 1 + n.args[1], nstk, ns, wcnext);
                                break;
                        case Node::Join:
                                add(ncsv, k + 1, nstk, ns, wcnext);
@@ -1330,6 +1332,10 @@ struct Execute {
                                if (n.args[1] && wconv.off() > 
ns.substack.get(nstk + 3 - 1))
                                        add(ncsv, k - n.args[0], nstk + 3, ns, 
wcnext, ns.substack.get(nstk), ns.substack.get(nstk + 1) - 1, (NInt) 
wconv.off());
                                break;
+                       case Node::Skip:
+                               add(ncsv, k + 1, nstk, ns, wcnext, (NInt) 0);
+                               add(ncsv, k + 1 + n.args[0], nstk, ns, wcnext, 
(NInt) 1);
+                               break;
                        case Node::SubL:
                                {
                                        NState nscopy = ns;
@@ -1444,7 +1450,7 @@ struct Execute {
                for (;;) { // unrolled to ping-pong roles of mcsvs[0]/[1]
                        if (wcnext == WConv::End)
                                break;
-                       epsv.clear();
+                       ++gen;
                        wcprev = wcnext, wcnext = wconv.nextchr().look();
                        while (!mcsvs[0].empty()) {
                                auto [n, ns] = mcsvs[0].remove();
@@ -1464,7 +1470,7 @@ struct Execute {
                        }
                        if (wcnext == WConv::End)
                                break;
-                       epsv.clear();
+                       ++gen;
                        wcprev = wcnext, wcnext = wconv.nextchr().look();
                        while (!mcsvs[1].empty()) {
                                auto [n, ns] = mcsvs[1].remove();

-----------------------------------------------------------------------

Summary of changes:
 support/ChangeLog |  4 +++
 support/minrx.cpp | 74 ++++++++++++++++++++++++++++++-------------------------
 2 files changed, 44 insertions(+), 34 deletions(-)


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]