From b0c496c2f6842dc26f59a334bc25f0a86c9601ba Mon Sep 17 00:00:00 2001
From: WrathfulSpatula <stranoj@gmail.com>
Date: Fri, 3 Jan 2025 08:02:12 -0500
Subject: [PATCH] Single-threaded

---
 Eratosthenes/_eratosthenes.cpp         | 276 ++++++++++---------------
 Eratosthenes/dispatchqueue.cpp         | 121 -----------
 Eratosthenes/include/dispatchqueue.hpp |  67 ------
 setup.py                               |   6 +-
 4 files changed, 117 insertions(+), 353 deletions(-)
 delete mode 100644 Eratosthenes/dispatchqueue.cpp
 delete mode 100644 Eratosthenes/include/dispatchqueue.hpp
diff --git a/Eratosthenes/_eratosthenes.cpp b/Eratosthenes/_eratosthenes.cpp
index f60d6db..5d6fb99 100644
--- a/Eratosthenes/_eratosthenes.cpp
+++ b/Eratosthenes/_eratosthenes.cpp
@@ -10,8 +10,6 @@
 // number under trial. Multiples of 2, 3, 5, 7, and 11 can
 // be entirely skipped in loop enumeration.
 
-#include "dispatchqueue.hpp"
-
 #include <cmath>
 #include <cstddef>
 #include <cstdint>
@@ -25,8 +23,6 @@
 
 namespace qimcifa {
 
-DispatchQueue dispatch(std::thread::hardware_concurrency());
-
 typedef boost::multiprecision::cpp_int BigInteger;
 
 inline BigInteger sqrt(const BigInteger& toTest)
@@ -170,12 +166,6 @@ std::vector<BigInteger> SieveOfEratosthenes(const BigInteger& n)
     std::unique_ptr<bool[]> uNotPrime(new bool[cardinality + 1U]());
     bool* notPrime = uNotPrime.get();
 
-    // We dispatch multiple marking asynchronously.
-    // If we've already marked all primes up to x,
-    // we're free to continue to up to x * x,
-    // then we synchronize.
-    BigInteger threadBoundary = 36U;
-
     // Get the remaining prime numbers.
     unsigned short wheel5 = 129U;
     unsigned long long wheel7 = 9009416540524545ULL;
@@ -188,63 +178,52 @@ std::vector<BigInteger> SieveOfEratosthenes(const BigInteger& n)
             break;
         }
 
-        if (threadBoundary < p) {
-            dispatch.finish();
-            threadBoundary *= threadBoundary;
-        }
-
         if (notPrime[backward5(p)]) {
             continue;
         }
 
         knownPrimes.push_back(p);
 
-        dispatch.dispatch([&n, p, &notPrime]() {
-            // We are skipping multiples of 2, 3, and 5
-            // for space complexity, for 4/15 the bits.
-            // More are skipped by the wheel for time.
-            const BigInteger p2 = p << 1U;
-            const BigInteger p4 = p << 2U;
-            BigInteger i = p * p;
-
-            // "p" already definitely not a multiple of 3.
-            // Its remainder when divided by 3 can be 1 or 2.
-            // If it is 2, we can do a "half iteration" of the
-            // loop that would handle remainder of 1, and then
-            // we can proceed with the 1 remainder loop.
-            // This saves 2/3 of updates (or modulo).
-            if ((p % 3U) == 2U) {
-                notPrime[backward5(i)] = true;
-                i += p2;
-                if (i > n) {
-                    return false;
-                }
+        // We are skipping multiples of 2, 3, and 5
+        // for space complexity, for 4/15 the bits.
+        // More are skipped by the wheel for time.
+        const BigInteger p2 = p << 1U;
+        const BigInteger p4 = p << 2U;
+        BigInteger i = p * p;
+
+        // "p" already definitely not a multiple of 3.
+        // Its remainder when divided by 3 can be 1 or 2.
+        // If it is 2, we can do a "half iteration" of the
+        // loop that would handle remainder of 1, and then
+        // we can proceed with the 1 remainder loop.
+        // This saves 2/3 of updates (or modulo).
+        if ((p % 3U) == 2U) {
+            notPrime[backward5(i)] = true;
+            i += p2;
+            if (i > n) {
+                continue;
             }
+        }
 
-            for (;;) {
-                if (i % 5U) {
-                    notPrime[backward5(i)] = true;
-                }
-                i += p4;
-                if (i > n) {
-                    return false;
-                }
-
-                if (i % 5U) {
-                    notPrime[backward5(i)] = true;
-                }
-                i += p2;
-                if (i > n) {
-                    return false;
-                }
+        for (;;) {
+            if (i % 5U) {
+                notPrime[backward5(i)] = true;
+            }
+            i += p4;
+            if (i > n) {
+                break;
             }
 
-            return false;
-        });
+            if (i % 5U) {
+                notPrime[backward5(i)] = true;
+            }
+            i += p2;
+            if (i > n) {
+                break;
+            }
+        }
     }
 
-    dispatch.finish();
-
     for (;;) {
         const BigInteger p = forward3(o);
         if (p > n) {
@@ -309,37 +288,32 @@ std::vector<BigInteger> SegmentedSieveOfEratosthenes(BigInteger n)
 
         for (size_t k = 3U; k < sqrtIndex; ++k) {
             const BigInteger& p = knownPrimes[k];
-            dispatch.dispatch([&fLo, &low, &cardinality, p, &notPrime]() {
-                // We are skipping multiples of 2.
-                const BigInteger p2 = p << 1U;
-
-                // Find the minimum number in [low..high] that is
-                // a multiple of prime[i] (divisible by prime[i])
-                // For example, if low is 31 and prime[i] is 3,
-                // we start with 33.
-                BigInteger i = (fLo / p) * p;
-                if (i < fLo) {
-                    i += p;
-                }
-                if ((i & 1U) == 0U) {
-                    i += p;
-                }
+            // We are skipping multiples of 2.
+            const BigInteger p2 = p << 1U;
 
-                for (;;) {
-                    const size_t o = backward5(i) - low;
-                    if (o > cardinality) {
-                        return false;
-                    }
-                    if ((i % 3U) && (i % 5U)) {
-                        notPrime[o] = true;
-                    }
-                    i += p2;
-                }
+            // Find the minimum number in [low..high] that is
+            // a multiple of prime[i] (divisible by prime[i])
+            // For example, if low is 31 and prime[i] is 3,
+            // we start with 33.
+            BigInteger i = (fLo / p) * p;
+            if (i < fLo) {
+                i += p;
+            }
+            if ((i & 1U) == 0U) {
+                i += p;
+            }
 
-                return false;
-            });
+            for (;;) {
+                const size_t o = backward5(i) - low;
+                if (o > cardinality) {
+                    break;
+                }
+                if ((i % 3U) && (i % 5U)) {
+                    notPrime[o] = true;
+                }
+                i += p2;
+            }
         }
-        dispatch.finish();
 
         // Numbers which are not marked are prime
         for (size_t o = 1U; o <= cardinality; ++o) {
@@ -387,12 +361,6 @@ BigInteger CountPrimesTo(const BigInteger& n)
     std::unique_ptr<bool[]> uNotPrime(new bool[cardinality + 1U]());
     bool* notPrime = uNotPrime.get();
 
-    // We dispatch multiple marking asynchronously.
-    // If we've already marked all primes up to x,
-    // we're free to continue to up to x * x,
-    // then we synchronize.
-    BigInteger threadBoundary = 36U;
-
     // Get the remaining prime numbers.
     unsigned short wheel5 = 129U;
     unsigned long long wheel7 = 9009416540524545ULL;
@@ -406,63 +374,52 @@ BigInteger CountPrimesTo(const BigInteger& n)
             break;
         }
 
-        if (threadBoundary < p) {
-            dispatch.finish();
-            threadBoundary *= threadBoundary;
-        }
-
         if (notPrime[backward5(p)]) {
             continue;
         }
 
         ++count;
 
-        dispatch.dispatch([&n, p, &notPrime]() {
-            // We are skipping multiples of 2, 3, and 5
-            // for space complexity, for 4/15 the bits.
-            // More are skipped by the wheel for time.
-            const BigInteger p2 = p << 1U;
-            const BigInteger p4 = p << 2U;
-            BigInteger i = p * p;
-
-            // "p" already definitely not a multiple of 3.
-            // Its remainder when divided by 3 can be 1 or 2.
-            // If it is 2, we can do a "half iteration" of the
-            // loop that would handle remainder of 1, and then
-            // we can proceed with the 1 remainder loop.
-            // This saves 2/3 of updates (or modulo).
-            if ((p % 3U) == 2U) {
-                notPrime[backward5(i)] = true;
-                i += p2;
-                if (i > n) {
-                    return false;
-                }
+        // We are skipping multiples of 2, 3, and 5
+        // for space complexity, for 4/15 the bits.
+        // More are skipped by the wheel for time.
+        const BigInteger p2 = p << 1U;
+        const BigInteger p4 = p << 2U;
+        BigInteger i = p * p;
+
+        // "p" already definitely not a multiple of 3.
+        // Its remainder when divided by 3 can be 1 or 2.
+        // If it is 2, we can do a "half iteration" of the
+        // loop that would handle remainder of 1, and then
+        // we can proceed with the 1 remainder loop.
+        // This saves 2/3 of updates (or modulo).
+        if ((p % 3U) == 2U) {
+            notPrime[backward5(i)] = true;
+            i += p2;
+            if (i > n) {
+                continue;
             }
+        }
 
-            for (;;) {
-                if (i % 5U) {
-                    notPrime[backward5(i)] = true;
-                }
-                i += p4;
-                if (i > n) {
-                    return false;
-                }
-
-                if (i % 5U) {
-                    notPrime[backward5(i)] = true;
-                }
-                i += p2;
-                if (i > n) {
-                    return false;
-                }
+        for (;;) {
+            if (i % 5U) {
+                notPrime[backward5(i)] = true;
+            }
+            i += p4;
+            if (i > n) {
+                break;
             }
 
-            return false;
-        });
+            if (i % 5U) {
+                notPrime[backward5(i)] = true;
+            }
+            i += p2;
+            if (i > n) {
+                break;
+            }
+        }
     }
 
-    dispatch.finish();
-
     for (;;) {
         const BigInteger p = forward3(o);
         if (p > n) {
@@ -536,37 +493,32 @@ BigInteger SegmentedCountPrimesTo(BigInteger n)
         // to find primes in current range
         for (size_t k = 3U; k < sqrtIndex; ++k) {
             const BigInteger& p = knownPrimes[k];
-            dispatch.dispatch([&fLo, &low, &cardinality, p, &notPrime]() {
-                // We are skipping multiples of 2.
-                const BigInteger p2 = p << 1U;
-
-                // Find the minimum number in [low..high] that is
-                // a multiple of prime[i] (divisible by prime[i])
-                // For example, if low is 31 and prime[i] is 3,
-                // we start with 33.
-                BigInteger i = (fLo / p) * p;
-                if (i < fLo) {
-                    i += p;
-                }
-                if ((i & 1U) == 0U) {
-                    i += p;
-                }
+            // We are skipping multiples of 2.
+            const BigInteger p2 = p << 1U;
 
-                for (;;) {
-                    const size_t o = backward5(i) - low;
-                    if (o > cardinality) {
-                        return false;
-                    }
-                    if ((i % 3U) && (i % 5U)) {
-                        notPrime[o] = true;
-                    }
-                    i += p2;
-                }
+            // Find the minimum number in [low..high] that is
+            // a multiple of prime[i] (divisible by prime[i])
+            // For example, if low is 31 and prime[i] is 3,
+            // we start with 33.
+            BigInteger i = (fLo / p) * p;
+            if (i < fLo) {
+                i += p;
+            }
+            if ((i & 1U) == 0U) {
+                i += p;
+            }
 
-                return false;
-            });
+            for (;;) {
+                const size_t o = backward5(i) - low;
+                if (o > cardinality) {
+                    break;
+                }
+                if ((i % 3U) && (i % 5U)) {
+                    notPrime[o] = true;
+                }
+                i += p2;
+            }
         }
-        dispatch.finish();
 
         if (knownPrimes.back() >= sqrtnp1) {
             for (size_t o = 1U; o <= cardinality; ++o) {
diff --git a/Eratosthenes/dispatchqueue.cpp b/Eratosthenes/dispatchqueue.cpp
deleted file mode 100644
index aa36cd1..0000000
--- a/Eratosthenes/dispatchqueue.cpp
+++ /dev/null
@@ -1,121 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////////////
-//
-// (C) Daniel Strano and the Qrack contributors 2017-2023. All rights reserved.
-//
-// This is a multithreaded, universal quantum register simulation, allowing
-// (nonphysical) register cloning and direct measurement of probability and
-// phase, to leverage what advantages classical emulation of qubits can have.
-//
-// Licensed under the GNU Lesser General Public License V3.
-// See LICENSE.md in the project root or https://www.gnu.org/licenses/lgpl-3.0.en.html
-// for details.
-
-// From https://github.com/embeddedartistry/embedded-resources/blob/master/examples/cpp/dispatch.cpp
-
-#include "dispatchqueue.hpp"
-
-DispatchQueue::~DispatchQueue()
-{
-    std::unique_lock<std::mutex> lock(lock_);
-
-    if (!isStarted_) {
-        return;
-    }
-
-    std::queue<DispatchFn> empty;
-    std::swap(q_, empty);
-    quit_ = true;
-
-    lock.unlock();
-    cv_.notify_all();
-
-    // Wait for thread to finish before we exit
-    for (size_t i = 0U; i < threads_.size(); ++i) {
-        threads_[i].get();
-    }
-
-    isFinished_ = true;
-    cvFinished_.notify_all();
-}
-
-bool DispatchQueue::finish()
-{
-    std::unique_lock<std::mutex> lock(lock_);
-
-    if (quit_ || !isStarted_) {
-        return false;
-    }
-
-    cvFinished_.wait(lock, [this] { return isFinished_ || quit_; });
-
-    return result;
-}
-
-void DispatchQueue::dump()
-{
-    std::unique_lock<std::mutex> lock(lock_);
-
-    if (quit_ || !isStarted_) {
-        return;
-    }
-
-    std::queue<DispatchFn> empty;
-    std::swap(q_, empty);
-    isFinished_ = true;
-    lock.unlock();
-    cvFinished_.notify_all();
-}
-
-void DispatchQueue::dispatch(const DispatchFn& op)
-{
-    std::unique_lock<std::mutex> lock(lock_);
-
-    if (quit_) {
-        return;
-    }
-
-    q_.push(op);
-    isFinished_ = false;
-    if (!isStarted_) {
-        isStarted_ = true;
-        for (size_t i = 0U; i < threads_.size(); ++i) {
-            threads_[i] = std::async(std::launch::async, [this] { dispatch_thread_handler(); });
-        }
-    }
-
-    // Manual unlocking is done before notifying, to avoid waking up
-    // the waiting thread only to block again (see notify_one for details)
-    lock.unlock();
-    cv_.notify_one();
-}
-
-void DispatchQueue::dispatch_thread_handler(void)
-{
-    std::unique_lock<std::mutex> lock(lock_);
-
-    do {
-        // Wait until we have data or a quit signal
-        cv_.wait(lock, [this] { return (q_.size() || quit_); });
-        // after wait, we own the lock
-
-        if (quit_) {
-            continue;
-        }
-
-        auto op = std::move(q_.front());
-        q_.pop();
-
-        // unlock now that we're done messing with the queue
-        lock.unlock();
-
-        result |= op();
-        quit_ |= result;
-
-        lock.lock();
-
-        if (!q_.size()) {
-            isFinished_ = true;
-            cvFinished_.notify_all();
-        }
-    } while (!quit_);
-}
diff --git a/Eratosthenes/include/dispatchqueue.hpp b/Eratosthenes/include/dispatchqueue.hpp
deleted file mode 100644
index 1ac459b..0000000
--- a/Eratosthenes/include/dispatchqueue.hpp
+++ /dev/null
@@ -1,67 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////////////
-//
-// (C) Daniel Strano and the Qrack contributors 2017-2023. All rights reserved.
-//
-// This is a multithreaded, universal quantum register simulation, allowing
-// (nonphysical) register cloning and direct measurement of probability and
-// phase, to leverage what advantages classical emulation of qubits can have.
-//
-// Licensed under the GNU Lesser General Public License V3.
-// See LICENSE.md in the project root or https://www.gnu.org/licenses/lgpl-3.0.en.html
-// for details.
-
-// From https://github.com/embeddedartistry/embedded-resources/blob/master/examples/cpp/dispatch.cpp
-
-#pragma once
-
-#include <condition_variable>
-#include <functional>
-#include <future>
-#include <mutex>
-#include <queue>
-
-typedef std::function<bool(void)> DispatchFn;
-
-class DispatchQueue {
-public:
-    DispatchQueue(size_t n)
-        : threads_(n)
-        , quit_(false)
-        , isFinished_(true)
-        , isStarted_(false)
-        , result(false)
-    {
-        // Intentionally left blank.
-    }
-    ~DispatchQueue();
-
-    // Reset output result.
-    void resetResult() { result = false; }
-    // dispatch and copy
-    void dispatch(const DispatchFn& op);
-    // finish queue
-    bool finish();
-    // dump queue
-    void dump();
-    // check if queue is finished
-    bool isFinished() { return isFinished_; }
-
-    // Deleted operations
-    DispatchQueue(const DispatchQueue& rhs) = delete;
-    DispatchQueue& operator=(const DispatchQueue& rhs) = delete;
-    DispatchQueue(DispatchQueue&& rhs) = delete;
-    DispatchQueue& operator=(DispatchQueue&& rhs) = delete;
-
-private:
-    std::mutex lock_;
-    std::vector<std::future<void>> threads_;
-    std::queue<DispatchFn> q_;
-    std::condition_variable cv_;
-    std::condition_variable cvFinished_;
-    bool quit_;
-    bool isFinished_;
-    bool isStarted_;
-    bool result;
-
-    void dispatch_thread_handler(void);
-};
diff --git a/setup.py b/setup.py
index 23109a1..9fb4094 100644
--- a/setup.py
+++ b/setup.py
@@ -11,8 +11,8 @@
 ext_modules = [
     Extension(
         '_eratosthenes',
-        ['Eratosthenes/_eratosthenes.cpp', "Eratosthenes/dispatchqueue.cpp"],
-        include_dirs=['Eratosthenes/include', 'pybind11/include'],
+        ['Eratosthenes/_eratosthenes.cpp'],
+        include_dirs=['pybind11/include'],
         language='c++',
         extra_compile_args = cpp_args,
     ),
@@ -20,7 +20,7 @@
 
 setup(
     name='Eratosthenes',
-    version='4.0.8',
+    version='4.0.9',
     author='Dan Strano',
     author_email='dan@unitary.fund',
     description='Fast prime generation for Python based on Sieve of Eratosthenes',