From b0c496c2f6842dc26f59a334bc25f0a86c9601ba Mon Sep 17 00:00:00 2001 From: WrathfulSpatula Date: Fri, 3 Jan 2025 08:02:12 -0500 Subject: [PATCH] Single-threaded --- Eratosthenes/_eratosthenes.cpp | 276 ++++++++++--------------- Eratosthenes/dispatchqueue.cpp | 121 ----------- Eratosthenes/include/dispatchqueue.hpp | 67 ------ setup.py | 6 +- 4 files changed, 117 insertions(+), 353 deletions(-) delete mode 100644 Eratosthenes/dispatchqueue.cpp delete mode 100644 Eratosthenes/include/dispatchqueue.hpp diff --git a/Eratosthenes/_eratosthenes.cpp b/Eratosthenes/_eratosthenes.cpp index f60d6db..5d6fb99 100644 --- a/Eratosthenes/_eratosthenes.cpp +++ b/Eratosthenes/_eratosthenes.cpp @@ -10,8 +10,6 @@ // number under trial. Multiples of 2, 3, 5, 7, and 11 can // be entirely skipped in loop enumeration. -#include "dispatchqueue.hpp" - #include #include #include @@ -25,8 +23,6 @@ namespace qimcifa { -DispatchQueue dispatch(std::thread::hardware_concurrency()); - typedef boost::multiprecision::cpp_int BigInteger; inline BigInteger sqrt(const BigInteger& toTest) @@ -170,12 +166,6 @@ std::vector SieveOfEratosthenes(const BigInteger& n) std::unique_ptr uNotPrime(new bool[cardinality + 1U]()); bool* notPrime = uNotPrime.get(); - // We dispatch multiple marking asynchronously. - // If we've already marked all primes up to x, - // we're free to continue to up to x * x, - // then we synchronize. - BigInteger threadBoundary = 36U; - // Get the remaining prime numbers. unsigned short wheel5 = 129U; unsigned long long wheel7 = 9009416540524545ULL; @@ -188,63 +178,52 @@ std::vector SieveOfEratosthenes(const BigInteger& n) break; } - if (threadBoundary < p) { - dispatch.finish(); - threadBoundary *= threadBoundary; - } - if (notPrime[backward5(p)]) { continue; } knownPrimes.push_back(p); - dispatch.dispatch([&n, p, ¬Prime]() { - // We are skipping multiples of 2, 3, and 5 - // for space complexity, for 4/15 the bits. - // More are skipped by the wheel for time. - const BigInteger p2 = p << 1U; - const BigInteger p4 = p << 2U; - BigInteger i = p * p; - - // "p" already definitely not a multiple of 3. - // Its remainder when divided by 3 can be 1 or 2. - // If it is 2, we can do a "half iteration" of the - // loop that would handle remainder of 1, and then - // we can proceed with the 1 remainder loop. - // This saves 2/3 of updates (or modulo). - if ((p % 3U) == 2U) { - notPrime[backward5(i)] = true; - i += p2; - if (i > n) { - return false; - } + // We are skipping multiples of 2, 3, and 5 + // for space complexity, for 4/15 the bits. + // More are skipped by the wheel for time. + const BigInteger p2 = p << 1U; + const BigInteger p4 = p << 2U; + BigInteger i = p * p; + + // "p" already definitely not a multiple of 3. + // Its remainder when divided by 3 can be 1 or 2. + // If it is 2, we can do a "half iteration" of the + // loop that would handle remainder of 1, and then + // we can proceed with the 1 remainder loop. + // This saves 2/3 of updates (or modulo). + if ((p % 3U) == 2U) { + notPrime[backward5(i)] = true; + i += p2; + if (i > n) { + continue; } + } - for (;;) { - if (i % 5U) { - notPrime[backward5(i)] = true; - } - i += p4; - if (i > n) { - return false; - } - - if (i % 5U) { - notPrime[backward5(i)] = true; - } - i += p2; - if (i > n) { - return false; - } + for (;;) { + if (i % 5U) { + notPrime[backward5(i)] = true; + } + i += p4; + if (i > n) { + break; } - return false; - }); + if (i % 5U) { + notPrime[backward5(i)] = true; + } + i += p2; + if (i > n) { + break; + } + } } - dispatch.finish(); - for (;;) { const BigInteger p = forward3(o); if (p > n) { @@ -309,37 +288,32 @@ std::vector SegmentedSieveOfEratosthenes(BigInteger n) for (size_t k = 3U; k < sqrtIndex; ++k) { const BigInteger& p = knownPrimes[k]; - dispatch.dispatch([&fLo, &low, &cardinality, p, ¬Prime]() { - // We are skipping multiples of 2. - const BigInteger p2 = p << 1U; - - // Find the minimum number in [low..high] that is - // a multiple of prime[i] (divisible by prime[i]) - // For example, if low is 31 and prime[i] is 3, - // we start with 33. - BigInteger i = (fLo / p) * p; - if (i < fLo) { - i += p; - } - if ((i & 1U) == 0U) { - i += p; - } + // We are skipping multiples of 2. + const BigInteger p2 = p << 1U; - for (;;) { - const size_t o = backward5(i) - low; - if (o > cardinality) { - return false; - } - if ((i % 3U) && (i % 5U)) { - notPrime[o] = true; - } - i += p2; - } + // Find the minimum number in [low..high] that is + // a multiple of prime[i] (divisible by prime[i]) + // For example, if low is 31 and prime[i] is 3, + // we start with 33. + BigInteger i = (fLo / p) * p; + if (i < fLo) { + i += p; + } + if ((i & 1U) == 0U) { + i += p; + } - return false; - }); + for (;;) { + const size_t o = backward5(i) - low; + if (o > cardinality) { + break; + } + if ((i % 3U) && (i % 5U)) { + notPrime[o] = true; + } + i += p2; + } } - dispatch.finish(); // Numbers which are not marked are prime for (size_t o = 1U; o <= cardinality; ++o) { @@ -387,12 +361,6 @@ BigInteger CountPrimesTo(const BigInteger& n) std::unique_ptr uNotPrime(new bool[cardinality + 1U]()); bool* notPrime = uNotPrime.get(); - // We dispatch multiple marking asynchronously. - // If we've already marked all primes up to x, - // we're free to continue to up to x * x, - // then we synchronize. - BigInteger threadBoundary = 36U; - // Get the remaining prime numbers. unsigned short wheel5 = 129U; unsigned long long wheel7 = 9009416540524545ULL; @@ -406,63 +374,52 @@ BigInteger CountPrimesTo(const BigInteger& n) break; } - if (threadBoundary < p) { - dispatch.finish(); - threadBoundary *= threadBoundary; - } - if (notPrime[backward5(p)]) { continue; } ++count; - dispatch.dispatch([&n, p, ¬Prime]() { - // We are skipping multiples of 2, 3, and 5 - // for space complexity, for 4/15 the bits. - // More are skipped by the wheel for time. - const BigInteger p2 = p << 1U; - const BigInteger p4 = p << 2U; - BigInteger i = p * p; - - // "p" already definitely not a multiple of 3. - // Its remainder when divided by 3 can be 1 or 2. - // If it is 2, we can do a "half iteration" of the - // loop that would handle remainder of 1, and then - // we can proceed with the 1 remainder loop. - // This saves 2/3 of updates (or modulo). - if ((p % 3U) == 2U) { - notPrime[backward5(i)] = true; - i += p2; - if (i > n) { - return false; - } + // We are skipping multiples of 2, 3, and 5 + // for space complexity, for 4/15 the bits. + // More are skipped by the wheel for time. + const BigInteger p2 = p << 1U; + const BigInteger p4 = p << 2U; + BigInteger i = p * p; + + // "p" already definitely not a multiple of 3. + // Its remainder when divided by 3 can be 1 or 2. + // If it is 2, we can do a "half iteration" of the + // loop that would handle remainder of 1, and then + // we can proceed with the 1 remainder loop. + // This saves 2/3 of updates (or modulo). + if ((p % 3U) == 2U) { + notPrime[backward5(i)] = true; + i += p2; + if (i > n) { + continue; } + } - for (;;) { - if (i % 5U) { - notPrime[backward5(i)] = true; - } - i += p4; - if (i > n) { - return false; - } - - if (i % 5U) { - notPrime[backward5(i)] = true; - } - i += p2; - if (i > n) { - return false; - } + for (;;) { + if (i % 5U) { + notPrime[backward5(i)] = true; + } + i += p4; + if (i > n) { + break; } - return false; - }); + if (i % 5U) { + notPrime[backward5(i)] = true; + } + i += p2; + if (i > n) { + break; + } + } } - dispatch.finish(); - for (;;) { const BigInteger p = forward3(o); if (p > n) { @@ -536,37 +493,32 @@ BigInteger SegmentedCountPrimesTo(BigInteger n) // to find primes in current range for (size_t k = 3U; k < sqrtIndex; ++k) { const BigInteger& p = knownPrimes[k]; - dispatch.dispatch([&fLo, &low, &cardinality, p, ¬Prime]() { - // We are skipping multiples of 2. - const BigInteger p2 = p << 1U; - - // Find the minimum number in [low..high] that is - // a multiple of prime[i] (divisible by prime[i]) - // For example, if low is 31 and prime[i] is 3, - // we start with 33. - BigInteger i = (fLo / p) * p; - if (i < fLo) { - i += p; - } - if ((i & 1U) == 0U) { - i += p; - } + // We are skipping multiples of 2. + const BigInteger p2 = p << 1U; - for (;;) { - const size_t o = backward5(i) - low; - if (o > cardinality) { - return false; - } - if ((i % 3U) && (i % 5U)) { - notPrime[o] = true; - } - i += p2; - } + // Find the minimum number in [low..high] that is + // a multiple of prime[i] (divisible by prime[i]) + // For example, if low is 31 and prime[i] is 3, + // we start with 33. + BigInteger i = (fLo / p) * p; + if (i < fLo) { + i += p; + } + if ((i & 1U) == 0U) { + i += p; + } - return false; - }); + for (;;) { + const size_t o = backward5(i) - low; + if (o > cardinality) { + break; + } + if ((i % 3U) && (i % 5U)) { + notPrime[o] = true; + } + i += p2; + } } - dispatch.finish(); if (knownPrimes.back() >= sqrtnp1) { for (size_t o = 1U; o <= cardinality; ++o) { diff --git a/Eratosthenes/dispatchqueue.cpp b/Eratosthenes/dispatchqueue.cpp deleted file mode 100644 index aa36cd1..0000000 --- a/Eratosthenes/dispatchqueue.cpp +++ /dev/null @@ -1,121 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// -// (C) Daniel Strano and the Qrack contributors 2017-2023. All rights reserved. -// -// This is a multithreaded, universal quantum register simulation, allowing -// (nonphysical) register cloning and direct measurement of probability and -// phase, to leverage what advantages classical emulation of qubits can have. -// -// Licensed under the GNU Lesser General Public License V3. -// See LICENSE.md in the project root or https://www.gnu.org/licenses/lgpl-3.0.en.html -// for details. - -// From https://github.com/embeddedartistry/embedded-resources/blob/master/examples/cpp/dispatch.cpp - -#include "dispatchqueue.hpp" - -DispatchQueue::~DispatchQueue() -{ - std::unique_lock lock(lock_); - - if (!isStarted_) { - return; - } - - std::queue empty; - std::swap(q_, empty); - quit_ = true; - - lock.unlock(); - cv_.notify_all(); - - // Wait for thread to finish before we exit - for (size_t i = 0U; i < threads_.size(); ++i) { - threads_[i].get(); - } - - isFinished_ = true; - cvFinished_.notify_all(); -} - -bool DispatchQueue::finish() -{ - std::unique_lock lock(lock_); - - if (quit_ || !isStarted_) { - return false; - } - - cvFinished_.wait(lock, [this] { return isFinished_ || quit_; }); - - return result; -} - -void DispatchQueue::dump() -{ - std::unique_lock lock(lock_); - - if (quit_ || !isStarted_) { - return; - } - - std::queue empty; - std::swap(q_, empty); - isFinished_ = true; - lock.unlock(); - cvFinished_.notify_all(); -} - -void DispatchQueue::dispatch(const DispatchFn& op) -{ - std::unique_lock lock(lock_); - - if (quit_) { - return; - } - - q_.push(op); - isFinished_ = false; - if (!isStarted_) { - isStarted_ = true; - for (size_t i = 0U; i < threads_.size(); ++i) { - threads_[i] = std::async(std::launch::async, [this] { dispatch_thread_handler(); }); - } - } - - // Manual unlocking is done before notifying, to avoid waking up - // the waiting thread only to block again (see notify_one for details) - lock.unlock(); - cv_.notify_one(); -} - -void DispatchQueue::dispatch_thread_handler(void) -{ - std::unique_lock lock(lock_); - - do { - // Wait until we have data or a quit signal - cv_.wait(lock, [this] { return (q_.size() || quit_); }); - // after wait, we own the lock - - if (quit_) { - continue; - } - - auto op = std::move(q_.front()); - q_.pop(); - - // unlock now that we're done messing with the queue - lock.unlock(); - - result |= op(); - quit_ |= result; - - lock.lock(); - - if (!q_.size()) { - isFinished_ = true; - cvFinished_.notify_all(); - } - } while (!quit_); -} diff --git a/Eratosthenes/include/dispatchqueue.hpp b/Eratosthenes/include/dispatchqueue.hpp deleted file mode 100644 index 1ac459b..0000000 --- a/Eratosthenes/include/dispatchqueue.hpp +++ /dev/null @@ -1,67 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// -// (C) Daniel Strano and the Qrack contributors 2017-2023. All rights reserved. -// -// This is a multithreaded, universal quantum register simulation, allowing -// (nonphysical) register cloning and direct measurement of probability and -// phase, to leverage what advantages classical emulation of qubits can have. -// -// Licensed under the GNU Lesser General Public License V3. -// See LICENSE.md in the project root or https://www.gnu.org/licenses/lgpl-3.0.en.html -// for details. - -// From https://github.com/embeddedartistry/embedded-resources/blob/master/examples/cpp/dispatch.cpp - -#pragma once - -#include -#include -#include -#include -#include - -typedef std::function DispatchFn; - -class DispatchQueue { -public: - DispatchQueue(size_t n) - : threads_(n) - , quit_(false) - , isFinished_(true) - , isStarted_(false) - , result(false) - { - // Intentionally left blank. - } - ~DispatchQueue(); - - // Reset output result. - void resetResult() { result = false; } - // dispatch and copy - void dispatch(const DispatchFn& op); - // finish queue - bool finish(); - // dump queue - void dump(); - // check if queue is finished - bool isFinished() { return isFinished_; } - - // Deleted operations - DispatchQueue(const DispatchQueue& rhs) = delete; - DispatchQueue& operator=(const DispatchQueue& rhs) = delete; - DispatchQueue(DispatchQueue&& rhs) = delete; - DispatchQueue& operator=(DispatchQueue&& rhs) = delete; - -private: - std::mutex lock_; - std::vector> threads_; - std::queue q_; - std::condition_variable cv_; - std::condition_variable cvFinished_; - bool quit_; - bool isFinished_; - bool isStarted_; - bool result; - - void dispatch_thread_handler(void); -}; diff --git a/setup.py b/setup.py index 23109a1..9fb4094 100644 --- a/setup.py +++ b/setup.py @@ -11,8 +11,8 @@ ext_modules = [ Extension( '_eratosthenes', - ['Eratosthenes/_eratosthenes.cpp', "Eratosthenes/dispatchqueue.cpp"], - include_dirs=['Eratosthenes/include', 'pybind11/include'], + ['Eratosthenes/_eratosthenes.cpp'], + include_dirs=['pybind11/include'], language='c++', extra_compile_args = cpp_args, ), @@ -20,7 +20,7 @@ setup( name='Eratosthenes', - version='4.0.8', + version='4.0.9', author='Dan Strano', author_email='dan@unitary.fund', description='Fast prime generation for Python based on Sieve of Eratosthenes',