diff --git a/CMakeLists.txt b/CMakeLists.txt index d89e6e5..fbd2149 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.25) -project(blt-gp VERSION 0.0.75) +project(blt-gp VERSION 0.0.76) include(CTest) diff --git a/include/blt/gp/program.h b/include/blt/gp/program.h index c862f96..bcfb92d 100644 --- a/include/blt/gp/program.h +++ b/include/blt/gp/program.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -54,79 +55,6 @@ namespace blt::gp { - namespace detail - { - // Author: Kirk Saunders (ks825016@ohio.edu) - // Description: Simple implementation of a thread barrier - // using C++ condition variables. - // Date: 2/17/2020 - - // https://github.com/kirksaunders/barrier/blob/master/barrier.hpp - class barrier - { - public: - // Construct barrier for use with num threads. - explicit barrier(std::atomic_bool& exit_cond, std::size_t num) - : num_threads(num), - wait_count(0), - instance(0), - mut(), - cv(), - exit_cond(exit_cond) - { - if (num == 0) - { - throw std::invalid_argument("Barrier thread count cannot be 0"); - } - } - - // disable copying of barrier - barrier(const barrier&) = delete; - - barrier& operator=(const barrier&) = delete; - - // This function blocks the calling thread until - // all threads (specified by num_threads) have - // called it. Blocking is achieved using a - // call to condition_variable.wait(). - void wait() - { - std::unique_lock lock(mut); // acquire lock - std::size_t inst = instance; // store current instance for comparison - // in predicate - - if (++wait_count == num_threads) - { // all threads reached barrier - wait_count = 0; // reset wait_count - instance++; // increment instance for next use of barrier and to - // pass condition variable predicate - cv.notify_all(); - } else - { // not all threads have reached barrier - cv.wait(lock, [this, &inst]() { return (instance != inst || exit_cond); }); - // NOTE: The predicate lambda here protects against spurious - // wakeups of the thread. As long as this->instance is - // equal to inst, the thread will not wake. - // this->instance will only increment when all threads - // have reached the barrier and are ready to be unblocked. - } - } - - void notify_all() - { - cv.notify_all(); - } - - private: - std::size_t num_threads; // number of threads using barrier - std::size_t wait_count; // counter to keep track of waiting threads - std::size_t instance; // counter to keep track of barrier use count - std::mutex mut; // mutex used to protect resources - std::condition_variable cv; // condition variable used to block threads - std::atomic_bool& exit_cond; // used to signal we should exit - }; - } - struct argc_t { blt::u32 argc = 0; @@ -420,6 +348,7 @@ namespace blt::gp } thread_helper.barrier.wait(); }); + thread_helper.thread_function_condition.notify_all(); } evaluate_fitness_internal(); } @@ -585,13 +514,16 @@ namespace blt::gp struct concurrency_storage { std::vector> threads; + std::mutex thread_function_control; + std::condition_variable thread_function_condition {}; + std::atomic_uint64_t evaluation_left = 0; std::atomic_bool lifetime_over = false; - detail::barrier barrier; + blt::barrier barrier; - explicit concurrency_storage(blt::size_t threads): barrier(lifetime_over, threads) + explicit concurrency_storage(blt::size_t threads): barrier(threads, lifetime_over) {} } thread_helper{config.threads}; diff --git a/include/blt/gp/selection.h b/include/blt/gp/selection.h index 6b4e4ea..ec3ce87 100644 --- a/include/blt/gp/selection.h +++ b/include/blt/gp/selection.h @@ -75,10 +75,7 @@ namespace blt::gp } for (blt::size_t i = 0; i < config.elites; i++) - { -// BLT_DEBUG("%lf at %ld", values[i].second, values[i].first); next_pop.get_individuals().push_back(current_pop.get_individuals()[values[i].first]); - } } while (next_pop.get_individuals().size() < config.population_size) diff --git a/src/program.cpp b/src/program.cpp index 42f3f5c..48ba60d 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -63,14 +63,12 @@ namespace blt::gp { if (execution_function == nullptr) { - std::scoped_lock lock(thread_helper.thread_function_control); - if (thread_execution_service != nullptr) - execution_function = thread_execution_service.load(std::memory_order_acquire); - std::cout.flush(); + std::unique_lock lock(thread_helper.thread_function_control); + thread_helper.thread_function_condition.wait(lock, [this]() { return thread_execution_service != nullptr; }); + execution_function = thread_execution_service.load(std::memory_order_acquire); } if (execution_function != nullptr) (*execution_function)(i); - std::this_thread::sleep_for(std::chrono::milliseconds(1)); } })); } diff --git a/test_perf_clang.sh b/test_perf_clang.sh new file mode 100755 index 0000000..6cf0e9e --- /dev/null +++ b/test_perf_clang.sh @@ -0,0 +1 @@ +perf stat -d -d -d -r 30 -e branches,branch-misses,cache-misses,cache-references,cycles,instructions,alignment-faults,cgroup-switches,faults,duration_time,user_time,system_time,L1-dcache-loads,L1-dcache-load-misses,L1-dcache-prefetches,L1-icache-loads,L1-icache-load-misses,dTLB-loads,dTLB-load-misses,iTLB-loads,iTLB-load-misses,l2_request_g1.all_no_prefetch,page-faults,page-faults:u,page-faults:k ./cmake-build-release-clang/blt-SR-playground-example