From b6d249b23e04b61476eef78115085331d6807cef Mon Sep 17 00:00:00 2001 From: Brett Laptop Date: Fri, 12 Jul 2024 18:33:39 -0400 Subject: [PATCH] replace lock with CXE loop --- CMakeLists.txt | 4 +- ...example.cpp => pg_symbolic_regression.cpp} | 2 +- include/blt/gp/program.h | 114 +++++++++--------- src/program.cpp | 43 ------- test_perf.sh | 1 + 5 files changed, 63 insertions(+), 101 deletions(-) rename examples/{gp_symbolic_regression_example.cpp => pg_symbolic_regression.cpp} (99%) create mode 100755 test_perf.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e1075a..fef4c80 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.25) -project(blt-gp VERSION 0.0.64) +project(blt-gp VERSION 0.0.65) include(CTest) @@ -79,6 +79,6 @@ if (${BUILD_EXAMPLES}) blt_add_example(blt-gp5 examples/gp_test_5.cpp) blt_add_example(blt-gp6 examples/gp_test_6.cpp) blt_add_example(blt-gp7 examples/gp_test_7.cpp) - blt_add_example(blt-symbolic-regression examples/gp_symbolic_regression_example.cpp) + blt_add_example(blt-SR-playground examples/pg_symbolic_regression.cpp) endif () \ No newline at end of file diff --git a/examples/gp_symbolic_regression_example.cpp b/examples/pg_symbolic_regression.cpp similarity index 99% rename from examples/gp_symbolic_regression_example.cpp rename to examples/pg_symbolic_regression.cpp index 787cf41..df5424c 100644 --- a/examples/gp_symbolic_regression_example.cpp +++ b/examples/pg_symbolic_regression.cpp @@ -36,7 +36,7 @@ blt::gp::prog_config_t config = blt::gp::prog_config_t() .set_initial_max_tree_size(6) .set_elite_count(0) .set_max_generations(50) - .set_pop_size(500) + .set_pop_size(5000) .set_thread_count(0); blt::gp::type_provider type_system; diff --git a/include/blt/gp/program.h b/include/blt/gp/program.h index c5c94d4..7977938 100644 --- a/include/blt/gp/program.h +++ b/include/blt/gp/program.h @@ -283,54 +283,60 @@ namespace blt::gp { current_pop = config.pop_initializer.get().generate( {*this, root_type, config.population_size, config.initial_min_tree_size, config.initial_max_tree_size}); - thread_execution_service = new std::function([this, &fitness_function]() { - if (thread_helper.evaluation_left > 0) - { - std::cout << "Thread Incrementing " << thread_helper.threads_left << std::endl; - auto old_value_start = thread_helper.threads_left.load(std::memory_order::memory_order_acquire); - while (!thread_helper.threads_left.compare_exchange_weak(old_value_start, old_value_start + 1, std::memory_order_release, - std::memory_order_relaxed)); - std::cout << "Thread beginning " << thread_helper.threads_left << std::endl; - while (thread_helper.evaluation_left > 0) + if (config.threads == 1) + { + thread_execution_service = new std::function([this, &fitness_function]() { + if (thread_helper.evaluation_left > 0) { - blt::size_t begin = 0; - blt::size_t end = 0; - { - std::scoped_lock lock(thread_helper.evaluation_control); - end = thread_helper.evaluation_left; - auto size = std::min(thread_helper.evaluation_left.load(), config.evaluation_size); - begin = thread_helper.evaluation_left - size; - thread_helper.evaluation_left -= size; - } - //std::cout << "Processing " << begin << " to " << end << " with " << thread_helper.evaluation_left << " left" << std::endl; - for (blt::size_t i = begin; i < end; i++) - { - auto& ind = current_pop.get_individuals()[i]; - - fitness_function(ind.tree, ind.fitness, i); - - auto old_best = current_stats.best_fitness.load(); - while (ind.fitness.adjusted_fitness > old_best && - !current_stats.best_fitness.compare_exchange_weak(old_best, ind.fitness.adjusted_fitness, - std::memory_order_release, std::memory_order_relaxed)); - - auto old_worst = current_stats.worst_fitness.load(); - while (ind.fitness.adjusted_fitness < old_worst && - !current_stats.worst_fitness.compare_exchange_weak(old_worst, ind.fitness.adjusted_fitness, - std::memory_order_release, std::memory_order_relaxed)); - - auto old_overall = current_stats.overall_fitness.load(); - while (!current_stats.overall_fitness.compare_exchange_weak(old_overall, ind.fitness.adjusted_fitness + old_overall, - std::memory_order_release, std::memory_order_relaxed)); - } + } - std::cout << "Thread Decrementing " << thread_helper.threads_left << std::endl; - auto old_value = thread_helper.threads_left.load(std::memory_order::memory_order_acquire); - while (!thread_helper.threads_left.compare_exchange_weak(old_value, old_value - 1, std::memory_order_release, - std::memory_order_relaxed)); - std::cout << "Thread Ending " << thread_helper.threads_left << std::endl; - } - }); + }); + } else + { + thread_execution_service = new std::function([this, &fitness_function]() { + if (thread_helper.evaluation_left > 0) + { + thread_helper.threads_left.fetch_add(1, std::memory_order::memory_order_relaxed); + while (thread_helper.evaluation_left > 0) + { + blt::size_t begin = 0; + blt::size_t end = thread_helper.evaluation_left.load(std::memory_order_acquire); + blt::size_t size = 0; + do + { + size = std::min(end, config.evaluation_size); + begin = end - size; + } while (!thread_helper.evaluation_left.compare_exchange_weak(end, end - size, + std::memory_order::memory_order_release, + std::memory_order::memory_order_acquire)); + + for (blt::size_t i = begin; i < end; i++) + { + auto& ind = current_pop.get_individuals()[i]; + + fitness_function(ind.tree, ind.fitness, i); + + auto old_best = current_stats.best_fitness.load(std::memory_order_relaxed); + while (ind.fitness.adjusted_fitness > old_best && + !current_stats.best_fitness.compare_exchange_weak(old_best, ind.fitness.adjusted_fitness, + std::memory_order_release, std::memory_order_relaxed)); + + auto old_worst = current_stats.worst_fitness.load(std::memory_order_relaxed); + while (ind.fitness.adjusted_fitness < old_worst && + !current_stats.worst_fitness.compare_exchange_weak(old_worst, ind.fitness.adjusted_fitness, + std::memory_order_release, std::memory_order_relaxed)); + + auto old_overall = current_stats.overall_fitness.load(std::memory_order_relaxed); + while (!current_stats.overall_fitness.compare_exchange_weak(old_overall, + ind.fitness.adjusted_fitness + old_overall, + std::memory_order_release, + std::memory_order_relaxed)); + } + } + thread_helper.threads_left.fetch_sub(1, std::memory_order::memory_order_relaxed); + } + }); + } evaluate_fitness_internal(); } @@ -494,7 +500,7 @@ namespace blt::gp struct concurrency_storage { std::vector> threads; - std::mutex evaluation_control; + //std::mutex evaluation_control; std::atomic_uint64_t evaluation_left = 0; std::atomic_int64_t threads_left = 0; @@ -518,8 +524,6 @@ namespace blt::gp void create_threads(); - void execute_thread(); - void evaluate_fitness_internal() { current_stats.clear(); @@ -529,22 +533,22 @@ namespace blt::gp } else { { - std::scoped_lock lock(thread_helper.evaluation_control); - thread_helper.evaluation_left = current_pop.get_individuals().size(); + //std::scoped_lock lock(thread_helper.evaluation_control); + thread_helper.evaluation_left.store(current_pop.get_individuals().size(), std::memory_order_release); } - std::cout << "Func" << std::endl; + //std::cout << "Func" << std::endl; while (thread_execution_service == nullptr) std::this_thread::sleep_for(std::chrono::milliseconds(1)); - std::cout << "Wait" << std::endl; + //std::cout << "Wait" << std::endl; (*thread_execution_service)(); - std::cout << "FINSIHED WAITING!!!!!!!! " << thread_helper.threads_left << std::endl; + //std::cout << "FINSIHED WAITING!!!!!!!! " << thread_helper.threads_left << std::endl; while (thread_helper.threads_left > 0) { //std::cout << thread_helper.threads_left << std::endl; std::this_thread::sleep_for(std::chrono::milliseconds(1)); } - std::cout << "Finished" << std::endl; + //std::cout << "Finished" << std::endl; } current_stats.average_fitness = current_stats.overall_fitness / static_cast(config.population_size); diff --git a/src/program.cpp b/src/program.cpp index 3384569..c059de9 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -67,47 +67,4 @@ namespace blt::gp })); } } - - void gp_program::execute_thread() - { - if (thread_helper.evaluation_left > 0) - { - std::cout << "Thread beginning" << std::endl; - while (thread_helper.evaluation_left > 0) - { - blt::size_t begin = 0; - blt::size_t end = 0; - { - std::scoped_lock lock(thread_helper.evaluation_control); - end = thread_helper.evaluation_left; - auto size = std::min(thread_helper.evaluation_left.load(), config.evaluation_size); - begin = thread_helper.evaluation_left - size; - thread_helper.evaluation_left -= size; - } - std::cout << "Processing " << begin << " to " << end << " with " << thread_helper.evaluation_left << " left" << std::endl; - for (blt::size_t i = begin; i < end; i++) - { - auto& ind = current_pop.get_individuals()[i]; - - //evaluate_fitness_func(ind.tree, ind.fitness, i); - - auto old_best = current_stats.best_fitness.load(); - while (ind.fitness.adjusted_fitness > old_best && - !current_stats.best_fitness.compare_exchange_weak(old_best, ind.fitness.adjusted_fitness, - std::memory_order_release, std::memory_order_relaxed)); - - auto old_worst = current_stats.worst_fitness.load(); - while (ind.fitness.adjusted_fitness < old_worst && - !current_stats.worst_fitness.compare_exchange_weak(old_worst, ind.fitness.adjusted_fitness, - std::memory_order_release, std::memory_order_relaxed)); - - auto old_overall = current_stats.overall_fitness.load(); - while (!current_stats.overall_fitness.compare_exchange_weak(old_overall, ind.fitness.adjusted_fitness + old_overall, - std::memory_order_release, std::memory_order_relaxed)); - } - } - thread_helper.threads_left--; - std::cout << "thread finished!" << std::endl; - } - } } \ No newline at end of file diff --git a/test_perf.sh b/test_perf.sh new file mode 100755 index 0000000..075b123 --- /dev/null +++ b/test_perf.sh @@ -0,0 +1 @@ +perf stat -d -d -d -r 30 -e branches,branch-misses,cache-misses,cache-references,cycles,instructions,alignment-faults,cgroup-switches,faults,duration_time,user_time,system_time,L1-dcache-loads,L1-dcache-load-misses,L1-dcache-prefetches,L1-icache-loads,L1-icache-load-misses,dTLB-loads,dTLB-load-misses,iTLB-loads,iTLB-load-misses,l2_request_g1.all_no_prefetch ./cmake-build-release/blt-SR-playground-example