diff --git a/.gitignore b/.gitignore index dc19cb5..7d3c8ea 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ out/ ./out/ massif.* callgrind.* +*.out.* +heaptrack.* diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b4d0fd..705aee8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.25) -project(blt-gp VERSION 0.0.69) +project(blt-gp VERSION 0.0.74) include(CTest) @@ -11,6 +11,9 @@ option(DEBUG_LEVEL "Enable debug features which prints extra information to the set(CMAKE_CXX_STANDARD 17) +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) + add_subdirectory(lib/blt) include_directories(include/) @@ -21,7 +24,7 @@ add_library(blt-gp ${PROJECT_BUILD_FILES}) target_compile_options(blt-gp PRIVATE -Wall -Wextra -Werror -Wpedantic -Wno-comment) target_link_options(blt-gp PRIVATE -Wall -Wextra -Werror -Wpedantic -Wno-comment) -target_link_libraries(blt-gp PRIVATE BLT) +target_link_libraries(blt-gp PRIVATE BLT Threads::Threads) target_compile_definitions(blt-gp PRIVATE BLT_DEBUG_LEVEL=${DEBUG_LEVEL}) if (${ENABLE_ADDRSAN} MATCHES ON) @@ -46,7 +49,7 @@ if (${BUILD_EXAMPLES}) add_executable(${name}-example ${source}) - target_link_libraries(${name}-example PRIVATE BLT blt-gp) + target_link_libraries(${name}-example PRIVATE BLT blt-gp Threads::Threads) target_compile_options(${name}-example PRIVATE -Wall -Wextra -Wpedantic -Wno-comment) target_link_options(${name}-example PRIVATE -Wall -Wextra -Wpedantic -Wno-comment) diff --git a/examples/pg_symbolic_regression.cpp b/examples/pg_symbolic_regression.cpp index 5ecf031..21618ca 100644 --- a/examples/pg_symbolic_regression.cpp +++ b/examples/pg_symbolic_regression.cpp @@ -36,8 +36,8 @@ blt::gp::prog_config_t config = blt::gp::prog_config_t() .set_initial_max_tree_size(6) .set_elite_count(0) .set_max_generations(50) - .set_pop_size(500) - .set_thread_count(1); + .set_pop_size(5000) + .set_thread_count(0); blt::gp::type_provider type_system; blt::gp::gp_program program{type_system, SEED, config}; @@ -83,7 +83,9 @@ float example_function(float x) int main() { + BLT_INFO("Starting BLT-GP Symbolic Regression Example"); BLT_START_INTERVAL("Symbolic Regression", "Main"); + BLT_DEBUG("Setup Fitness cases"); for (auto& fitness_case : fitness_cases) { constexpr float range = 10; @@ -93,6 +95,7 @@ int main() fitness_case = {x, y}; } + BLT_DEBUG("Setup Types and Operators"); type_system.register_type(); blt::gp::operator_builder builder{type_system}; @@ -110,17 +113,24 @@ int main() program.set_operations(builder.build()); + BLT_DEBUG("Generate Initial Population"); program.generate_population(type_system.get_type().id(), fitness_function); + BLT_DEBUG("Begin Generation Loop"); while (!program.should_terminate()) { + BLT_TRACE("------------{Begin Generation %ld}------------", program.get_current_generation()); BLT_START_INTERVAL("Symbolic Regression", "Gen"); program.create_next_generation(blt::gp::select_tournament_t{}, blt::gp::select_tournament_t{}, blt::gp::select_tournament_t{}); BLT_END_INTERVAL("Symbolic Regression", "Gen"); + BLT_TRACE("Move to next generation"); BLT_START_INTERVAL("Symbolic Regression", "Fitness"); program.next_generation(); + BLT_TRACE("Evaluate Fitness"); program.evaluate_fitness(); BLT_END_INTERVAL("Symbolic Regression", "Fitness"); + BLT_TRACE("----------------------------------------------"); + std::cout << std::endl; } BLT_END_INTERVAL("Symbolic Regression", "Main"); diff --git a/include/blt/gp/config.h b/include/blt/gp/config.h index 22e0ddd..46e9616 100644 --- a/include/blt/gp/config.h +++ b/include/blt/gp/config.h @@ -131,6 +131,8 @@ namespace blt::gp prog_config_t& set_thread_count(blt::size_t t) { + if (t == 0) + t = std::thread::hardware_concurrency(); threads = t; //evaluation_size = (population_size / threads) / 2; return *this; diff --git a/include/blt/gp/program.h b/include/blt/gp/program.h index ac788fb..c862f96 100644 --- a/include/blt/gp/program.h +++ b/include/blt/gp/program.h @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include @@ -52,6 +54,79 @@ namespace blt::gp { + namespace detail + { + // Author: Kirk Saunders (ks825016@ohio.edu) + // Description: Simple implementation of a thread barrier + // using C++ condition variables. + // Date: 2/17/2020 + + // https://github.com/kirksaunders/barrier/blob/master/barrier.hpp + class barrier + { + public: + // Construct barrier for use with num threads. + explicit barrier(std::atomic_bool& exit_cond, std::size_t num) + : num_threads(num), + wait_count(0), + instance(0), + mut(), + cv(), + exit_cond(exit_cond) + { + if (num == 0) + { + throw std::invalid_argument("Barrier thread count cannot be 0"); + } + } + + // disable copying of barrier + barrier(const barrier&) = delete; + + barrier& operator=(const barrier&) = delete; + + // This function blocks the calling thread until + // all threads (specified by num_threads) have + // called it. Blocking is achieved using a + // call to condition_variable.wait(). + void wait() + { + std::unique_lock lock(mut); // acquire lock + std::size_t inst = instance; // store current instance for comparison + // in predicate + + if (++wait_count == num_threads) + { // all threads reached barrier + wait_count = 0; // reset wait_count + instance++; // increment instance for next use of barrier and to + // pass condition variable predicate + cv.notify_all(); + } else + { // not all threads have reached barrier + cv.wait(lock, [this, &inst]() { return (instance != inst || exit_cond); }); + // NOTE: The predicate lambda here protects against spurious + // wakeups of the thread. As long as this->instance is + // equal to inst, the thread will not wake. + // this->instance will only increment when all threads + // have reached the barrier and are ready to be unblocked. + } + } + + void notify_all() + { + cv.notify_all(); + } + + private: + std::size_t num_threads; // number of threads using barrier + std::size_t wait_count; // counter to keep track of waiting threads + std::size_t instance; // counter to keep track of barrier use count + std::mutex mut; // mutex used to protect resources + std::condition_variable cv; // condition variable used to block threads + std::atomic_bool& exit_cond; // used to signal we should exit + }; + } + struct argc_t { blt::u32 argc = 0; @@ -74,7 +149,7 @@ namespace blt::gp // function to call this operator detail::callable_t function; // function used to transfer values between stacks - detail::transfer_t transfer; + //detail::transfer_t transfer; }; struct operator_storage @@ -125,24 +200,24 @@ namespace blt::gp BLT_ASSERT(info.argc.argc_context - info.argc.argc <= 1 && "Cannot pass multiple context as arguments!"); info.function = op.template make_callable(); - info.transfer = [](std::optional> to, stack_allocator& from) { -#if BLT_DEBUG_LEVEL >= 3 - auto value = from.pop(); - //BLT_TRACE_STREAM << value << "\n"; - if (to){ - to->get().push(value); - } -#else - if (to) - { - to->get().push(from.pop()); - } else - { - from.pop(); - } -#endif - - }; +// info.transfer = [](std::optional> to, stack_allocator& from) { +//#if BLT_DEBUG_LEVEL >= 3 +// auto value = from.pop(); +// //BLT_TRACE_STREAM << value << "\n"; +// if (to){ +// to->get().push(value); +// } +//#else +// if (to) +// { +// to->get().push(from.pop()); +// } else +// { +// from.pop(); +// } +//#endif +// +// }; storage.operators.push_back(info); storage.print_funcs.push_back([](std::ostream& out, stack_allocator& stack) { out << stack.pop(); @@ -285,7 +360,8 @@ namespace blt::gp {*this, root_type, config.population_size, config.initial_min_tree_size, config.initial_max_tree_size}); if (config.threads == 1) { - thread_execution_service = new std::function([this, &fitness_function]() { + BLT_INFO("Starting with single thread variant!"); + thread_execution_service = new std::function([this, &fitness_function](blt::size_t) { for (const auto& ind : blt::enumerate(current_pop.get_individuals())) { fitness_function(ind.second.tree, ind.second.fitness, ind.first); @@ -300,23 +376,24 @@ namespace blt::gp }); } else { - thread_execution_service = new std::function([this, &fitness_function]() { + BLT_INFO("Starting thread execution service!"); + std::scoped_lock lock(thread_helper.thread_function_control); + thread_execution_service = new std::function([this, &fitness_function](blt::size_t) { + thread_helper.barrier.wait(); if (thread_helper.evaluation_left > 0) { - thread_helper.threads_left.fetch_add(1, std::memory_order::memory_order_relaxed); while (thread_helper.evaluation_left > 0) { blt::size_t size = 0; blt::size_t begin = 0; - blt::size_t end = thread_helper.evaluation_left.load(std::memory_order_acquire); + blt::size_t end = thread_helper.evaluation_left.load(std::memory_order_relaxed); do { size = std::min(end, config.evaluation_size); begin = end - size; } while (!thread_helper.evaluation_left.compare_exchange_weak(end, end - size, - std::memory_order::memory_order_release, - std::memory_order::memory_order_acquire)); - + std::memory_order::memory_order_relaxed, + std::memory_order::memory_order_relaxed)); for (blt::size_t i = begin; i < end; i++) { auto& ind = current_pop.get_individuals()[i]; @@ -326,22 +403,22 @@ namespace blt::gp auto old_best = current_stats.best_fitness.load(std::memory_order_relaxed); while (ind.fitness.adjusted_fitness > old_best && !current_stats.best_fitness.compare_exchange_weak(old_best, ind.fitness.adjusted_fitness, - std::memory_order_release, std::memory_order_relaxed)); + std::memory_order_relaxed, std::memory_order_relaxed)); auto old_worst = current_stats.worst_fitness.load(std::memory_order_relaxed); while (ind.fitness.adjusted_fitness < old_worst && !current_stats.worst_fitness.compare_exchange_weak(old_worst, ind.fitness.adjusted_fitness, - std::memory_order_release, std::memory_order_relaxed)); + std::memory_order_relaxed, std::memory_order_relaxed)); auto old_overall = current_stats.overall_fitness.load(std::memory_order_relaxed); while (!current_stats.overall_fitness.compare_exchange_weak(old_overall, ind.fitness.adjusted_fitness + old_overall, - std::memory_order_release, + std::memory_order_relaxed, std::memory_order_relaxed)); } } - thread_helper.threads_left.fetch_sub(1, std::memory_order::memory_order_relaxed); } + thread_helper.barrier.wait(); }); } evaluate_fitness_internal(); @@ -482,6 +559,7 @@ namespace blt::gp ~gp_program() { thread_helper.lifetime_over = true; + thread_helper.barrier.notify_all(); for (auto& thread : thread_helper.threads) { if (thread->joinable()) @@ -507,15 +585,18 @@ namespace blt::gp struct concurrency_storage { std::vector> threads; - //std::mutex evaluation_control; + std::mutex thread_function_control; std::atomic_uint64_t evaluation_left = 0; - std::atomic_int64_t threads_left = 0; std::atomic_bool lifetime_over = false; - } thread_helper; + detail::barrier barrier; + + explicit concurrency_storage(blt::size_t threads): barrier(lifetime_over, threads) + {} + } thread_helper{config.threads}; // for convenience, shouldn't decrease performance too much - std::atomic*> thread_execution_service = nullptr; + std::atomic*> thread_execution_service = nullptr; inline selector_args get_selector_args() { @@ -534,52 +615,11 @@ namespace blt::gp void evaluate_fitness_internal() { current_stats.clear(); - if (config.threads == 1) - { - (*thread_execution_service)(); - } else - { - { - //std::scoped_lock lock(thread_helper.evaluation_control); - thread_helper.evaluation_left.store(current_pop.get_individuals().size(), std::memory_order_release); - } - - //std::cout << "Func" << std::endl; - while (thread_execution_service == nullptr) - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - //std::cout << "Wait" << std::endl; - (*thread_execution_service)(); - //std::cout << "FINSIHED WAITING!!!!!!!! " << thread_helper.threads_left << std::endl; - while (thread_helper.threads_left > 0) - { - //std::cout << thread_helper.threads_left << std::endl; - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - } - //std::cout << "Finished" << std::endl; - } + if (config.threads != 1) + thread_helper.evaluation_left.store(current_pop.get_individuals().size(), std::memory_order_release); + (*thread_execution_service)(0); current_stats.average_fitness = current_stats.overall_fitness / static_cast(config.population_size); - - - /*current_stats = {}; - for (const auto& ind : blt::enumerate(current_pop.get_individuals())) - { - fitness_function(ind.second.tree, ind.second.fitness, ind.first); - if (ind.second.fitness.adjusted_fitness > current_stats.best_fitness) - { - current_stats.best_fitness = ind.second.fitness.adjusted_fitness; - current_stats.best_individual = &ind.second; - } - - if (ind.second.fitness.adjusted_fitness < current_stats.worst_fitness) - { - current_stats.worst_fitness = ind.second.fitness.adjusted_fitness; - current_stats.worst_individual = &ind.second; - } - - current_stats.overall_fitness += ind.second.fitness.adjusted_fitness; - } - current_stats.average_fitness = current_stats.overall_fitness / static_cast(config.population_size);*/ } }; diff --git a/include/blt/gp/stack.h b/include/blt/gp/stack.h index d119694..43cd334 100644 --- a/include/blt/gp/stack.h +++ b/include/blt/gp/stack.h @@ -73,15 +73,14 @@ namespace blt::gp if (head->used_bytes_in_block() < static_cast(aligned_size())) throw std::runtime_error((std::string("Mismatched Types! Not enough space left in block! Bytes: ") += std::to_string( head->used_bytes_in_block()) += " Size: " + std::to_string(sizeof(T))).c_str()); + if (head->used_bytes_in_block() == 0) + move_back(); // make copy T t = *reinterpret_cast(head->metadata.offset - TYPE_SIZE); // call destructor reinterpret_cast(head->metadata.offset - TYPE_SIZE)->~T(); + // move offset back head->metadata.offset -= TYPE_SIZE; - if (head->used_bytes_in_block() == 0) - { - move_back(); - } return t; } @@ -140,9 +139,9 @@ namespace blt::gp if (diff <= 0) { bytes -= head->used_bytes_in_block(); - move_back(); if (diff == 0) break; + move_back(); } else { // otherwise update the offset pointer @@ -164,13 +163,15 @@ namespace blt::gp throw std::runtime_error("This stack is empty!"); if (head->used_bytes_in_block() < static_cast(bytes)) BLT_ABORT("This stack doesn't contain enough data for this type! This is an invalid runtime state!"); + + if (head->used_bytes_in_block() == 0) + move_back(); + auto type_size = aligned_size(bytes); auto ptr = to.allocate_bytes(bytes); to.head->metadata.offset = static_cast(ptr) + type_size; std::memcpy(ptr, head->metadata.offset - type_size, type_size); head->metadata.offset -= type_size; - if (head->used_bytes_in_block() == 0) - move_back(); } template @@ -218,10 +219,7 @@ namespace blt::gp stack_allocator() = default; - // it should be possible to remove the complex copy contrusctor along with trasnfer functions - // simply keep track of the start of the stack, aloing with the current pointer and never dealloacted - // it adds another 8 bytes to each block but should prevent the need for copying when you can just reset the stack. - // (read copy) + // TODO: cleanup this allocator! // if you keep track of type size information you can memcpy between stack allocators as you already only allow trivially copyable types stack_allocator(const stack_allocator& copy) { @@ -401,7 +399,10 @@ namespace blt::gp auto old = head; head = head->metadata.prev; if (head == nullptr) + { head = old; + head->reset(); + } //free_chain(old); // required to prevent silly memory :3 // if (head != nullptr) diff --git a/include/blt/gp/tree.h b/include/blt/gp/tree.h index a1d5d28..6fedd9f 100644 --- a/include/blt/gp/tree.h +++ b/include/blt/gp/tree.h @@ -34,12 +34,16 @@ namespace blt::gp struct op_container_t { - op_container_t(detail::callable_t& func, detail::transfer_t& transfer, operator_id id, bool is_value): - func(func), transfer(transfer), id(id), is_value(is_value) +// op_container_t(detail::callable_t& func, detail::transfer_t& transfer, operator_id id, bool is_value): +// func(func), transfer(transfer), id(id), is_value(is_value) +// {} + op_container_t(detail::callable_t& func, blt::size_t type_size, operator_id id, bool is_value): + func(func), type_size(type_size), id(id), is_value(is_value) {} std::reference_wrapper func; - std::reference_wrapper transfer; + blt::size_t type_size; + //std::reference_wrapper transfer; operator_id id; bool is_value; }; diff --git a/src/generators.cpp b/src/generators.cpp index 5259208..2872460 100644 --- a/src/generators.cpp +++ b/src/generators.cpp @@ -64,7 +64,7 @@ namespace blt::gp tree.get_operations().emplace_back( info.function, - info.transfer, + args.program.get_typesystem().get_type(info.return_type).size(), top.id, args.program.is_static(top.id)); max_depth = std::max(max_depth, top.depth); diff --git a/src/program.cpp b/src/program.cpp index c059de9..42f3f5c 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -57,11 +57,19 @@ namespace blt::gp // main thread is thread0 for (blt::size_t i = 1; i < config.threads; i++) { - thread_helper.threads.emplace_back(new std::thread([this]() { + thread_helper.threads.emplace_back(new std::thread([i, this]() { + std::function* execution_function = nullptr; while (!should_thread_terminate()) { - if (thread_execution_service != nullptr) - (*thread_execution_service)(); + if (execution_function == nullptr) + { + std::scoped_lock lock(thread_helper.thread_function_control); + if (thread_execution_service != nullptr) + execution_function = thread_execution_service.load(std::memory_order_acquire); + std::cout.flush(); + } + if (execution_function != nullptr) + (*execution_function)(i); std::this_thread::sleep_for(std::chrono::milliseconds(1)); } })); diff --git a/src/transformers.cpp b/src/transformers.cpp index 943ec58..956bb87 100644 --- a/src/transformers.cpp +++ b/src/transformers.cpp @@ -169,7 +169,7 @@ namespace blt::gp for (auto it = c1_ops.end() - 1; it != crossover_point_end_itr - 1; it--) { if (it->is_value) - it->transfer(c1_stack_after_copy, c1_stack_init); + c1_stack_init.transfer_bytes(c1_stack_after_copy, it->type_size); } #if BLT_DEBUG_LEVEL > 1 @@ -179,7 +179,7 @@ namespace blt::gp for (auto it = crossover_point_end_itr - 1; it != crossover_point_begin_itr - 1; it--) { if (it->is_value) - it->transfer(c1_stack_for_copy, c1_stack_init); + c1_stack_init.transfer_bytes(c1_stack_for_copy, it->type_size); } #if BLT_DEBUG_LEVEL > 1 @@ -189,7 +189,7 @@ namespace blt::gp for (auto it = c2_ops.end() - 1; it != found_point_end_itr - 1; it--) { if (it->is_value) - it->transfer(c2_stack_after_copy, c2_stack_init); + c2_stack_init.transfer_bytes(c2_stack_after_copy, it->type_size); } #if BLT_DEBUG_LEVEL > 1 @@ -198,7 +198,7 @@ namespace blt::gp for (auto it = found_point_end_itr - 1; it != found_point_begin_itr - 1; it--) { if (it->is_value) - it->transfer(c2_stack_for_copy, c2_stack_init); + c2_stack_init.transfer_bytes(c2_stack_for_copy, it->type_size); } #if BLT_DEBUG_LEVEL > 1 @@ -208,7 +208,7 @@ namespace blt::gp for (auto it = found_point_begin_itr; it != found_point_end_itr; it++) { if (it->is_value) - it->transfer(c1.get_values(), c2_stack_for_copy); + c2_stack_for_copy.transfer_bytes(c1.get_values(), it->type_size); } #if BLT_DEBUG_LEVEL > 1 @@ -217,7 +217,7 @@ namespace blt::gp for (auto it = crossover_point_begin_itr; it != crossover_point_end_itr; it++) { if (it->is_value) - it->transfer(c2.get_values(), c1_stack_for_copy); + c1_stack_for_copy.transfer_bytes(c2.get_values(), it->type_size); } #if BLT_DEBUG_LEVEL > 1 @@ -227,7 +227,7 @@ namespace blt::gp for (auto it = crossover_point_end_itr; it != c1_ops.end(); it++) { if (it->is_value) - it->transfer(c1.get_values(), c1_stack_after_copy); + c1_stack_after_copy.transfer_bytes(c1.get_values(), it->type_size); } #if BLT_DEBUG_LEVEL > 1 @@ -236,7 +236,7 @@ namespace blt::gp for (auto it = found_point_end_itr; it != c2_ops.end(); it++) { if (it->is_value) - it->transfer(c2.get_values(), c2_stack_after_copy); + c2_stack_after_copy.transfer_bytes(c2.get_values(), it->type_size); } // now swap the operators @@ -288,7 +288,7 @@ namespace blt::gp { if (it->is_value) { - it->transfer(after_stack, vals); + vals.transfer_bytes(after_stack, it->type_size); //after_ops.push_back(*it); } } @@ -296,7 +296,7 @@ namespace blt::gp for (auto it = end_p - 1; it != begin_p - 1; it--) { if (it->is_value) - it->transfer(std::optional>{}, vals); + vals.pop_bytes(static_cast(it->type_size)); } auto before = begin_p - 1; @@ -313,7 +313,7 @@ namespace blt::gp for (const auto& op : new_ops) { if (op.is_value) - op.transfer(vals, new_vals); + new_vals.transfer_bytes(vals, op.type_size); } auto new_end_point = point + new_ops.size(); @@ -322,7 +322,7 @@ namespace blt::gp for (auto it = new_end_p; it != ops.end(); it++) { if (it->is_value) - it->transfer(vals, after_stack); + after_stack.transfer_bytes(vals, it->type_size); } return c; diff --git a/src/tree.cpp b/src/tree.cpp index 035ddc2..b6b8c12 100644 --- a/src/tree.cpp +++ b/src/tree.cpp @@ -42,11 +42,11 @@ namespace blt::gp operations_stack.pop_back(); if (operation.is_value) { - operation.transfer(values_process, value_stack); + value_stack.transfer_bytes(values_process, operation.type_size); continue; } operation.func(context, values_process, value_stack); - operations_stack.emplace_back(empty_callable, operation.transfer, operation.id, true); + operations_stack.emplace_back(empty_callable, operation.type_size, operation.id, true); } return results; @@ -88,7 +88,7 @@ namespace blt::gp for (const auto& v : operations) { if (v.is_value) - v.transfer(reversed, copy); + copy.transfer_bytes(reversed, v.type_size); } } for (const auto& v : operations) @@ -187,7 +187,7 @@ namespace blt::gp values_process.pop_back(); } value_stack.push_back(local_depth + 1); - operations_stack.emplace_back(empty_callable, operation.transfer, operation.id, true); + operations_stack.emplace_back(empty_callable, operation.type_size, operation.id, true); } return depth;