diff --git a/CMakeLists.txt b/CMakeLists.txt index 65e3368..81a0333 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.25) -project(blt-gp VERSION 0.1.41) +project(blt-gp VERSION 0.1.42) include(CTest) @@ -16,7 +16,7 @@ set(CMAKE_CXX_STANDARD 17) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -#SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -g") +SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -g") if (NOT TARGET BLT) add_subdirectory(lib/blt) diff --git a/examples/symbolic_regression.cpp b/examples/symbolic_regression.cpp index ce3206a..66fc8d9 100644 --- a/examples/symbolic_regression.cpp +++ b/examples/symbolic_regression.cpp @@ -140,20 +140,27 @@ int main() BLT_PRINT_PROFILE("Symbolic Regression", blt::PRINT_CYCLES | blt::PRINT_THREAD | blt::PRINT_WALL); #ifdef BLT_TRACK_ALLOCATIONS - BLT_TRACE("Total Allocations: %ld times with a total of %s", blt::gp::tracker.getAllocations(), - blt::byte_convert_t(blt::gp::tracker.getAllocatedBytes()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Allocations: %ld times with a total of %s, peak allocated bytes %s", blt::gp::tracker.getAllocations(), + blt::byte_convert_t(blt::gp::tracker.getAllocatedBytes()).convert_to_nearest_type().to_pretty_string().c_str(), + blt::byte_convert_t(blt::gp::tracker.getPeakAllocatedBytes()).convert_to_nearest_type().to_pretty_string().c_str()); auto crossover_calls_v = blt::gp::crossover_calls.get_calls(); auto crossover_allocations_v = blt::gp::crossover_allocations.get_calls(); auto mutation_calls_v = blt::gp::mutation_calls.get_calls(); auto mutation_allocations_v = blt::gp::mutation_allocations.get_calls(); auto reproduction_calls_v = blt::gp::reproduction_calls.get_calls(); auto reproduction_allocations_v = blt::gp::reproduction_allocations.get_calls(); - BLT_TRACE("Total Crossover Calls: %ld", crossover_calls_v); - BLT_TRACE("Total Mutation Calls: %ld", mutation_calls_v); - BLT_TRACE("Total Reproduction Calls: %ld", reproduction_calls_v); - BLT_TRACE("Total Crossover Allocations: %ld Bytes %s", crossover_allocations_v, blt::byte_convert_t(blt::gp::crossover_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Mutation Allocations: %ld Bytes %s", mutation_allocations_v, blt::byte_convert_t(blt::gp::mutation_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Reproduction Allocations: %ld Bytes %s", reproduction_allocations_v, blt::byte_convert_t(blt::gp::reproduction_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Crossover Calls: %ld Peak Bytes Allocated %s", crossover_calls_v, + blt::byte_convert_t(blt::gp::crossover_calls.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Mutation Calls: %ld Peak Bytes Allocated %s", mutation_calls_v, + blt::byte_convert_t(blt::gp::mutation_calls.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Reproduction Calls: %ld Peak Bytes Allocated %s", reproduction_calls_v, + blt::byte_convert_t(blt::gp::reproduction_calls.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Crossover Allocations: %ld Bytes %s", crossover_allocations_v, + blt::byte_convert_t(blt::gp::crossover_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Mutation Allocations: %ld Bytes %s", mutation_allocations_v, + blt::byte_convert_t(blt::gp::mutation_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Reproduction Allocations: %ld Bytes %s", reproduction_allocations_v, + blt::byte_convert_t(blt::gp::reproduction_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); BLT_TRACE("Percent Crossover calls allocate? %lf%%", static_cast(crossover_allocations_v) / static_cast(crossover_calls_v == 0 ? 1 : crossover_calls_v) * 100); BLT_TRACE("Percent Mutation calls allocate? %lf%%", diff --git a/include/blt/gp/selection.h b/include/blt/gp/selection.h index 029e29b..38c1f81 100644 --- a/include/blt/gp/selection.h +++ b/include/blt/gp/selection.h @@ -102,6 +102,7 @@ namespace blt::gp #ifdef BLT_TRACK_ALLOCATIONS tracker.stop_measurement_thread_local(state); crossover_calls.call(); + crossover_calls.set_value(std::max(crossover_calls.get_value(), state.getAllocatedByteDifference())); if (state.getAllocatedByteDifference() != 0) { crossover_allocations.call(state.getAllocatedByteDifference()); @@ -125,6 +126,7 @@ namespace blt::gp #ifdef BLT_TRACK_ALLOCATIONS tracker.stop_measurement_thread_local(state); mutation_calls.call(); + mutation_calls.set_value(std::max(mutation_calls.get_value(), state.getAllocatedByteDifference())); if (state.getAllocationDifference() != 0) { mutation_allocations.call(state.getAllocatedByteDifference()); @@ -144,6 +146,7 @@ namespace blt::gp #ifdef BLT_TRACK_ALLOCATIONS tracker.stop_measurement_thread_local(state); reproduction_calls.call(); + reproduction_calls.set_value(std::max(reproduction_calls.get_value(), state.getAllocatedByteDifference())); if (state.getAllocationDifference() != 0) { reproduction_allocations.call(state.getAllocatedByteDifference()); diff --git a/include/blt/gp/stats.h b/include/blt/gp/stats.h index 4a88012..d08800b 100644 --- a/include/blt/gp/stats.h +++ b/include/blt/gp/stats.h @@ -27,6 +27,7 @@ #include #include #include +#include namespace blt::gp { @@ -72,6 +73,9 @@ namespace blt::gp blt::hashmap_t> deallocations; blt::hashmap_t> allocated_bytes; blt::hashmap_t> deallocated_bytes; + + std::mutex mutex; + std::condition_variable var; }; struct allocation_data_t @@ -111,17 +115,39 @@ namespace blt::gp void reserve() { - std::scoped_lock lock(mutex); - tl.allocations[std::this_thread::get_id()] = std::make_unique(); - tl.deallocations[std::this_thread::get_id()] = std::make_unique(); - tl.allocated_bytes[std::this_thread::get_id()] = std::make_unique(); - tl.deallocated_bytes[std::this_thread::get_id()] = std::make_unique(); + { + std::scoped_lock lock(tl.mutex); + tl.allocations.insert({std::this_thread::get_id(), std::make_unique()}); + tl.deallocations.insert({std::this_thread::get_id(), std::make_unique()}); + tl.allocated_bytes.insert({std::this_thread::get_id(), std::make_unique()}); + tl.deallocated_bytes.insert({std::this_thread::get_id(), std::make_unique()}); + } + tl.var.notify_all(); + } + + blt::size_t reserved_threads() + { + return tl.allocations.size(); + } + + void await_completion(blt::u64 required_threads) + { + std::unique_lock lock(tl.mutex); + tl.var.wait(lock, [this, required_threads]() { + return reserved_threads() == required_threads; + }); } void allocate(blt::size_t bytes) { allocations++; allocated_bytes += bytes; + + auto diff = getCurrentlyAllocatedBytes(); + auto atomic_val = peak_allocated_bytes.load(std::memory_order_relaxed); + while (diff > atomic_val && + !peak_allocated_bytes.compare_exchange_weak(atomic_val, diff, std::memory_order_relaxed, std::memory_order_relaxed)); + add_map(tl.allocations, 1); add_map(tl.allocated_bytes, bytes); } @@ -164,6 +190,11 @@ namespace blt::gp return getAllocatedBytes() - getDeallocatedBytes(); } + [[nodiscard]] blt::u64 getPeakAllocatedBytes() const + { + return peak_allocated_bytes; + } + allocation_tracker_t::tl_t& get_thread_local() { return tl; @@ -230,7 +261,7 @@ namespace blt::gp std::atomic_uint64_t allocated_bytes = 0; std::atomic_uint64_t deallocated_bytes = 0; - std::mutex mutex; + std::atomic_uint64_t peak_allocated_bytes = 0; }; class call_tracker_t @@ -259,6 +290,11 @@ namespace blt::gp secondary_value += value; } + void set_value(blt::u64 value) + { + secondary_value = value; + } + void call() { primary_calls++; diff --git a/src/program.cpp b/src/program.cpp index ce96d53..e8032a9 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -69,6 +69,7 @@ namespace blt::gp thread_helper.threads.emplace_back(new std::thread([i, this]() { #ifdef BLT_TRACK_ALLOCATIONS tracker.reserve(); + tracker.await_completion(config.threads); #endif std::function* execution_function = nullptr; while (!should_thread_terminate()) @@ -89,5 +90,8 @@ namespace blt::gp } })); } +#ifdef BLT_TRACK_ALLOCATIONS + tracker.await_completion(config.threads); +#endif } } \ No newline at end of file