diff --git a/include/blt/profiling/profiler_v2.h b/include/blt/profiling/profiler_v2.h index ce49c95..4efa824 100644 --- a/include/blt/profiling/profiler_v2.h +++ b/include/blt/profiling/profiler_v2.h @@ -12,41 +12,117 @@ #include #include #include +#include namespace blt { + // use the historical values (average) instead of the latest values + static inline constexpr std::uint32_t PRINT_HISTORY = 0x1; + // print out the cycles + static inline constexpr std::uint32_t PRINT_CYCLES = 0x2; + // print out the wall time + static inline constexpr std::uint32_t PRINT_WALL = 0x4; + // print out the thread CPU time + static inline constexpr std::uint32_t PRINT_THREAD = 0x8; + + enum class sort_by + { + CYCLES, + WALL, + THREAD + }; + // 32 bit currently not supported typedef std::int64_t pf_time_t; + typedef std::uint64_t pf_cycle_t; struct interval_t { - pf_time_t start = 0; - pf_time_t end = 0; - pf_time_t total = 0; - pf_time_t count = 0; + pf_time_t wall_start = 0; + pf_time_t wall_end = 0; + pf_time_t wall_total = 0; + + pf_time_t thread_start = 0; + pf_time_t thread_end = 0; + pf_time_t thread_total = 0; + + pf_cycle_t cycles_start = 0; + pf_cycle_t cycles_end = 0; + pf_cycle_t cycles_total = 0; + + std::uint64_t count = 0; + std::string interval_name; + }; + + struct cycle_interval_t + { + pf_cycle_t cycles_start = 0; + pf_cycle_t cycles_end = 0; + pf_cycle_t cycles_total = 0; + std::uint64_t count = 0; + std::string interval_name; }; struct profile_t { - std::vector intervals; + std::vector intervals; + std::vector cycle_intervals; + std::string name; + + explicit profile_t(std::string name): name(std::move(name)) + {} + + profile_t(const profile_t& p) = delete; + + profile_t& operator=(const profile_t& p) = delete; + + ~profile_t(); }; - void startInterval(interval_t& interval); + interval_t* createInterval(profile_t& profiler, std::string interval_name); + void startInterval(interval_t* interval); - void endInterval(profile_t& profiler, interval_t& interval); + void endInterval(interval_t* interval); - void printProfile(const profile_t& profiler); + void printProfile(profile_t& profiler, std::uint32_t flags = PRINT_HISTORY | PRINT_CYCLES | PRINT_THREAD | PRINT_WALL, + sort_by sort = sort_by::CYCLES, blt::logging::log_level log_level = blt::logging::log_level::NONE); void writeProfile(std::ifstream& stream, const profile_t& profiler); + void clearProfile(profile_t& profiler); + namespace _internal { void startInterval(const std::string& profile_name, const std::string& interval_name); + void endInterval(const std::string& profile_name, const std::string& interval_name); + void printProfile(const std::string& profile_name); + void writeProfile(const std::string& profile_name); } + class auto_interval + { + private: + interval_t* iv; + public: + auto_interval(std::string interval_name, profile_t& profiler) + { + iv = blt::createInterval(profiler, std::move(interval_name)); + blt::startInterval(iv); + } + + auto_interval(const auto_interval& i) = delete; + + auto_interval& operator=(const auto_interval& i) = delete; + + ~auto_interval() + { + blt::endInterval(iv); + } + }; + } #endif //BLT_PROFILER_V2_H diff --git a/src/blt/profiling/profiler_v2.cpp b/src/blt/profiling/profiler_v2.cpp index 72616a4..1e42be4 100644 --- a/src/blt/profiling/profiler_v2.cpp +++ b/src/blt/profiling/profiler_v2.cpp @@ -4,8 +4,204 @@ * See LICENSE file for license detail */ #include +#include +#include +#include +#include namespace blt { +#define SORT_INTERVALS_FUNC_MACRO(use_history, TYPE_END, TYPE_START, TYPE_TOTAL) \ + [&use_history](const interval_t* a, const interval_t* b) -> bool { \ + if (use_history){ \ + auto a_diff = a->TYPE_END - a->TYPE_START; \ + auto b_diff = b->TYPE_END - b->TYPE_START; \ + return a_diff > b_diff; \ + } else { \ + return a->TYPE_TOTAL > b->TYPE_TOTAL; \ + } \ + } + +#define INTERVAL_DIFFERENCE_MACRO(printHistory, interval) \ + auto wall = printHistory \ + ? (static_cast(interval->wall_total) / static_cast(interval->count)) \ + : static_cast(interval->wall_end - interval->wall_start); \ + \ + auto thread = printHistory \ + ? (static_cast(interval->thread_total) / static_cast(interval->count)) \ + : static_cast(interval->thread_end - interval->thread_start); \ + \ + auto cycles = printHistory \ + ? (static_cast(interval->cycles_total) / static_cast(interval->count)) \ + : static_cast(interval->cycles_end - interval->cycles_start); + + enum class unit + { + MS, NS, S + }; + + struct unit_container + { + unit thread; + unit wall; + }; + + interval_t* createInterval(profile_t& profiler, std::string interval_name) + { + auto interval = new interval_t( + blt::system::getCurrentTimeNanoseconds(), 0, 0, + blt::system::getCPUThreadTime(), 0, 0, + blt::system::rdtsc(), 0, 0, + 0, std::move(interval_name)); + profiler.intervals.push_back(interval); + return interval; + } + + void startInterval(interval_t* interval) + { + interval->wall_start = blt::system::getCurrentTimeNanoseconds(); + interval->thread_start = blt::system::getCPUThreadTime(); + interval->cycles_start = blt::system::rdtsc(); + } + + void endInterval(interval_t* interval) + { + interval->cycles_end = blt::system::rdtsc(); + interval->wall_end = blt::system::getCurrentTimeNanoseconds(); + interval->thread_end = blt::system::getCPUThreadTime(); + + interval->cycles_total += interval->cycles_end - interval->cycles_start; + interval->wall_total += interval->wall_end - interval->wall_start; + interval->thread_total += interval->thread_end - interval->thread_start; + + interval->count++; + } + + void clearProfile(profile_t& profiler) + { + for (auto* p : profiler.intervals) + delete p; + for (auto* p : profiler.cycle_intervals) + delete p; + profiler.intervals.clear(); + profiler.cycle_intervals.clear(); + } + + void writeProfile(std::ifstream&, const profile_t&) + { + BLT_WARN("Write profile for V2 is currently a TODO"); + } + + void sort_intervals(std::vector& intervals, sort_by sort, bool use_history) + { + std::function sort_func; + switch (sort) + { + case sort_by::CYCLES: + BLT_DEBUG("Cycles"); + sort_func = SORT_INTERVALS_FUNC_MACRO(use_history, cycles_start, cycles_end, cycles_total); + break; + case sort_by::WALL: + BLT_TRACE("WaLL"); + sort_func = SORT_INTERVALS_FUNC_MACRO(use_history, wall_start, wall_end, wall_total); + break; + case sort_by::THREAD: + BLT_INFO("Thread!"); + sort_func = SORT_INTERVALS_FUNC_MACRO(use_history, thread_start, thread_end, thread_total); + break; + } + std::sort(intervals.begin(), intervals.end(), sort_func); + } + + unit_container determine_max_unit(const std::vector& intervals, bool use_history) + { + // start at lowest unit + unit_container container{unit::NS, unit::NS}; + // last one will always be the longest, at least according to the sort order + // which should at least be close. + auto interval = intervals[intervals.size() - 1]; + INTERVAL_DIFFERENCE_MACRO(use_history, interval); + (void)(cycles); + // make sure is in range + that we don't convert Seconds back to MS + // we want to use the largest unit possible to keep the numbers small + // we might run into a case where something took 1000ns but other thing took 100s + // which would be hard to deal with either way. So TODO + if (wall > 1e5 && wall <= 1e8 && container.wall == unit::NS) + container.wall = unit::MS; + else if (wall > 1e8) + container.wall = unit::S; + + if (thread > 1e5 && thread <= 1e8 && container.thread == unit::NS) + container.thread = unit::MS; + else if (thread > 1e8) + container.thread = unit::S; + return container; + } + + inline void println(const std::vector&& lines, logging::log_level level) { + for (const auto& line : lines) + BLT_LOG_STREAM(level) << line << "\n"; + } + + void printProfile(profile_t& profiler, std::uint32_t flags, sort_by sort, blt::logging::log_level log_level) + { + bool printHistory = flags & PRINT_HISTORY; + bool printCycles = flags & PRINT_CYCLES; + bool printThread = flags & PRINT_THREAD; + bool printWall = flags & PRINT_WALL; + + sort_intervals(profiler.intervals, sort, printHistory); + + auto units = determine_max_unit(profiler.intervals, printHistory); + std::string thread_unit_string = units.thread == unit::MS ? "ms" : units.thread == unit::NS ? "ns" : "s"; + std::string wall_unit_string = units.wall == unit::MS ? "ms" : units.wall == unit::NS ? "ns" : "s"; + auto thread_unit_divide = units.thread == unit::MS ? 1e6 : units.thread == unit::NS ? 1 : 1e9; + auto wall_unit_divide = units.wall == unit::MS ? 1e6 : units.wall == unit::NS ? 1 : 1e9; + + string::TableFormatter formatter{profiler.name}; + formatter.addColumn({"Order"}); + if (printHistory) + formatter.addColumn({"Count"}); + formatter.addColumn({"Interval"}); + if (printCycles) + formatter.addColumn({"Cycles"}); + if (printThread) + formatter.addColumn({"CPU Time (" + thread_unit_string += ")"}); + if (printWall) + formatter.addColumn({"Wall Time (" + wall_unit_string += ")"}); + + for (size_t i = 0; i < profiler.intervals.size(); i++) + { + blt::string::TableRow row; + auto interval = profiler.intervals[i]; + + if (interval->count == 0) + continue; + + INTERVAL_DIFFERENCE_MACRO(printHistory, interval); + + row.rowValues.push_back(std::to_string(i + 1)); + if (printHistory) + row.rowValues.push_back(std::to_string(interval->count)); + row.rowValues.push_back(interval->interval_name); + if (printCycles) + row.rowValues.push_back(std::to_string(cycles)); + if (printThread) + row.rowValues.push_back(std::to_string(thread / static_cast(thread_unit_divide))); + if (printWall) + row.rowValues.push_back(std::to_string(wall / static_cast(wall_unit_divide))); + formatter.addRow(row); + } + + println(formatter.createTable(true, true), log_level); + } + + profile_t::~profile_t() + { + for (auto* p : intervals) + delete p; + for (auto* p : cycle_intervals) + delete p; + } } \ No newline at end of file