/* * Created by Brett on 20/09/23. * Licensed under GNU General Public License V3.0 * See LICENSE file for license detail */ #include #include #include #include #include #include #include namespace blt { /** * General profiler functions * -------------------------- */ #define SORT_INTERVALS_FUNC_MACRO(use_history, TYPE_END, TYPE_START, TYPE_TOTAL) \ [&use_history](const interval_t* a, const interval_t* b) -> bool { \ /*if (!use_history){ \ auto a_diff = a->TYPE_END - a->TYPE_START; \ auto b_diff = b->TYPE_END - b->TYPE_START; \ return a_diff > b_diff; \ } else { \ */return a->TYPE_TOTAL < b->TYPE_TOTAL; \ /*}*/ \ } #define INTERVAL_DIFFERENCE_MACRO(printHistory, interval) \ auto wall = printHistory \ ? (static_cast(interval->wall_total) / static_cast(interval->count)) \ : static_cast(interval->wall_total); \ \ auto thread = printHistory \ ? (static_cast(interval->thread_total) / static_cast(interval->count)) \ : (static_cast(interval->thread_total)); \ \ auto cycles = printHistory \ ? ((interval->cycles_total) / (interval->count)) \ : (interval->cycles_total); enum class unit { MS, NS, S }; struct unit_container { unit thread; unit wall; }; interval_t* createInterval(profile_t& profiler, std::string interval_name) { auto interval = new interval_t( blt::system::getCurrentTimeNanoseconds(), 0, 0, blt::system::getCPUThreadTime(), 0, 0, blt::system::rdtsc(), 0, 0, 0, std::move(interval_name)); profiler.intervals.push_back(interval); return interval; } void startInterval(interval_t* interval) { interval->wall_start = blt::system::getCurrentTimeNanoseconds(); interval->thread_start = blt::system::getCPUThreadTime(); interval->cycles_start = blt::system::rdtsc(); } void endInterval(interval_t* interval) { interval->cycles_end = blt::system::rdtsc(); interval->wall_end = blt::system::getCurrentTimeNanoseconds(); interval->thread_end = blt::system::getCPUThreadTime(); interval->cycles_total += interval->cycles_end - interval->cycles_start; interval->wall_total += interval->wall_end - interval->wall_start; interval->thread_total += interval->thread_end - interval->thread_start; interval->count++; } void clearProfile(profile_t& profiler) { for (auto* p : profiler.intervals) delete p; for (auto* p : profiler.cycle_intervals) delete p; profiler.intervals.clear(); profiler.cycle_intervals.clear(); } void writeProfile(std::ifstream&, const profile_t&) { BLT_WARN("Write profile for V2 is currently a TODO"); } void sort_intervals(std::vector& intervals, sort_by sort, bool use_history) { std::function sort_func; switch (sort) { case sort_by::CYCLES: sort_func = SORT_INTERVALS_FUNC_MACRO(use_history, cycles_start, cycles_end, cycles_total); break; case sort_by::WALL: sort_func = SORT_INTERVALS_FUNC_MACRO(use_history, wall_start, wall_end, wall_total); break; case sort_by::THREAD: sort_func = SORT_INTERVALS_FUNC_MACRO(use_history, thread_start, thread_end, thread_total); break; } std::sort(intervals.begin(), intervals.end(), sort_func); } unit_container determine_max_unit(const std::vector& intervals, bool use_history) { // start at lowest unit unit_container container{unit::NS, unit::NS}; // last one will always be the longest, at least according to the sort order // which should at least be close. auto interval = intervals[intervals.size() - 1]; INTERVAL_DIFFERENCE_MACRO(use_history, interval); (void)(cycles); // make sure is in range + that we don't convert Seconds back to MS // we want to use the largest unit possible to keep the numbers small // we might run into a case where something took 1000ns but other thing took 100s // which would be hard to deal with either way. So TODO if (wall > 1e6 && wall < 1e9 && container.wall == unit::NS) container.wall = unit::MS; else if (wall > 1e9) container.wall = unit::S; if (thread > 1e6 && thread < 1e9 && container.thread == unit::NS) container.thread = unit::MS; else if (thread > 1e9) container.thread = unit::S; return container; } inline void println(const std::vector&& lines, logging::log_level level) { for (const auto& line : lines) BLT_LOG_STREAM(level) << line << "\n"; } void printProfile(profile_t& profiler, std::uint32_t flags, sort_by sort, blt::logging::log_level log_level) { bool printHistory = flags & AVERAGE_HISTORY; bool printCycles = flags & PRINT_CYCLES; bool printThread = flags & PRINT_THREAD; bool printWall = flags & PRINT_WALL; sort_intervals(profiler.intervals, sort, printHistory); auto units = determine_max_unit(profiler.intervals, printHistory); std::string thread_unit_string = units.thread == unit::MS ? "ms" : units.thread == unit::NS ? "ns" : "s"; std::string wall_unit_string = units.wall == unit::MS ? "ms" : units.wall == unit::NS ? "ns" : "s"; auto thread_unit_divide = units.thread == unit::MS ? 1e6 : units.thread == unit::NS ? 1 : 1e9; auto wall_unit_divide = units.wall == unit::MS ? 1e6 : units.wall == unit::NS ? 1 : 1e9; string::TableFormatter formatter{profiler.name}; formatter.addColumn("Order"); if (printHistory) formatter.addColumn("Count"); formatter.addColumn("Interval"); if (printCycles) formatter.addColumn("Cycles"); if (printThread) formatter.addColumn("CPU Time (" + thread_unit_string += ")"); if (printWall) formatter.addColumn("Wall Time (" + wall_unit_string += ")"); for (size_t i = 0; i < profiler.intervals.size(); i++) { blt::string::TableRow row; auto interval = profiler.intervals[i]; if (interval->count == 0) continue; INTERVAL_DIFFERENCE_MACRO(printHistory, interval); row.rowValues.push_back(std::to_string(i + 1)); if (printHistory) row.rowValues.push_back(std::to_string(interval->count)); row.rowValues.push_back(interval->interval_name); if (printCycles) row.rowValues.push_back(blt::string::withGrouping(cycles)); if (printThread) row.rowValues.push_back(std::to_string(thread / static_cast(thread_unit_divide))); if (printWall) row.rowValues.push_back(std::to_string(wall / static_cast(wall_unit_divide))); formatter.addRow(row); } println(formatter.createTable(true, true), log_level); } profile_t::~profile_t() { blt::clearProfile(*this); } /** * profiler V1 partial backwards compat * ---------------------------- */ hashmap_t> profiles; void _internal::startInterval(const std::string& profile_name, const std::string& interval_name) { auto& profile = profiles[profile_name]; if (profile.find(interval_name) == profile.end()) { auto interval = new interval_t(); interval->interval_name = interval_name; profile[interval_name] = interval; } blt::startInterval(profile[interval_name]); } void _internal::endInterval(const std::string& profile_name, const std::string& interval_name) { blt::endInterval(profiles[profile_name].at(interval_name)); } void _internal::writeProfile(std::ifstream& stream, const std::string& profile_name) { auto& pref = profiles[profile_name]; profile_t profile{profile_name}; for (const auto& i : pref) profile.intervals.push_back(i.second); blt::writeProfile(stream, profile); profiles.erase(profile_name); } void _internal::printProfile(const std::string& profile_name, std::uint32_t flags, sort_by sort, blt::logging::log_level log_level) { auto& pref = profiles[profile_name]; profile_t profile{profile_name}; for (const auto& i : pref) profile.intervals.push_back(i.second); blt::printProfile(profile, flags, sort, log_level); profiles.erase(profile_name); } interval_t::interval_t(pf_time_t wallStart, pf_time_t wallEnd, pf_time_t wallTotal, pf_time_t threadStart, pf_time_t threadEnd, pf_time_t threadTotal, pf_cycle_t cyclesStart, pf_cycle_t cyclesEnd, pf_cycle_t cyclesTotal, uint64_t count, std::string intervalName): wall_start(wallStart), wall_end(wallEnd), wall_total(wallTotal), thread_start(threadStart), thread_end(threadEnd), thread_total(threadTotal), cycles_start(cyclesStart), cycles_end(cyclesEnd), cycles_total(cyclesTotal), count(count), interval_name(std::move(intervalName)) {} }