diff --git a/src/benchmark.cc b/src/benchmark.cc index fc36fedb1..19129e4ca 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -141,6 +141,9 @@ BM_DEFINE_bool(benchmark_counters_tabular, false); // information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html BM_DEFINE_string(benchmark_perf_counters, ""); +// Setting this to false allows measuring only the main benchmark thread. +BM_DEFINE_bool(benchmark_perf_counters_inherit, true); + // Extra context to include in the output formatted as comma-separated key-value // pairs. Kept internal as it's only used for parsing from env/command line. BM_DEFINE_kvpairs(benchmark_context, {}); @@ -424,7 +427,8 @@ void RunBenchmarks(const std::vector& benchmarks, // This perfcounters object needs to be created before the runners vector // below so it outlasts their lifetime. PerfCountersMeasurement perfcounters( - StrSplit(FLAGS_benchmark_perf_counters, ',')); + StrSplit(FLAGS_benchmark_perf_counters, ','), + FLAGS_benchmark_perf_counters_inherit); // Vector of benchmarks to run std::vector runners; @@ -453,7 +457,8 @@ void RunBenchmarks(const std::vector& benchmarks, // The use of performance counters with threads would be unintuitive for // the average user so we need to warn them about this case - if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0)) { + if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0) && + FLAGS_benchmark_perf_counters_inherit) { GetErrorLogInstance() << "***WARNING*** There are " << benchmarks_with_threads << " benchmarks with threads and " << perfcounters.num_counters() @@ -770,6 +775,8 @@ void ParseCommandLineFlags(int* argc, char** argv) { &FLAGS_benchmark_report_aggregates_only) || ParseBoolFlag(argv[i], "benchmark_display_aggregates_only", &FLAGS_benchmark_display_aggregates_only) || + ParseBoolFlag(argv[i], "benchmark_perf_counters_inherit", + &FLAGS_benchmark_perf_counters_inherit) || ParseStringFlag(argv[i], "benchmark_format", 
&FLAGS_benchmark_format) || ParseStringFlag(argv[i], "benchmark_out", &FLAGS_benchmark_out) || ParseStringFlag(argv[i], "benchmark_out_format", @@ -891,6 +898,7 @@ void PrintDefaultHelp() { " [--benchmark_counters_tabular={true|false}]\n" #if defined HAVE_LIBPFM " [--benchmark_perf_counters=,...]\n" + " [--benchmark_perf_counters_inherit={true|false}]\n" #endif " [--benchmark_context==,...]\n" " [--benchmark_time_unit={ns|us|ms|s}]\n" diff --git a/src/perf_counters.cc b/src/perf_counters.cc index e6f220921..fe21b67ed 100644 --- a/src/perf_counters.cc +++ b/src/perf_counters.cc @@ -135,7 +135,7 @@ static std::vector GetPMUTypesForEvent(const perf_event_attr& attr) { } PerfCounters PerfCounters::Create( - const std::vector& counter_names) { + const std::vector& counter_names, bool inherit) { if (!counter_names.empty()) { Initialize(); } @@ -202,8 +202,9 @@ PerfCounters PerfCounters::Create( // Note: the man page for perf_event_create suggests inherit = true and // read_format = PERF_FORMAT_GROUP don't work together, but that's not the // case. 
+ attr.disabled = is_first; - attr.inherit = true; + attr.inherit = inherit; attr.pinned = is_first; attr.exclude_kernel = true; attr.exclude_user = false; @@ -311,10 +312,11 @@ bool PerfCounters::Initialize() { return false; } bool PerfCounters::IsCounterSupported(const std::string&) { return false; } PerfCounters PerfCounters::Create( - const std::vector& counter_names) { + const std::vector& counter_names, bool inherit) { if (!counter_names.empty()) { GetErrorLogInstance() << "Performance counters not supported.\n"; } + (void)inherit; // Unused here; silences the unused-parameter warning. return NoCounters(); } @@ -322,9 +324,9 @@ void PerfCounters::CloseCounters() const {} #endif // defined HAVE_LIBPFM PerfCountersMeasurement::PerfCountersMeasurement( - const std::vector& counter_names) + const std::vector& counter_names, bool inherit) : start_values_(counter_names.size()), end_values_(counter_names.size()) { - counters_ = PerfCounters::Create(counter_names); + counters_ = PerfCounters::Create(counter_names, inherit); } PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept { diff --git a/src/perf_counters.h b/src/perf_counters.h index 4e4534431..2d3f09827 100644 --- a/src/perf_counters.h +++ b/src/perf_counters.h @@ -113,7 +113,7 @@ class BENCHMARK_EXPORT PerfCounters final { // implementation and OS specific. // In case of failure, this method will in the worst case return an // empty object whose state will still be valid. - static PerfCounters Create(const std::vector& counter_names); + static PerfCounters Create(const std::vector& counter_names, bool inherit = true); // Take a snapshot of the current value of the counters into the provided // valid PerfCounterValues storage. The values are populated such that: @@ -148,7 +148,7 @@ class BENCHMARK_EXPORT PerfCounters final { // Typical usage of the above primitives. 
class BENCHMARK_EXPORT PerfCountersMeasurement final { public: - PerfCountersMeasurement(const std::vector& counter_names); + PerfCountersMeasurement(const std::vector& counter_names, bool inherit = true); // Default keeps one-argument callers compiling. size_t num_counters() const { return counters_.num_counters(); }