intel/ds: allow user to select metric set at start time
Rather than always using the same metric set, let the user choose one when starting the producer with: INTEL_PERFETTO_METRIC_SET=RasterizerAndPixelBackend ./build/src/tool/pps/pps-producer Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Rohan Garg <rohan.garg@intel.com> Acked-by: Antonio Caggiano <antonio.caggiano@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13996>
This commit is contained in:

committed by
Marge Bot

parent
69df00b33b
commit
6eb554a9c7
@@ -154,6 +154,13 @@ Another option to enable access wide data without root permissions would be runn
|
|||||||
|
|
||||||
Alternatively using the ``CAP_PERFMON`` permission on the binary should work too.
|
Alternatively using the ``CAP_PERFMON`` permission on the binary should work too.
|
||||||
|
|
||||||
|
A particular metric set can also be selected to capture a different
|
||||||
|
set of HW counters:
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
INTEL_PERFETTO_METRIC_SET=RasterizerAndPixelBackend ./build/src/tool/pps/pps-producer
|
||||||
|
|
||||||
Panfrost
|
Panfrost
|
||||||
^^^^^^^^
|
^^^^^^^^
|
||||||
|
|
||||||
|
@@ -58,38 +58,17 @@ IntelDriver::~IntelDriver()
|
|||||||
void IntelDriver::enable_counter(uint32_t counter_id)
|
void IntelDriver::enable_counter(uint32_t counter_id)
|
||||||
{
|
{
|
||||||
auto &counter = counters[counter_id];
|
auto &counter = counters[counter_id];
|
||||||
auto &group = groups[counter.group];
|
|
||||||
if (perf->query) {
|
|
||||||
if (perf->query->symbol_name != group.name) {
|
|
||||||
PPS_LOG_ERROR(
|
|
||||||
"Unable to enable metrics from different sets: %u "
|
|
||||||
"belongs to %s but %s is currently in use.",
|
|
||||||
counter_id,
|
|
||||||
perf->query->symbol_name,
|
|
||||||
group.name.c_str());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
enabled_counters.emplace_back(counter);
|
enabled_counters.emplace_back(counter);
|
||||||
if (!perf->query) {
|
|
||||||
perf->query = perf->find_query_by_name(group.name);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void IntelDriver::enable_all_counters()
|
void IntelDriver::enable_all_counters()
|
||||||
{
|
{
|
||||||
// We can only enable one metric set at a time so at least enable one.
|
// We should only have one group
|
||||||
for (auto &group : groups) {
|
assert(groups.size() == 1);
|
||||||
if (group.name == "RenderBasic") {
|
for (uint32_t counter_id : groups[0].counters) {
|
||||||
for (uint32_t counter_id : group.counters) {
|
auto &counter = counters[counter_id];
|
||||||
auto &counter = counters[counter_id];
|
enabled_counters.emplace_back(counter);
|
||||||
enabled_counters.emplace_back(counter);
|
|
||||||
}
|
|
||||||
|
|
||||||
perf->query = perf->find_query_by_name(group.name);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -99,49 +78,76 @@ bool IntelDriver::init_perfcnt()
|
|||||||
|
|
||||||
perf = std::make_unique<IntelPerf>(drm_device.fd);
|
perf = std::make_unique<IntelPerf>(drm_device.fd);
|
||||||
|
|
||||||
|
const char *metric_set_name = getenv("INTEL_PERFETTO_METRIC_SET");
|
||||||
|
|
||||||
|
struct intel_perf_query_info *default_query = nullptr;
|
||||||
|
selected_query = nullptr;
|
||||||
for (auto &query : perf->get_queries()) {
|
for (auto &query : perf->get_queries()) {
|
||||||
// Create group
|
if (!strcmp(query->symbol_name, "RenderBasic"))
|
||||||
CounterGroup group = {};
|
default_query = query;
|
||||||
group.id = groups.size();
|
if (metric_set_name && !strcmp(query->symbol_name, metric_set_name))
|
||||||
group.name = query->symbol_name;
|
selected_query = query;
|
||||||
|
|
||||||
for (int i = 0; i < query->n_counters; ++i) {
|
|
||||||
intel_perf_query_counter &counter = query->counters[i];
|
|
||||||
|
|
||||||
// Create counter
|
|
||||||
Counter counter_desc = {};
|
|
||||||
counter_desc.id = counters.size();
|
|
||||||
counter_desc.name = counter.symbol_name;
|
|
||||||
counter_desc.group = group.id;
|
|
||||||
counter_desc.getter = [counter, query, this](
|
|
||||||
const Counter &c, const Driver &dri) -> Counter::Value {
|
|
||||||
switch (counter.data_type) {
|
|
||||||
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
|
|
||||||
case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
|
|
||||||
case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
|
|
||||||
return (int64_t)counter.oa_counter_read_uint64(perf->cfg, query, &perf->result);
|
|
||||||
break;
|
|
||||||
case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
|
|
||||||
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
|
|
||||||
return counter.oa_counter_read_float(perf->cfg, query, &perf->result);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return {};
|
|
||||||
};
|
|
||||||
|
|
||||||
// Add counter id to the group
|
|
||||||
group.counters.emplace_back(counter_desc.id);
|
|
||||||
|
|
||||||
// Store counter
|
|
||||||
counters.emplace_back(std::move(counter_desc));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store group
|
|
||||||
groups.emplace_back(std::move(group));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(groups.size() && "Failed to query groups");
|
assert(default_query);
|
||||||
|
|
||||||
|
if (!selected_query) {
|
||||||
|
if (metric_set_name) {
|
||||||
|
PPS_LOG_ERROR("Available metric sets:");
|
||||||
|
for (auto &query : perf->get_queries())
|
||||||
|
PPS_LOG_ERROR(" %s", query->symbol_name);
|
||||||
|
PPS_LOG_FATAL("Metric set '%s' not available.", metric_set_name);
|
||||||
|
}
|
||||||
|
selected_query = default_query;
|
||||||
|
}
|
||||||
|
|
||||||
|
PPS_LOG("Using metric set '%s': %s",
|
||||||
|
selected_query->symbol_name, selected_query->name);
|
||||||
|
|
||||||
|
// Create group
|
||||||
|
CounterGroup group = {};
|
||||||
|
group.id = groups.size();
|
||||||
|
group.name = selected_query->symbol_name;
|
||||||
|
|
||||||
|
for (int i = 0; i < selected_query->n_counters; ++i) {
|
||||||
|
intel_perf_query_counter &counter = selected_query->counters[i];
|
||||||
|
|
||||||
|
// Create counter
|
||||||
|
Counter counter_desc = {};
|
||||||
|
counter_desc.id = counters.size();
|
||||||
|
counter_desc.name = counter.symbol_name;
|
||||||
|
counter_desc.group = group.id;
|
||||||
|
counter_desc.getter = [counter, this](
|
||||||
|
const Counter &c, const Driver &dri) -> Counter::Value {
|
||||||
|
switch (counter.data_type) {
|
||||||
|
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
|
||||||
|
case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
|
||||||
|
case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
|
||||||
|
return (int64_t)counter.oa_counter_read_uint64(perf->cfg,
|
||||||
|
selected_query,
|
||||||
|
&perf->result);
|
||||||
|
break;
|
||||||
|
case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
|
||||||
|
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
|
||||||
|
return counter.oa_counter_read_float(perf->cfg,
|
||||||
|
selected_query,
|
||||||
|
&perf->result);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add counter id to the group
|
||||||
|
group.counters.emplace_back(counter_desc.id);
|
||||||
|
|
||||||
|
// Store counter
|
||||||
|
counters.emplace_back(std::move(counter_desc));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store group
|
||||||
|
groups.emplace_back(std::move(group));
|
||||||
|
|
||||||
assert(counters.size() && "Failed to query counters");
|
assert(counters.size() && "Failed to query counters");
|
||||||
|
|
||||||
// Clear accumulations
|
// Clear accumulations
|
||||||
@@ -154,7 +160,7 @@ void IntelDriver::enable_perfcnt(uint64_t sampling_period_ns)
|
|||||||
{
|
{
|
||||||
this->sampling_period_ns = sampling_period_ns;
|
this->sampling_period_ns = sampling_period_ns;
|
||||||
|
|
||||||
if (!perf->open(sampling_period_ns)) {
|
if (!perf->open(sampling_period_ns, selected_query)) {
|
||||||
PPS_LOG_FATAL("Failed to open intel perf");
|
PPS_LOG_FATAL("Failed to open intel perf");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -197,7 +203,7 @@ std::vector<PerfRecord> IntelDriver::parse_perf_records(const std::vector<uint8_
|
|||||||
// Report is next to the header
|
// Report is next to the header
|
||||||
const uint32_t *report = reinterpret_cast<const uint32_t *>(header + 1);
|
const uint32_t *report = reinterpret_cast<const uint32_t *>(header + 1);
|
||||||
uint64_t gpu_timestamp_ldw =
|
uint64_t gpu_timestamp_ldw =
|
||||||
intel_perf_report_timestamp(&perf->query.value(), report);
|
intel_perf_report_timestamp(selected_query, report);
|
||||||
|
|
||||||
/* Our HW only provides us with the lower 32 bits of the 36bits
|
/* Our HW only provides us with the lower 32 bits of the 36bits
|
||||||
* timestamp counter value. If we haven't captured the top bits yet,
|
* timestamp counter value. If we haven't captured the top bits yet,
|
||||||
@@ -292,11 +298,11 @@ uint64_t IntelDriver::gpu_next()
|
|||||||
auto record_b = reinterpret_cast<const drm_i915_perf_record_header *>(records[1].data.data());
|
auto record_b = reinterpret_cast<const drm_i915_perf_record_header *>(records[1].data.data());
|
||||||
|
|
||||||
intel_perf_query_result_accumulate_fields(&perf->result,
|
intel_perf_query_result_accumulate_fields(&perf->result,
|
||||||
&perf->query.value(),
|
selected_query,
|
||||||
&perf->devinfo,
|
&perf->devinfo,
|
||||||
record_a + 1,
|
record_a + 1,
|
||||||
record_b + 1,
|
record_b + 1,
|
||||||
false /* no_oa_accumulate */);
|
false /* no_oa_accumulate */);
|
||||||
|
|
||||||
// Get last timestamp
|
// Get last timestamp
|
||||||
auto gpu_timestamp = records[1].timestamp;
|
auto gpu_timestamp = records[1].timestamp;
|
||||||
|
@@ -9,6 +9,10 @@
|
|||||||
|
|
||||||
#include <pps/pps_driver.h>
|
#include <pps/pps_driver.h>
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
struct intel_perf_query_info;
|
||||||
|
};
|
||||||
|
|
||||||
namespace pps
|
namespace pps
|
||||||
{
|
{
|
||||||
|
|
||||||
@@ -82,6 +86,9 @@ class IntelDriver : public Driver
|
|||||||
|
|
||||||
// Gpu clock ID used to correlate GPU/CPU timestamps
|
// Gpu clock ID used to correlate GPU/CPU timestamps
|
||||||
uint32_t clock_id = 0;
|
uint32_t clock_id = 0;
|
||||||
|
|
||||||
|
// Selected query
|
||||||
|
intel_perf_query_info *selected_query = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace pps
|
} // namespace pps
|
||||||
|
@@ -36,13 +36,6 @@ IntelPerf::IntelPerf(const int drm_fd)
|
|||||||
false, // no pipeline statistics
|
false, // no pipeline statistics
|
||||||
false // no register snapshots
|
false // no register snapshots
|
||||||
);
|
);
|
||||||
|
|
||||||
// Enable RenderBasic counters
|
|
||||||
auto query_name = "RenderBasic";
|
|
||||||
query = find_query_by_name(query_name);
|
|
||||||
if (!query) {
|
|
||||||
PPS_LOG_FATAL("Failed to find %s query", query_name);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
IntelPerf::~IntelPerf()
|
IntelPerf::~IntelPerf()
|
||||||
@@ -58,20 +51,6 @@ IntelPerf::~IntelPerf()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// @return A query info, which is something like a group of counters
|
|
||||||
std::optional<struct intel_perf_query_info> IntelPerf::find_query_by_name(
|
|
||||||
const std::string &name) const
|
|
||||||
{
|
|
||||||
for (int i = 0; i < cfg->n_queries; ++i) {
|
|
||||||
struct intel_perf_query_info query = cfg->queries[i];
|
|
||||||
if (name == query.symbol_name) {
|
|
||||||
return query;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return std::nullopt;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<struct intel_perf_query_info *> IntelPerf::get_queries() const
|
std::vector<struct intel_perf_query_info *> IntelPerf::get_queries() const
|
||||||
{
|
{
|
||||||
assert(cfg && "Intel perf config should be valid");
|
assert(cfg && "Intel perf config should be valid");
|
||||||
@@ -98,7 +77,8 @@ static uint32_t get_oa_exponent(const intel_device_info *devinfo, const uint64_t
|
|||||||
return static_cast<uint32_t>(log2(sampling_period_ns * devinfo->timestamp_frequency / 1000000000ull)) - 1;
|
return static_cast<uint32_t>(log2(sampling_period_ns * devinfo->timestamp_frequency / 1000000000ull)) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IntelPerf::open(const uint64_t sampling_period_ns)
|
bool IntelPerf::open(const uint64_t sampling_period_ns,
|
||||||
|
struct intel_perf_query_info *query)
|
||||||
{
|
{
|
||||||
assert(!ctx && "Perf context should not be initialized at this point");
|
assert(!ctx && "Perf context should not be initialized at this point");
|
||||||
|
|
||||||
|
@@ -23,11 +23,9 @@ class IntelPerf
|
|||||||
IntelPerf(int drm_fd);
|
IntelPerf(int drm_fd);
|
||||||
~IntelPerf();
|
~IntelPerf();
|
||||||
|
|
||||||
std::optional<struct intel_perf_query_info> find_query_by_name(const std::string &name) const;
|
|
||||||
|
|
||||||
std::vector<struct intel_perf_query_info*> get_queries() const;
|
std::vector<struct intel_perf_query_info*> get_queries() const;
|
||||||
|
|
||||||
bool open(uint64_t sampling_period_ns);
|
bool open(uint64_t sampling_period_ns, struct intel_perf_query_info *query);
|
||||||
void close();
|
void close();
|
||||||
|
|
||||||
bool oa_stream_ready() const;
|
bool oa_stream_ready() const;
|
||||||
@@ -45,8 +43,6 @@ class IntelPerf
|
|||||||
struct intel_perf_query_result result = {};
|
struct intel_perf_query_result result = {};
|
||||||
|
|
||||||
struct intel_device_info devinfo = {};
|
struct intel_device_info devinfo = {};
|
||||||
|
|
||||||
std::optional<struct intel_perf_query_info> query = std::nullopt;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace pps
|
} // namespace pps
|
||||||
|
Reference in New Issue
Block a user