intel/perf: move gt_frequency to results
We want to unify things a bit between GL & Vulkan, so store the GT frequency values in the results rather than only in the GL query code. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Marcin Ślusarz <marcin.slusarz@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8525>
This commit is contained in:
@@ -1088,6 +1088,35 @@ gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
|
||||
|
||||
}
|
||||
|
||||
#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)
|
||||
|
||||
void
|
||||
gen_perf_query_result_read_gt_frequency(struct gen_perf_query_result *result,
|
||||
const struct gen_device_info *devinfo,
|
||||
const uint32_t start,
|
||||
const uint32_t end)
|
||||
{
|
||||
switch (devinfo->gen) {
|
||||
case 7:
|
||||
case 8:
|
||||
result->gt_frequency[0] = GET_FIELD(start, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
|
||||
result->gt_frequency[1] = GET_FIELD(end, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
|
||||
break;
|
||||
case 9:
|
||||
case 11:
|
||||
case 12:
|
||||
result->gt_frequency[0] = GET_FIELD(start, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
|
||||
result->gt_frequency[1] = GET_FIELD(end, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
|
||||
break;
|
||||
default:
|
||||
unreachable("unexpected gen");
|
||||
}
|
||||
|
||||
/* Put the numbers into Hz. */
|
||||
result->gt_frequency[0] *= 1000000ULL;
|
||||
result->gt_frequency[1] *= 1000000ULL;
|
||||
}
|
||||
|
||||
void
|
||||
gen_perf_query_result_clear(struct gen_perf_query_result *result)
|
||||
{
|
||||
|
@@ -150,6 +150,11 @@ struct gen_perf_query_result {
|
||||
*/
|
||||
uint64_t unslice_frequency[2];
|
||||
|
||||
/**
|
||||
* Frequency of the whole GT at the beginning and end of the query.
|
||||
*/
|
||||
uint64_t gt_frequency[2];
|
||||
|
||||
/**
|
||||
* Timestamp of the query.
|
||||
*/
|
||||
@@ -357,6 +362,14 @@ void gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result
|
||||
const struct gen_device_info *devinfo,
|
||||
const uint32_t *start,
|
||||
const uint32_t *end);
|
||||
|
||||
/** Store the GT frequency as reported by the RPSTAT register.
|
||||
*/
|
||||
void gen_perf_query_result_read_gt_frequency(struct gen_perf_query_result *result,
|
||||
const struct gen_device_info *devinfo,
|
||||
const uint32_t start,
|
||||
const uint32_t end);
|
||||
|
||||
/** Accumulate the delta between 2 OA reports into result for a given query.
|
||||
*/
|
||||
void gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
|
||||
|
@@ -34,8 +34,8 @@
|
||||
int
|
||||
gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
|
||||
const struct gen_device_info *devinfo,
|
||||
const struct gen_perf_query_result *result,
|
||||
uint64_t freq_start, uint64_t freq_end)
|
||||
const struct gen_perf_query_info *query,
|
||||
const struct gen_perf_query_result *result)
|
||||
{
|
||||
switch (devinfo->gen) {
|
||||
case 7: {
|
||||
@@ -57,8 +57,8 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
|
||||
mdapi_data->ReportsCount = result->reports_accumulated;
|
||||
mdapi_data->TotalTime =
|
||||
gen_device_info_timebase_scale(devinfo, result->accumulator[0]);
|
||||
mdapi_data->CoreFrequency = freq_end;
|
||||
mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
|
||||
mdapi_data->CoreFrequency = result->gt_frequency[1];
|
||||
mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
|
||||
mdapi_data->SplitOccured = result->query_disjoint;
|
||||
return sizeof(*mdapi_data);
|
||||
}
|
||||
@@ -82,8 +82,8 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
|
||||
mdapi_data->BeginTimestamp =
|
||||
gen_device_info_timebase_scale(devinfo, result->begin_timestamp);
|
||||
mdapi_data->GPUTicks = result->accumulator[1];
|
||||
mdapi_data->CoreFrequency = freq_end;
|
||||
mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
|
||||
mdapi_data->CoreFrequency = result->gt_frequency[1];
|
||||
mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
|
||||
mdapi_data->SliceFrequency =
|
||||
(result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
|
||||
mdapi_data->UnsliceFrequency =
|
||||
@@ -113,8 +113,8 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
|
||||
mdapi_data->BeginTimestamp =
|
||||
gen_device_info_timebase_scale(devinfo, result->begin_timestamp);
|
||||
mdapi_data->GPUTicks = result->accumulator[1];
|
||||
mdapi_data->CoreFrequency = freq_end;
|
||||
mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
|
||||
mdapi_data->CoreFrequency = result->gt_frequency[1];
|
||||
mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
|
||||
mdapi_data->SliceFrequency =
|
||||
(result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
|
||||
mdapi_data->UnsliceFrequency =
|
||||
|
@@ -129,8 +129,8 @@ struct mdapi_pipeline_metrics {
|
||||
|
||||
int gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
|
||||
const struct gen_device_info *devinfo,
|
||||
const struct gen_perf_query_result *result,
|
||||
uint64_t freq_start, uint64_t freq_end);
|
||||
const struct gen_perf_query_info *query,
|
||||
const struct gen_perf_query_result *result);
|
||||
|
||||
static inline void gen_perf_query_mdapi_write_perfcntr(void *data, uint32_t data_size,
|
||||
const struct gen_device_info *devinfo,
|
||||
|
@@ -218,11 +218,6 @@ struct gen_perf_query_object
|
||||
*/
|
||||
bool results_accumulated;
|
||||
|
||||
/**
|
||||
* Frequency of the GT at begin and end of the query.
|
||||
*/
|
||||
uint64_t gt_frequency[2];
|
||||
|
||||
/**
|
||||
* Accumulated OA results between begin and end of the query.
|
||||
*/
|
||||
@@ -1405,37 +1400,6 @@ gen_perf_delete_query(struct gen_perf_context *perf_ctx,
|
||||
free(query);
|
||||
}
|
||||
|
||||
#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)
|
||||
|
||||
static void
|
||||
read_gt_frequency(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *obj)
|
||||
{
|
||||
const struct gen_device_info *devinfo = perf_ctx->devinfo;
|
||||
uint32_t start = *((uint32_t *)(obj->oa.map + MI_FREQ_START_OFFSET_BYTES)),
|
||||
end = *((uint32_t *)(obj->oa.map + MI_FREQ_END_OFFSET_BYTES));
|
||||
|
||||
switch (devinfo->gen) {
|
||||
case 7:
|
||||
case 8:
|
||||
obj->oa.gt_frequency[0] = GET_FIELD(start, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
|
||||
obj->oa.gt_frequency[1] = GET_FIELD(end, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
|
||||
break;
|
||||
case 9:
|
||||
case 11:
|
||||
case 12:
|
||||
obj->oa.gt_frequency[0] = GET_FIELD(start, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
|
||||
obj->oa.gt_frequency[1] = GET_FIELD(end, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
|
||||
break;
|
||||
default:
|
||||
unreachable("unexpected gen");
|
||||
}
|
||||
|
||||
/* Put the numbers into Hz. */
|
||||
obj->oa.gt_frequency[0] *= 1000000ULL;
|
||||
obj->oa.gt_frequency[1] *= 1000000ULL;
|
||||
}
|
||||
|
||||
static int
|
||||
get_oa_counter_data(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query,
|
||||
@@ -1540,7 +1504,6 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
|
||||
while (!read_oa_samples_for_query(perf_ctx, query, current_batch))
|
||||
;
|
||||
|
||||
read_gt_frequency(perf_ctx, query);
|
||||
uint32_t *begin_report = query->oa.map;
|
||||
uint32_t *end_report = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES;
|
||||
gen_perf_query_result_read_frequencies(&query->oa.result,
|
||||
@@ -1559,9 +1522,8 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
|
||||
const struct gen_device_info *devinfo = perf_ctx->devinfo;
|
||||
|
||||
written = gen_perf_query_result_write_mdapi((uint8_t *)data, data_size,
|
||||
devinfo, &query->oa.result,
|
||||
query->oa.gt_frequency[0],
|
||||
query->oa.gt_frequency[1]);
|
||||
devinfo, query->queryinfo,
|
||||
&query->oa.result);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@@ -528,29 +528,22 @@ VkResult genX(GetQueryPoolResults)(
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
|
||||
if (!write_results)
|
||||
break;
|
||||
const struct gen_perf_query_info *query = &device->physical->perf->queries[0];
|
||||
const void *query_data = query_slot(pool, firstQuery + i);
|
||||
const uint32_t *oa_begin = query_data + intel_perf_mi_rpc_offset(false);
|
||||
const uint32_t *oa_end = query_data + intel_perf_mi_rpc_offset(true);
|
||||
const uint32_t *rpstat_begin = query_data + intel_perf_rpstart_offset(false);
|
||||
const uint32_t *rpstat_end = query_data + intel_perf_mi_rpc_offset(true);
|
||||
struct gen_perf_query_result result;
|
||||
uint32_t core_freq[2];
|
||||
#if GEN_GEN < 9
|
||||
core_freq[0] = ((*rpstat_begin >> 7) & 0x7f) * 1000000ULL;
|
||||
core_freq[1] = ((*rpstat_end >> 7) & 0x7f) * 1000000ULL;
|
||||
#else
|
||||
core_freq[0] = ((*rpstat_begin >> 23) & 0x1ff) * 1000000ULL;
|
||||
core_freq[1] = ((*rpstat_end >> 23) & 0x1ff) * 1000000ULL;
|
||||
#endif
|
||||
gen_perf_query_result_clear(&result);
|
||||
gen_perf_query_result_accumulate(&result, &device->physical->perf->queries[0],
|
||||
oa_begin, oa_end);
|
||||
gen_perf_query_result_accumulate(&result, query, oa_begin, oa_end);
|
||||
gen_perf_query_result_read_frequencies(&result, &device->info,
|
||||
oa_begin, oa_end);
|
||||
gen_perf_query_result_read_gt_frequency(&result, &device->info,
|
||||
*rpstat_begin, *rpstat_end);
|
||||
gen_perf_query_result_write_mdapi(pData, stride,
|
||||
&device->info,
|
||||
&result,
|
||||
core_freq[0], core_freq[1]);
|
||||
query, &result);
|
||||
#if GEN_GEN >= 8 && GEN_GEN <= 11
|
||||
gen_perf_query_mdapi_write_perfcntr(pData, stride, &device->info,
|
||||
query_data + intel_perf_counter(false),
|
||||
|
Reference in New Issue
Block a user