intel/perf: move gt_frequency to results

We want to unify things a bit between GL & Vulkan, so store those
values in the results rather than just in the GL query code.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Marcin Ślusarz <marcin.slusarz@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8525>
This commit is contained in:
Lionel Landwerlin
2020-09-08 14:33:12 +03:00
parent b7032d6776
commit 9a54aa131e
6 changed files with 59 additions and 62 deletions

View File

@@ -1088,6 +1088,35 @@ gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
}
/* Pull a named bitfield out of a register value via its _MASK/_SHIFT pair. */
#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)

/**
 * Decode the current GT frequency from two raw register snapshots and
 * record the results, in Hz, in result->gt_frequency[0] (from start) and
 * result->gt_frequency[1] (from end).
 *
 * Only gens 7, 8, 9, 11 and 12 are handled; any other gen is treated as
 * unreachable.
 */
void
gen_perf_query_result_read_gt_frequency(struct gen_perf_query_result *result,
                                        const struct gen_device_info *devinfo,
                                        const uint32_t start,
                                        const uint32_t end)
{
   uint64_t freq_start, freq_end;

   switch (devinfo->gen) {
   case 7:
   case 8:
      freq_start = GET_FIELD(start, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
      freq_end = GET_FIELD(end, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
      break;
   case 9:
   case 11:
   case 12:
      /* Keep the multiply ahead of the divide: the integer division
       * truncates, so the evaluation order affects the result.
       */
      freq_start = GET_FIELD(start, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
      freq_end = GET_FIELD(end, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
      break;
   default:
      unreachable("unexpected gen");
   }

   /* Scale the decoded values (presumably MHz) into Hz. */
   result->gt_frequency[0] = freq_start * 1000000ULL;
   result->gt_frequency[1] = freq_end * 1000000ULL;
}
void
gen_perf_query_result_clear(struct gen_perf_query_result *result)
{

View File

@@ -150,6 +150,11 @@ struct gen_perf_query_result {
*/
uint64_t unslice_frequency[2];
/**
* Frequency of the whole GT at the beginning and end of the query.
*/
uint64_t gt_frequency[2];
/**
* Timestamp of the query.
*/
@@ -357,6 +362,14 @@ void gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result
const struct gen_device_info *devinfo,
const uint32_t *start,
const uint32_t *end);
/** Store the GT frequency as reported by the RPSTAT register.
 *
 * Decodes the current-GT-frequency field from the two raw register values
 * and writes the frequencies, converted to Hz, into result->gt_frequency[0]
 * (from start) and result->gt_frequency[1] (from end).
 *
 * The field layout is selected from devinfo->gen; gens 7, 8, 9, 11 and 12
 * are handled.
 *
 * \param result   query result receiving the decoded frequencies
 * \param devinfo  device description used to pick the register layout
 * \param start    raw register value captured at the start of the query
 * \param end      raw register value captured at the end of the query
 */
void gen_perf_query_result_read_gt_frequency(struct gen_perf_query_result *result,
const struct gen_device_info *devinfo,
const uint32_t start,
const uint32_t end);
/** Accumulate the delta between 2 OA reports into result for a given query.
*/
void gen_perf_query_result_accumulate(struct gen_perf_query_result *result,

View File

@@ -34,8 +34,8 @@
int
gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
const struct gen_device_info *devinfo,
const struct gen_perf_query_result *result,
uint64_t freq_start, uint64_t freq_end)
const struct gen_perf_query_info *query,
const struct gen_perf_query_result *result)
{
switch (devinfo->gen) {
case 7: {
@@ -57,8 +57,8 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
mdapi_data->ReportsCount = result->reports_accumulated;
mdapi_data->TotalTime =
gen_device_info_timebase_scale(devinfo, result->accumulator[0]);
mdapi_data->CoreFrequency = freq_end;
mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
mdapi_data->CoreFrequency = result->gt_frequency[1];
mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
mdapi_data->SplitOccured = result->query_disjoint;
return sizeof(*mdapi_data);
}
@@ -82,8 +82,8 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
mdapi_data->BeginTimestamp =
gen_device_info_timebase_scale(devinfo, result->begin_timestamp);
mdapi_data->GPUTicks = result->accumulator[1];
mdapi_data->CoreFrequency = freq_end;
mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
mdapi_data->CoreFrequency = result->gt_frequency[1];
mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
mdapi_data->SliceFrequency =
(result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
mdapi_data->UnsliceFrequency =
@@ -113,8 +113,8 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
mdapi_data->BeginTimestamp =
gen_device_info_timebase_scale(devinfo, result->begin_timestamp);
mdapi_data->GPUTicks = result->accumulator[1];
mdapi_data->CoreFrequency = freq_end;
mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
mdapi_data->CoreFrequency = result->gt_frequency[1];
mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
mdapi_data->SliceFrequency =
(result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
mdapi_data->UnsliceFrequency =

View File

@@ -129,8 +129,8 @@ struct mdapi_pipeline_metrics {
int gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
const struct gen_device_info *devinfo,
const struct gen_perf_query_result *result,
uint64_t freq_start, uint64_t freq_end);
const struct gen_perf_query_info *query,
const struct gen_perf_query_result *result);
static inline void gen_perf_query_mdapi_write_perfcntr(void *data, uint32_t data_size,
const struct gen_device_info *devinfo,

View File

@@ -218,11 +218,6 @@ struct gen_perf_query_object
*/
bool results_accumulated;
/**
* Frequency of the GT at begin and end of the query.
*/
uint64_t gt_frequency[2];
/**
* Accumulated OA results between begin and end of the query.
*/
@@ -1405,37 +1400,6 @@ gen_perf_delete_query(struct gen_perf_context *perf_ctx,
free(query);
}
/* Pull a named bitfield out of a register value via its _MASK/_SHIFT pair. */
#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)

/**
 * Read the two frequency snapshots stored in the query's OA buffer mapping
 * (at MI_FREQ_START_OFFSET_BYTES and MI_FREQ_END_OFFSET_BYTES), decode the
 * current GT frequency from each, and store the values in Hz in
 * obj->oa.gt_frequency[0] and obj->oa.gt_frequency[1].
 */
static void
read_gt_frequency(struct gen_perf_context *perf_ctx,
                  struct gen_perf_query_object *obj)
{
   const struct gen_device_info *devinfo = perf_ctx->devinfo;
   const uint32_t rpstat_begin =
      *((uint32_t *)(obj->oa.map + MI_FREQ_START_OFFSET_BYTES));
   const uint32_t rpstat_end =
      *((uint32_t *)(obj->oa.map + MI_FREQ_END_OFFSET_BYTES));
   uint64_t freq_begin, freq_end;

   switch (devinfo->gen) {
   case 7:
   case 8:
      freq_begin = GET_FIELD(rpstat_begin, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
      freq_end = GET_FIELD(rpstat_end, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
      break;
   case 9:
   case 11:
   case 12:
      /* Keep the multiply ahead of the divide: the integer division
       * truncates, so the evaluation order affects the result.
       */
      freq_begin = GET_FIELD(rpstat_begin, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
      freq_end = GET_FIELD(rpstat_end, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
      break;
   default:
      unreachable("unexpected gen");
   }

   /* Scale the decoded values (presumably MHz) into Hz. */
   obj->oa.gt_frequency[0] = freq_begin * 1000000ULL;
   obj->oa.gt_frequency[1] = freq_end * 1000000ULL;
}
static int
get_oa_counter_data(struct gen_perf_context *perf_ctx,
struct gen_perf_query_object *query,
@@ -1540,7 +1504,6 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
while (!read_oa_samples_for_query(perf_ctx, query, current_batch))
;
read_gt_frequency(perf_ctx, query);
uint32_t *begin_report = query->oa.map;
uint32_t *end_report = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES;
gen_perf_query_result_read_frequencies(&query->oa.result,
@@ -1559,9 +1522,8 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
const struct gen_device_info *devinfo = perf_ctx->devinfo;
written = gen_perf_query_result_write_mdapi((uint8_t *)data, data_size,
devinfo, &query->oa.result,
query->oa.gt_frequency[0],
query->oa.gt_frequency[1]);
devinfo, query->queryinfo,
&query->oa.result);
}
break;

View File

@@ -528,29 +528,22 @@ VkResult genX(GetQueryPoolResults)(
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
if (!write_results)
break;
const struct gen_perf_query_info *query = &device->physical->perf->queries[0];
const void *query_data = query_slot(pool, firstQuery + i);
const uint32_t *oa_begin = query_data + intel_perf_mi_rpc_offset(false);
const uint32_t *oa_end = query_data + intel_perf_mi_rpc_offset(true);
const uint32_t *rpstat_begin = query_data + intel_perf_rpstart_offset(false);
const uint32_t *rpstat_end = query_data + intel_perf_mi_rpc_offset(true);
struct gen_perf_query_result result;
uint32_t core_freq[2];
#if GEN_GEN < 9
core_freq[0] = ((*rpstat_begin >> 7) & 0x7f) * 1000000ULL;
core_freq[1] = ((*rpstat_end >> 7) & 0x7f) * 1000000ULL;
#else
core_freq[0] = ((*rpstat_begin >> 23) & 0x1ff) * 1000000ULL;
core_freq[1] = ((*rpstat_end >> 23) & 0x1ff) * 1000000ULL;
#endif
gen_perf_query_result_clear(&result);
gen_perf_query_result_accumulate(&result, &device->physical->perf->queries[0],
oa_begin, oa_end);
gen_perf_query_result_accumulate(&result, query, oa_begin, oa_end);
gen_perf_query_result_read_frequencies(&result, &device->info,
oa_begin, oa_end);
gen_perf_query_result_read_gt_frequency(&result, &device->info,
*rpstat_begin, *rpstat_end);
gen_perf_query_result_write_mdapi(pData, stride,
&device->info,
&result,
core_freq[0], core_freq[1]);
query, &result);
#if GEN_GEN >= 8 && GEN_GEN <= 11
gen_perf_query_mdapi_write_perfcntr(pData, stride, &device->info,
query_data + intel_perf_counter(false),