Review note: in genX(GetQueryPoolResults) the rpstat_end pointer is now taken
from intel_perf_rpstart_offset(true). It previously reused
intel_perf_mi_rpc_offset(true), the same expression as oa_end, so the "end" GT
frequency handed to gen_perf_query_result_read_gt_frequency() was decoded from
the wrong dword. The blob hashes on the "index" lines predate this adjustment.

diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c
index 0b88fb2c594..4530bb02e33 100644
--- a/src/intel/perf/gen_perf.c
+++ b/src/intel/perf/gen_perf.c
@@ -1088,6 +1088,35 @@ gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
 
 }
 
+#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)
+
+void
+gen_perf_query_result_read_gt_frequency(struct gen_perf_query_result *result,
+                                        const struct gen_device_info *devinfo,
+                                        const uint32_t start,
+                                        const uint32_t end)
+{
+   switch (devinfo->gen) {
+   case 7:
+   case 8:
+      result->gt_frequency[0] = GET_FIELD(start, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
+      result->gt_frequency[1] = GET_FIELD(end, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
+      break;
+   case 9:
+   case 11:
+   case 12:
+      result->gt_frequency[0] = GET_FIELD(start, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
+      result->gt_frequency[1] = GET_FIELD(end, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
+      break;
+   default:
+      unreachable("unexpected gen");
+   }
+
+   /* Put the numbers into Hz. */
+   result->gt_frequency[0] *= 1000000ULL;
+   result->gt_frequency[1] *= 1000000ULL;
+}
+
 void
 gen_perf_query_result_clear(struct gen_perf_query_result *result)
 {
diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h
index c91f9eeb55c..4348c731109 100644
--- a/src/intel/perf/gen_perf.h
+++ b/src/intel/perf/gen_perf.h
@@ -150,6 +150,11 @@ struct gen_perf_query_result {
     */
    uint64_t unslice_frequency[2];
 
+   /**
+    * Frequency of the whole GT at the begin and end of the query.
+    */
+   uint64_t gt_frequency[2];
+
    /**
     * Timestamp of the query.
     */
@@ -357,6 +362,18 @@ void gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result
                                             const struct gen_device_info *devinfo,
                                             const uint32_t *start,
                                             const uint32_t *end);
+
+/** Store the GT frequency as reported by the RPSTAT register.
+ *
+ * start and end are the raw register values captured at the beginning and
+ * end of the query; the decoded frequencies, in Hz, are written to
+ * result->gt_frequency[0] and result->gt_frequency[1] respectively.
+ */
+void gen_perf_query_result_read_gt_frequency(struct gen_perf_query_result *result,
+                                             const struct gen_device_info *devinfo,
+                                             const uint32_t start,
+                                             const uint32_t end);
+
 /** Accumulate the delta between 2 OA reports into result for a given query.
  */
 void gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
diff --git a/src/intel/perf/gen_perf_mdapi.c b/src/intel/perf/gen_perf_mdapi.c
index 19f1be28bbc..2452b99f59f 100644
--- a/src/intel/perf/gen_perf_mdapi.c
+++ b/src/intel/perf/gen_perf_mdapi.c
@@ -34,8 +34,8 @@
 int
 gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
                                   const struct gen_device_info *devinfo,
-                                  const struct gen_perf_query_result *result,
-                                  uint64_t freq_start, uint64_t freq_end)
+                                  const struct gen_perf_query_info *query,
+                                  const struct gen_perf_query_result *result)
 {
    switch (devinfo->gen) {
    case 7: {
@@ -57,8 +57,8 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
       mdapi_data->ReportsCount = result->reports_accumulated;
       mdapi_data->TotalTime =
          gen_device_info_timebase_scale(devinfo, result->accumulator[0]);
-      mdapi_data->CoreFrequency = freq_end;
-      mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
+      mdapi_data->CoreFrequency = result->gt_frequency[1];
+      mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
       mdapi_data->SplitOccured = result->query_disjoint;
       return sizeof(*mdapi_data);
    }
@@ -82,8 +82,8 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
       mdapi_data->BeginTimestamp =
          gen_device_info_timebase_scale(devinfo, result->begin_timestamp);
       mdapi_data->GPUTicks = result->accumulator[1];
-      mdapi_data->CoreFrequency = freq_end;
-      mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
+      mdapi_data->CoreFrequency = result->gt_frequency[1];
+      mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
       mdapi_data->SliceFrequency =
          (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
       mdapi_data->UnsliceFrequency =
@@ -113,8 +113,8 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
       mdapi_data->BeginTimestamp =
          gen_device_info_timebase_scale(devinfo, result->begin_timestamp);
       mdapi_data->GPUTicks = result->accumulator[1];
-      mdapi_data->CoreFrequency = freq_end;
-      mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
+      mdapi_data->CoreFrequency = result->gt_frequency[1];
+      mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
       mdapi_data->SliceFrequency =
          (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
       mdapi_data->UnsliceFrequency =
diff --git a/src/intel/perf/gen_perf_mdapi.h b/src/intel/perf/gen_perf_mdapi.h
index 8be8d2033ac..acf1edd6e79 100644
--- a/src/intel/perf/gen_perf_mdapi.h
+++ b/src/intel/perf/gen_perf_mdapi.h
@@ -129,8 +129,8 @@ struct mdapi_pipeline_metrics {
 
 int gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
                                       const struct gen_device_info *devinfo,
-                                      const struct gen_perf_query_result *result,
-                                      uint64_t freq_start, uint64_t freq_end);
+                                      const struct gen_perf_query_info *query,
+                                      const struct gen_perf_query_result *result);
 
 static inline void gen_perf_query_mdapi_write_perfcntr(void *data, uint32_t data_size,
                                                        const struct gen_device_info *devinfo,
diff --git a/src/intel/perf/gen_perf_query.c b/src/intel/perf/gen_perf_query.c
index 288f261f55a..e6d38b6bb72 100644
--- a/src/intel/perf/gen_perf_query.c
+++ b/src/intel/perf/gen_perf_query.c
@@ -218,11 +218,6 @@ struct gen_perf_query_object
           */
          bool results_accumulated;
 
-         /**
-          * Frequency of the GT at begin and end of the query.
-          */
-         uint64_t gt_frequency[2];
-
          /**
           * Accumulated OA results between begin and end of the query.
           */
@@ -1405,37 +1400,6 @@ gen_perf_delete_query(struct gen_perf_context *perf_ctx,
    free(query);
 }
 
-#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)
-
-static void
-read_gt_frequency(struct gen_perf_context *perf_ctx,
-                  struct gen_perf_query_object *obj)
-{
-   const struct gen_device_info *devinfo = perf_ctx->devinfo;
-   uint32_t start = *((uint32_t *)(obj->oa.map + MI_FREQ_START_OFFSET_BYTES)),
-      end = *((uint32_t *)(obj->oa.map + MI_FREQ_END_OFFSET_BYTES));
-
-   switch (devinfo->gen) {
-   case 7:
-   case 8:
-      obj->oa.gt_frequency[0] = GET_FIELD(start, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
-      obj->oa.gt_frequency[1] = GET_FIELD(end, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
-      break;
-   case 9:
-   case 11:
-   case 12:
-      obj->oa.gt_frequency[0] = GET_FIELD(start, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
-      obj->oa.gt_frequency[1] = GET_FIELD(end, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
-      break;
-   default:
-      unreachable("unexpected gen");
-   }
-
-   /* Put the numbers into Hz. */
-   obj->oa.gt_frequency[0] *= 1000000ULL;
-   obj->oa.gt_frequency[1] *= 1000000ULL;
-}
-
 static int
 get_oa_counter_data(struct gen_perf_context *perf_ctx,
                     struct gen_perf_query_object *query,
@@ -1540,7 +1504,6 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
          while (!read_oa_samples_for_query(perf_ctx, query, current_batch))
             ;
 
-         read_gt_frequency(perf_ctx, query);
          uint32_t *begin_report = query->oa.map;
          uint32_t *end_report = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES;
          gen_perf_query_result_read_frequencies(&query->oa.result,
@@ -1559,9 +1522,8 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
          const struct gen_device_info *devinfo = perf_ctx->devinfo;
 
          written = gen_perf_query_result_write_mdapi((uint8_t *)data, data_size,
-                                                     devinfo, &query->oa.result,
-                                                     query->oa.gt_frequency[0],
-                                                     query->oa.gt_frequency[1]);
+                                                     devinfo, query->queryinfo,
+                                                     &query->oa.result);
       }
       break;
 
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 3fd662cc062..5994488960d 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -528,29 +528,22 @@ VkResult genX(GetQueryPoolResults)(
       case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
          if (!write_results)
             break;
+         const struct gen_perf_query_info *query = &device->physical->perf->queries[0];
          const void *query_data = query_slot(pool, firstQuery + i);
          const uint32_t *oa_begin = query_data + intel_perf_mi_rpc_offset(false);
          const uint32_t *oa_end = query_data + intel_perf_mi_rpc_offset(true);
          const uint32_t *rpstat_begin = query_data + intel_perf_rpstart_offset(false);
-         const uint32_t *rpstat_end = query_data + intel_perf_mi_rpc_offset(true);
+         const uint32_t *rpstat_end = query_data + intel_perf_rpstart_offset(true);
          struct gen_perf_query_result result;
-         uint32_t core_freq[2];
-#if GEN_GEN < 9
-         core_freq[0] = ((*rpstat_begin >> 7) & 0x7f) * 1000000ULL;
-         core_freq[1] = ((*rpstat_end >> 7) & 0x7f) * 1000000ULL;
-#else
-         core_freq[0] = ((*rpstat_begin >> 23) & 0x1ff) * 1000000ULL;
-         core_freq[1] = ((*rpstat_end >> 23) & 0x1ff) * 1000000ULL;
-#endif
          gen_perf_query_result_clear(&result);
-         gen_perf_query_result_accumulate(&result, &device->physical->perf->queries[0],
-                                          oa_begin, oa_end);
+         gen_perf_query_result_accumulate(&result, query, oa_begin, oa_end);
          gen_perf_query_result_read_frequencies(&result, &device->info,
                                                 oa_begin, oa_end);
+         gen_perf_query_result_read_gt_frequency(&result, &device->info,
+                                                 *rpstat_begin, *rpstat_end);
          gen_perf_query_result_write_mdapi(pData, stride,
                                            &device->info,
-                                           &result,
-                                           core_freq[0], core_freq[1]);
+                                           query, &result);
 #if GEN_GEN >= 8 && GEN_GEN <= 11
          gen_perf_query_mdapi_write_perfcntr(pData, stride, &device->info,
                                              query_data + intel_perf_counter(false),