anv: implement u_trace support
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Rohan Garg <rohan.garg@intel.com> Acked-by: Antonio Caggiano <antonio.caggiano@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13996>
This commit is contained in:

committed by
Marge Bot

parent
bb541d1159
commit
cc5843a573
@@ -37,6 +37,7 @@
|
||||
#include "perf/intel_perf.h"
|
||||
|
||||
#include "util/debug.h"
|
||||
#include "util/perf/u_trace.h"
|
||||
|
||||
/** \file anv_batch_chain.c
|
||||
*
|
||||
@@ -1956,6 +1957,94 @@ setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue)
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
|
||||
struct anv_utrace_flush_copy *flush)
|
||||
{
|
||||
struct anv_device *device = queue->device;
|
||||
VkResult result = anv_execbuf_add_bo(device, execbuf,
|
||||
flush->batch_bo,
|
||||
&flush->relocs, 0);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
result = anv_execbuf_add_sync(device, execbuf, flush->sync,
|
||||
true /* is_signal */, 0 /* value */);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
if (flush->batch_bo->exec_obj_index != execbuf->bo_count - 1) {
|
||||
uint32_t idx = flush->batch_bo->exec_obj_index;
|
||||
uint32_t last_idx = execbuf->bo_count - 1;
|
||||
|
||||
struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
|
||||
assert(execbuf->bos[idx] == flush->batch_bo);
|
||||
|
||||
execbuf->objects[idx] = execbuf->objects[last_idx];
|
||||
execbuf->bos[idx] = execbuf->bos[last_idx];
|
||||
execbuf->bos[idx]->exec_obj_index = idx;
|
||||
|
||||
execbuf->objects[last_idx] = tmp_obj;
|
||||
execbuf->bos[last_idx] = flush->batch_bo;
|
||||
flush->batch_bo->exec_obj_index = last_idx;
|
||||
}
|
||||
|
||||
if (!device->info.has_llc) {
|
||||
__builtin_ia32_mfence();
|
||||
for (uint32_t i = 0; i < flush->batch_bo->size; i += CACHELINE_SIZE)
|
||||
__builtin_ia32_clflush(flush->batch_bo->map);
|
||||
}
|
||||
|
||||
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
|
||||
.buffers_ptr = (uintptr_t) execbuf->objects,
|
||||
.buffer_count = execbuf->bo_count,
|
||||
.batch_start_offset = 0,
|
||||
.batch_len = flush->batch.next - flush->batch.start,
|
||||
.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_FENCE_ARRAY | queue->exec_flags |
|
||||
(execbuf->has_relocs ? 0 : I915_EXEC_NO_RELOC),
|
||||
.rsvd1 = device->context_id,
|
||||
.rsvd2 = 0,
|
||||
.num_cliprects = execbuf->syncobj_count,
|
||||
.cliprects_ptr = (uintptr_t)execbuf->syncobjs,
|
||||
};
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
anv_queue_exec_utrace_locked(struct anv_queue *queue,
|
||||
struct anv_utrace_flush_copy *flush)
|
||||
{
|
||||
assert(flush->batch_bo);
|
||||
|
||||
struct anv_device *device = queue->device;
|
||||
struct anv_execbuf execbuf;
|
||||
anv_execbuf_init(&execbuf);
|
||||
execbuf.alloc = &device->vk.alloc;
|
||||
execbuf.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE;
|
||||
|
||||
VkResult result = setup_utrace_execbuf(&execbuf, queue, flush);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error;
|
||||
|
||||
int ret = queue->device->info.no_hw ? 0 :
|
||||
anv_gem_execbuffer(queue->device, &execbuf.execbuf);
|
||||
if (ret)
|
||||
result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
|
||||
|
||||
struct drm_i915_gem_exec_object2 *objects = execbuf.objects;
|
||||
for (uint32_t k = 0; k < execbuf.bo_count; k++) {
|
||||
if (anv_bo_is_pinned(execbuf.bos[k]))
|
||||
assert(execbuf.bos[k]->offset == objects[k].offset);
|
||||
execbuf.bos[k]->offset = objects[k].offset;
|
||||
}
|
||||
|
||||
error:
|
||||
anv_execbuf_finish(&execbuf);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* We lock around execbuf for three main reasons:
|
||||
*
|
||||
* 1) When a block pool is resized, we create a new gem handle with a
|
||||
@@ -1992,16 +2081,37 @@ anv_queue_exec_locked(struct anv_queue *queue,
|
||||
uint32_t perf_query_pass)
|
||||
{
|
||||
struct anv_device *device = queue->device;
|
||||
struct anv_utrace_flush_copy *utrace_flush_data = NULL;
|
||||
struct anv_execbuf execbuf;
|
||||
anv_execbuf_init(&execbuf);
|
||||
execbuf.alloc = &queue->device->vk.alloc;
|
||||
execbuf.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE;
|
||||
execbuf.perf_query_pass = perf_query_pass;
|
||||
|
||||
/* Flush the trace points first, they need to be moved */
|
||||
VkResult result =
|
||||
anv_device_utrace_flush_cmd_buffers(queue,
|
||||
cmd_buffer_count,
|
||||
cmd_buffers,
|
||||
&utrace_flush_data);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error;
|
||||
|
||||
if (utrace_flush_data && !utrace_flush_data->batch_bo) {
|
||||
result = anv_execbuf_add_sync(device, &execbuf,
|
||||
utrace_flush_data->sync,
|
||||
true /* is_signal */,
|
||||
0);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error;
|
||||
|
||||
utrace_flush_data = NULL;
|
||||
}
|
||||
|
||||
/* Always add the workaround BO as it includes a driver identifier for the
|
||||
* error_state.
|
||||
*/
|
||||
VkResult result =
|
||||
result =
|
||||
anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error;
|
||||
@@ -2148,6 +2258,9 @@ anv_queue_exec_locked(struct anv_queue *queue,
|
||||
error:
|
||||
anv_execbuf_finish(&execbuf);
|
||||
|
||||
if (result == VK_SUCCESS && utrace_flush_data)
|
||||
result = anv_queue_exec_utrace_locked(queue, utrace_flush_data);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@@ -302,6 +302,8 @@ static VkResult anv_create_cmd_buffer(
|
||||
|
||||
anv_measure_init(cmd_buffer);
|
||||
|
||||
u_trace_init(&cmd_buffer->trace, &device->trace_context);
|
||||
|
||||
*pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer);
|
||||
|
||||
return VK_SUCCESS;
|
||||
@@ -343,6 +345,8 @@ VkResult anv_AllocateCommandBuffers(
|
||||
static void
|
||||
anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
u_trace_fini(&cmd_buffer->trace);
|
||||
|
||||
anv_measure_destroy(cmd_buffer);
|
||||
|
||||
list_del(&cmd_buffer->pool_link);
|
||||
@@ -401,6 +405,10 @@ anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer)
|
||||
&cmd_buffer->device->general_state_pool, 16384);
|
||||
|
||||
anv_measure_reset(cmd_buffer);
|
||||
|
||||
u_trace_fini(&cmd_buffer->trace);
|
||||
u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->trace_context);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
@@ -3297,6 +3297,8 @@ VkResult anv_CreateDevice(
|
||||
|
||||
anv_device_perf_init(device);
|
||||
|
||||
anv_device_utrace_init(device);
|
||||
|
||||
*pDevice = anv_device_to_handle(device);
|
||||
|
||||
return VK_SUCCESS;
|
||||
@@ -3364,6 +3366,8 @@ void anv_DestroyDevice(
|
||||
if (!device)
|
||||
return;
|
||||
|
||||
anv_device_utrace_finish(device);
|
||||
|
||||
anv_device_finish_blorp(device);
|
||||
|
||||
anv_device_finish_rt_shaders(device);
|
||||
|
@@ -143,8 +143,9 @@ void genX(blorp_exec)(struct blorp_batch *batch,
|
||||
const struct blorp_params *params);
|
||||
|
||||
void genX(cmd_emit_timestamp)(struct anv_batch *batch,
|
||||
struct anv_bo *bo,
|
||||
uint32_t offset);
|
||||
struct anv_device *device,
|
||||
struct anv_address addr,
|
||||
bool end_of_pipe);
|
||||
|
||||
void
|
||||
genX(rasterization_mode)(VkPolygonMode raster_mode,
|
||||
|
@@ -158,7 +158,11 @@ anv_measure_start_snapshot(struct anv_cmd_buffer *cmd_buffer,
|
||||
|
||||
unsigned index = measure->base.index++;
|
||||
|
||||
(*device->cmd_emit_timestamp)(batch, measure->bo, index * sizeof(uint64_t));
|
||||
(*device->cmd_emit_timestamp)(batch, cmd_buffer->device,
|
||||
(struct anv_address) {
|
||||
.bo = measure->bo,
|
||||
.offset = index * sizeof(uint64_t) },
|
||||
true /* end_of_pipe */);
|
||||
|
||||
if (event_name == NULL)
|
||||
event_name = intel_measure_snapshot_string(type);
|
||||
@@ -195,7 +199,11 @@ anv_measure_end_snapshot(struct anv_cmd_buffer *cmd_buffer,
|
||||
unsigned index = measure->base.index++;
|
||||
assert(index % 2 == 1);
|
||||
|
||||
(*device->cmd_emit_timestamp)(batch, measure->bo, index * sizeof(uint64_t));
|
||||
(*device->cmd_emit_timestamp)(batch, cmd_buffer->device,
|
||||
(struct anv_address) {
|
||||
.bo = measure->bo,
|
||||
.offset = index * sizeof(uint64_t) },
|
||||
true /* end_of_pipe */);
|
||||
|
||||
struct intel_measure_snapshot *snapshot = &(measure->base.snapshots[index]);
|
||||
memset(snapshot, 0, sizeof(*snapshot));
|
||||
|
@@ -57,6 +57,7 @@
|
||||
#include "util/macros.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/list.h"
|
||||
#include "util/perf/u_trace.h"
|
||||
#include "util/sparse_array.h"
|
||||
#include "util/u_atomic.h"
|
||||
#include "util/u_vector.h"
|
||||
@@ -552,6 +553,46 @@ anv_bo_is_pinned(struct anv_bo *bo)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* A GPU address expressed as an optional buffer object plus an offset. */
struct anv_address {
   /* Buffer object the address refers to, or NULL for a raw address. */
   struct anv_bo *bo;

   /* Offset added to the BO's own offset when the BO is pinned; used on its
    * own as the address otherwise (see anv_address_physical()).
    */
   int64_t offset;
};
|
||||
|
||||
#define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
|
||||
|
||||
static inline struct anv_address
|
||||
anv_address_from_u64(uint64_t addr_u64)
|
||||
{
|
||||
assert(addr_u64 == intel_canonical_address(addr_u64));
|
||||
return (struct anv_address) {
|
||||
.bo = NULL,
|
||||
.offset = addr_u64,
|
||||
};
|
||||
}
|
||||
|
||||
static inline bool
|
||||
anv_address_is_null(struct anv_address addr)
|
||||
{
|
||||
return addr.bo == NULL && addr.offset == 0;
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
anv_address_physical(struct anv_address addr)
|
||||
{
|
||||
if (addr.bo && anv_bo_is_pinned(addr.bo)) {
|
||||
return intel_canonical_address(addr.bo->offset + addr.offset);
|
||||
} else {
|
||||
return intel_canonical_address(addr.offset);
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct anv_address
|
||||
anv_address_add(struct anv_address addr, uint64_t offset)
|
||||
{
|
||||
addr.offset += offset;
|
||||
return addr;
|
||||
}
|
||||
|
||||
/* Represents a lock-free linked list of "free" things. This is used by
|
||||
* both the block pool and the state pools. Unfortunately, in order to
|
||||
* solve the ABA problem, we can't use a single uint32_t head.
|
||||
@@ -986,7 +1027,7 @@ struct anv_physical_device {
|
||||
int64_t master_minor;
|
||||
struct drm_i915_query_engine_info * engine_info;
|
||||
|
||||
void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_bo *, uint32_t );
|
||||
void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address, bool);
|
||||
struct intel_measure_device measure_device;
|
||||
};
|
||||
|
||||
@@ -1094,11 +1135,6 @@ anv_device_upload_nir(struct anv_device *device,
|
||||
const struct nir_shader *nir,
|
||||
unsigned char sha1_key[20]);
|
||||
|
||||
struct anv_address {
|
||||
struct anv_bo *bo;
|
||||
int64_t offset;
|
||||
};
|
||||
|
||||
struct anv_device {
|
||||
struct vk_device vk;
|
||||
|
||||
@@ -1179,6 +1215,8 @@ struct anv_device {
|
||||
const struct intel_l3_config *l3_config;
|
||||
|
||||
struct intel_debug_block_frame *debug_frame_desc;
|
||||
|
||||
struct u_trace_context trace_context;
|
||||
};
|
||||
|
||||
#if defined(GFX_VERx10) && GFX_VERx10 >= 90
|
||||
@@ -1506,42 +1544,6 @@ anv_batch_emit_reloc(struct anv_batch *batch,
|
||||
return address_u64;
|
||||
}
|
||||
|
||||
|
||||
#define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
|
||||
|
||||
static inline struct anv_address
|
||||
anv_address_from_u64(uint64_t addr_u64)
|
||||
{
|
||||
assert(addr_u64 == intel_canonical_address(addr_u64));
|
||||
return (struct anv_address) {
|
||||
.bo = NULL,
|
||||
.offset = addr_u64,
|
||||
};
|
||||
}
|
||||
|
||||
static inline bool
|
||||
anv_address_is_null(struct anv_address addr)
|
||||
{
|
||||
return addr.bo == NULL && addr.offset == 0;
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
anv_address_physical(struct anv_address addr)
|
||||
{
|
||||
if (addr.bo && anv_bo_is_pinned(addr.bo)) {
|
||||
return intel_canonical_address(addr.bo->offset + addr.offset);
|
||||
} else {
|
||||
return intel_canonical_address(addr.offset);
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct anv_address
|
||||
anv_address_add(struct anv_address addr, uint64_t offset)
|
||||
{
|
||||
addr.offset += offset;
|
||||
return addr;
|
||||
}
|
||||
|
||||
static inline void
|
||||
write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
|
||||
{
|
||||
@@ -3088,6 +3090,11 @@ struct anv_cmd_buffer {
|
||||
* Used to increase allocation size for long command buffers.
|
||||
*/
|
||||
uint32_t total_batch_size;
|
||||
|
||||
/**
 * u_trace state holding the tracepoints recorded into this command buffer.
 */
|
||||
struct u_trace trace;
|
||||
};
|
||||
|
||||
/* Determine whether we can chain a given cmd_buffer to another one. We need
|
||||
@@ -4541,6 +4548,29 @@ struct anv_memcpy_state {
|
||||
struct anv_vb_cache_range vb_dirty;
|
||||
};
|
||||
|
||||
struct anv_utrace_flush_copy {
|
||||
struct u_trace trace;
|
||||
|
||||
struct anv_reloc_list relocs;
|
||||
struct anv_batch batch;
|
||||
struct anv_bo *batch_bo;
|
||||
|
||||
struct anv_bo *trace_bo;
|
||||
|
||||
struct vk_sync *sync;
|
||||
|
||||
struct anv_memcpy_state memcpy_state;
|
||||
};
|
||||
|
||||
void anv_device_utrace_init(struct anv_device *device);
|
||||
void anv_device_utrace_finish(struct anv_device *device);
|
||||
VkResult
|
||||
anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
|
||||
uint32_t cmd_buffer_count,
|
||||
struct anv_cmd_buffer **cmd_buffers,
|
||||
struct anv_utrace_flush_copy **out_flush_data);
|
||||
|
||||
|
||||
#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
|
||||
VK_FROM_HANDLE(__anv_type, __name, __handle)
|
||||
|
||||
|
163
src/intel/vulkan/anv_tracepoints.py
Normal file
163
src/intel/vulkan/anv_tracepoints.py
Normal file
@@ -0,0 +1,163 @@
|
||||
#
|
||||
# Copyright © 2021 Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
#
|
||||
# Tracepoint definitions:
|
||||
#
|
||||
def define_tracepoints(args):
    """Declare every anv tracepoint for the u_trace code generator.

    The u_trace module is imported here rather than at file level because its
    location is only added to sys.path by main(). `args` is accepted for
    symmetry with generate_code() but is not read.

    The Header()/Tracepoint() return values are discarded; presumably the
    u_trace module keeps track of the created objects itself (confirm in
    u_trace.py).
    """
    from u_trace import Header, HeaderScope
    from u_trace import ForwardDecl
    from u_trace import Tracepoint
    from u_trace import TracepointArg as Arg
    from u_trace import TracepointArgStruct as ArgStruct

    Header('anv_private.h', scope=HeaderScope.SOURCE)
    Header('blorp/blorp_priv.h', scope=HeaderScope.HEADER)

    def begin_end_tp(name, tp_args=[], tp_struct=None, end_pipelined=True):
        # Declare a begin_<name>/end_<name> pair. Only the end tracepoint
        # carries the arguments and the end-of-pipe setting.
        Tracepoint('begin_{0}'.format(name))
        Tracepoint('end_{0}'.format(name),
                   args=tp_args,
                   tp_struct=tp_struct,
                   end_of_pipe=end_pipelined)


    begin_end_tp('cmd_buffer',
                 tp_args=[ArgStruct(type='uint8_t', var='level'),],
                 tp_struct=[Arg(type='uint8_t', name='level', var='level', c_format='%hhu'),],
                 end_pipelined=False)

    begin_end_tp('render_pass',
                 tp_args=[ArgStruct(type='uint16_t', var='width'),
                          ArgStruct(type='uint16_t', var='height'),
                          ArgStruct(type='uint8_t', var='att_count'),
                          ArgStruct(type='uint8_t', var='msaa'),
                          ArgStruct(type='uint32_t', var='subpass_count'),],
                 tp_struct=[Arg(type='uint16_t', name='width', var='width', c_format='%hu'),
                            Arg(type='uint16_t', name='height', var='height', c_format='%hu'),
                            Arg(type='uint8_t', name='att_count', var='att_count', c_format='%hhu'),
                            Arg(type='uint8_t', name='msaa', var='msaa', c_format='%hhu'),
                            # '%u', not '%ou': the latter prints the count in
                            # octal followed by a literal 'u'.
                            Arg(type='uint32_t', name='subpass_count', var='subpass_count', c_format='%u'),])

    begin_end_tp('blorp',
                 tp_args=[ArgStruct(type='uint32_t', var='width'),
                          ArgStruct(type='uint32_t', var='height'),
                          ArgStruct(type='enum isl_aux_op', var='hiz_op'),
                          ArgStruct(type='enum isl_aux_op', var='fast_clear_op'),
                          ArgStruct(type='enum blorp_shader_type', var='shader_type'),
                          ArgStruct(type='enum blorp_shader_pipeline', var='shader_pipe'),],
                 tp_struct=[Arg(type='uint32_t', name='width', var='width', c_format='%u'),
                            Arg(type='uint32_t', name='height', var='height', c_format='%u'),
                            Arg(type='enum isl_aux_op', name='hiz_op', var='hiz_op', c_format='%s', to_prim_type='isl_aux_op_to_name({})'),
                            Arg(type='enum isl_aux_op', name='fast_clear_op', var='fast_clear_op', c_format='%s', to_prim_type='isl_aux_op_to_name({})'),
                            Arg(type='enum blorp_shader_type', name='type', var='shader_type', c_format='%s', to_prim_type='blorp_shader_type_to_name({})'),
                            Arg(type='enum blorp_shader_pipeline', name='pipe', var='shader_pipe', c_format='%s', to_prim_type='blorp_shader_pipeline_to_name({})'),])

    begin_end_tp('draw',
                 tp_args=[ArgStruct(type='uint32_t', var='count'),],
                 tp_struct=[Arg(type='uint32_t', name='count', var='count', c_format='%u'),])
    begin_end_tp('draw_multi',
                 tp_args=[ArgStruct(type='uint32_t', var='count'),],
                 tp_struct=[Arg(type='uint32_t', name='count', var='count', c_format='%u'),])
    begin_end_tp('draw_indexed',
                 tp_args=[ArgStruct(type='uint32_t', var='count'),],
                 tp_struct=[Arg(type='uint32_t', name='count', var='count', c_format='%u'),])
    begin_end_tp('draw_indexed_multi',
                 tp_args=[ArgStruct(type='uint32_t', var='count'),],
                 tp_struct=[Arg(type='uint32_t', name='count', var='count', c_format='%u'),])
    begin_end_tp('draw_indirect_byte_count',
                 tp_args=[ArgStruct(type='uint32_t', var='instance_count'),],
                 tp_struct=[Arg(type='uint32_t', name='instance_count', var='instance_count', c_format='%u'),])
    begin_end_tp('draw_indirect',
                 tp_args=[ArgStruct(type='uint32_t', var='draw_count'),],
                 tp_struct=[Arg(type='uint32_t', name='draw_count', var='draw_count', c_format='%u'),])
    begin_end_tp('draw_indexed_indirect',
                 tp_args=[ArgStruct(type='uint32_t', var='draw_count'),],
                 tp_struct=[Arg(type='uint32_t', name='draw_count', var='draw_count', c_format='%u'),])
    begin_end_tp('draw_indirect_count',
                 tp_args=[ArgStruct(type='uint32_t', var='max_draw_count'),],
                 tp_struct=[Arg(type='uint32_t', name='max_draw_count', var='max_draw_count', c_format='%u'),])
    begin_end_tp('draw_indexed_indirect_count',
                 tp_args=[ArgStruct(type='uint32_t', var='max_draw_count'),],
                 tp_struct=[Arg(type='uint32_t', name='max_draw_count', var='max_draw_count', c_format='%u'),])

    begin_end_tp('compute',
                 tp_args=[ArgStruct(type='uint32_t', var='group_x'),
                          ArgStruct(type='uint32_t', var='group_y'),
                          ArgStruct(type='uint32_t', var='group_z'),],
                 tp_struct=[Arg(type='uint32_t', name='group_x', var='group_x', c_format='%u'),
                            Arg(type='uint32_t', name='group_y', var='group_y', c_format='%u'),
                            Arg(type='uint32_t', name='group_z', var='group_z', c_format='%u'),])

    def stall_args(args):
        # Build the tp_print list for the 'stall' tracepoint: a format string
        # made of one '%s' per flag, followed by one C expression per flag
        # that yields "+<short name>" when the ANV_PIPE_<flag>_BIT is set in
        # the recorded flags and "" otherwise.
        fmt = ''
        exprs = []
        for a in args:
            fmt += '%s'
            exprs.append('(__entry->flags & ANV_PIPE_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1]))
        fmt = [fmt]
        fmt += exprs
        return fmt

    Tracepoint('stall',
               args=[ArgStruct(type='uint32_t', var='flags'),],
               tp_struct=[Arg(type='uint32_t', name='flags', var='flags', c_format='0x%x'),],
               tp_print=stall_args([['DEPTH_CACHE_FLUSH', 'depth_flush'],
                                    ['DATA_CACHE_FLUSH', 'dc_flush'],
                                    ['HDC_PIPELINE_FLUSH', 'hdc_flush'],
                                    ['RENDER_TARGET_CACHE_FLUSH', 'rt_flush'],
                                    ['TILE_CACHE_FLUSH', 'tile_flush'],
                                    ['STATE_CACHE_INVALIDATE', 'state_inval'],
                                    ['CONSTANT_CACHE_INVALIDATE', 'const_inval'],
                                    ['VF_CACHE_INVALIDATE', 'vf_inval'],
                                    ['TEXTURE_CACHE_INVALIDATE', 'tex_inval'],
                                    ['INSTRUCTION_CACHE_INVALIDATE', 'ic_inval'],
                                    ['STALL_AT_SCOREBOARD', 'pb_stall'],
                                    ['DEPTH_STALL', 'depth_stall'],
                                    ['CS_STALL', 'cs_stall'],
                                    ]))
|
||||
|
||||
|
||||
|
||||
def generate_code(args):
    """Write the generated tracepoint source and header files.

    The output paths are taken from `args.utrace_src` and `args.utrace_hdr`.
    """
    from u_trace import utrace_generate

    utrace_generate(cpath=args.utrace_src,
                    hpath=args.utrace_hdr,
                    ctx_param='struct anv_device *dev')
|
||||
|
||||
|
||||
def main():
    """Parse the command line, then declare and generate the tracepoints."""
    cli = argparse.ArgumentParser()
    cli.add_argument('-p', '--import-path', required=True)
    for option in ('--utrace-src', '--utrace-hdr'):
        cli.add_argument(option, required=True)
    options = cli.parse_args()

    # Make the u_trace module importable before it is needed.
    sys.path.insert(0, options.import_path)

    define_tracepoints(options)
    generate_code(options)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
279
src/intel/vulkan/anv_utrace.c
Normal file
279
src/intel/vulkan/anv_utrace.c
Normal file
@@ -0,0 +1,279 @@
|
||||
/*
|
||||
* Copyright © 2021 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "anv_private.h"
|
||||
|
||||
#include "perf/intel_perf.h"
|
||||
|
||||
static uint32_t
|
||||
command_buffers_count_utraces(struct anv_device *device,
|
||||
uint32_t cmd_buffer_count,
|
||||
struct anv_cmd_buffer **cmd_buffers,
|
||||
uint32_t *utrace_copies)
|
||||
{
|
||||
if (!u_trace_context_actively_tracing(&device->trace_context))
|
||||
return 0;
|
||||
|
||||
uint32_t utraces = 0;
|
||||
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
|
||||
if (u_trace_has_points(&cmd_buffers[i]->trace)) {
|
||||
utraces++;
|
||||
if (!(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
|
||||
*utrace_copies += list_length(&cmd_buffers[i]->trace.trace_chunks);
|
||||
}
|
||||
}
|
||||
|
||||
return utraces;
|
||||
}
|
||||
|
||||
static void
|
||||
anv_utrace_delete_flush_data(struct u_trace_context *utctx,
|
||||
void *flush_data)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, trace_context);
|
||||
struct anv_utrace_flush_copy *flush = flush_data;
|
||||
|
||||
u_trace_fini(&flush->trace);
|
||||
|
||||
if (flush->trace_bo) {
|
||||
assert(flush->batch_bo);
|
||||
anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
|
||||
anv_device_release_bo(device, flush->batch_bo);
|
||||
anv_device_release_bo(device, flush->trace_bo);
|
||||
}
|
||||
|
||||
vk_sync_destroy(&device->vk, flush->sync);
|
||||
|
||||
vk_free(&device->vk.alloc, flush);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx,
|
||||
void *cmdstream,
|
||||
void *ts_from, uint32_t from_offset,
|
||||
void *ts_to, uint32_t to_offset,
|
||||
uint32_t count)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, trace_context);
|
||||
struct anv_utrace_flush_copy *flush = cmdstream;
|
||||
struct anv_address from_addr = (struct anv_address) {
|
||||
.bo = ts_from, .offset = from_offset * sizeof(uint64_t) };
|
||||
struct anv_address to_addr = (struct anv_address) {
|
||||
.bo = ts_to, .offset = to_offset * sizeof(uint64_t) };
|
||||
|
||||
anv_genX(&device->info, emit_so_memcpy)(&flush->memcpy_state,
|
||||
to_addr, from_addr, count * sizeof(uint64_t));
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
|
||||
uint32_t cmd_buffer_count,
|
||||
struct anv_cmd_buffer **cmd_buffers,
|
||||
struct anv_utrace_flush_copy **out_flush_data)
|
||||
{
|
||||
struct anv_device *device = queue->device;
|
||||
uint32_t utrace_copies = 0;
|
||||
uint32_t utraces = command_buffers_count_utraces(device,
|
||||
cmd_buffer_count,
|
||||
cmd_buffers,
|
||||
&utrace_copies);
|
||||
if (!utraces) {
|
||||
*out_flush_data = NULL;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult result;
|
||||
struct anv_utrace_flush_copy *flush =
|
||||
vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_flush_copy),
|
||||
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
if (!flush)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
u_trace_init(&flush->trace, &device->trace_context);
|
||||
|
||||
result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
|
||||
0, 0, &flush->sync);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error_sync;
|
||||
|
||||
if (utrace_copies > 0) {
|
||||
result =
|
||||
anv_device_alloc_bo(device, "utrace-copy-buf", utrace_copies * 4096,
|
||||
ANV_BO_ALLOC_MAPPED, 0 /* explicit_address */,
|
||||
&flush->trace_bo);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error_trace_buf;
|
||||
|
||||
result =
|
||||
anv_device_alloc_bo(device, "utrace-copy-batch",
|
||||
/* 128 dwords of setup + 64 dwords per copy */
|
||||
align_u32(512 + 64 * utrace_copies, 4096),
|
||||
ANV_BO_ALLOC_MAPPED, 0 /* explicit_address */,
|
||||
&flush->batch_bo);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error_batch_buf;
|
||||
|
||||
result = anv_reloc_list_init(&flush->relocs, &device->vk.alloc);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error_reloc_list;
|
||||
|
||||
flush->batch.alloc = &device->vk.alloc;
|
||||
flush->batch.relocs = &flush->relocs;
|
||||
anv_batch_set_storage(&flush->batch,
|
||||
(struct anv_address) { .bo = flush->batch_bo, },
|
||||
flush->batch_bo->map, flush->batch_bo->size);
|
||||
|
||||
/* Emit the copies */
|
||||
anv_genX(&device->info, emit_so_memcpy_init)(&flush->memcpy_state,
|
||||
device,
|
||||
&flush->batch);
|
||||
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
|
||||
if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
|
||||
u_trace_flush(&cmd_buffers[i]->trace, flush, false);
|
||||
} else {
|
||||
u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
|
||||
u_trace_end_iterator(&cmd_buffers[i]->trace),
|
||||
&flush->trace,
|
||||
flush,
|
||||
anv_device_utrace_emit_copy_ts_buffer);
|
||||
}
|
||||
}
|
||||
anv_genX(&device->info, emit_so_memcpy_fini)(&flush->memcpy_state);
|
||||
|
||||
u_trace_flush(&flush->trace, flush, true);
|
||||
|
||||
if (flush->batch.status != VK_SUCCESS) {
|
||||
result = flush->batch.status;
|
||||
goto error_batch;
|
||||
}
|
||||
} else {
|
||||
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
|
||||
assert(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
|
||||
u_trace_flush(&cmd_buffers[i]->trace, flush, i == (cmd_buffer_count - 1));
|
||||
}
|
||||
}
|
||||
|
||||
*out_flush_data = flush;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
error_batch:
|
||||
anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
|
||||
error_reloc_list:
|
||||
anv_device_release_bo(device, flush->batch_bo);
|
||||
error_batch_buf:
|
||||
anv_device_release_bo(device, flush->trace_bo);
|
||||
error_trace_buf:
|
||||
vk_sync_destroy(&device->vk, flush->sync);
|
||||
error_sync:
|
||||
vk_free(&device->vk.alloc, flush);
|
||||
return result;
|
||||
}
|
||||
|
||||
static void *
|
||||
anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, trace_context);
|
||||
|
||||
struct anv_bo *bo = NULL;
|
||||
UNUSED VkResult result =
|
||||
anv_device_alloc_bo(device, "utrace-ts", align_u32(size_b, 4096),
|
||||
ANV_BO_ALLOC_MAPPED, 0, &bo);
|
||||
assert(result == VK_SUCCESS);
|
||||
|
||||
return bo;
|
||||
}
|
||||
|
||||
static void
|
||||
anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, trace_context);
|
||||
struct anv_bo *bo = timestamps;
|
||||
|
||||
anv_device_release_bo(device, bo);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps, unsigned idx,
|
||||
bool end_of_pipe)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = cs;
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
struct anv_bo *bo = timestamps;
|
||||
|
||||
device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device,
|
||||
(struct anv_address) {
|
||||
.bo = bo,
|
||||
.offset = idx * sizeof(uint64_t) },
|
||||
end_of_pipe);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
anv_utrace_read_ts(struct u_trace_context *utctx,
|
||||
void *timestamps, unsigned idx, void *flush_data)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, trace_context);
|
||||
struct anv_bo *bo = timestamps;
|
||||
struct anv_utrace_flush_copy *flush = flush_data;
|
||||
|
||||
/* Only need to stall on results for the first entry: */
|
||||
if (idx == 0) {
|
||||
UNUSED VkResult result =
|
||||
vk_sync_wait(&device->vk,
|
||||
flush->sync,
|
||||
0,
|
||||
VK_SYNC_WAIT_COMPLETE,
|
||||
os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE));
|
||||
assert(result == VK_SUCCESS);
|
||||
}
|
||||
|
||||
uint64_t *ts = bo->map;
|
||||
|
||||
/* Don't translate the no-timestamp marker: */
|
||||
if (ts[idx] == U_TRACE_NO_TIMESTAMP)
|
||||
return U_TRACE_NO_TIMESTAMP;
|
||||
|
||||
return intel_device_info_timebase_scale(&device->info, ts[idx]);
|
||||
}
|
||||
|
||||
void
|
||||
anv_device_utrace_init(struct anv_device *device)
|
||||
{
|
||||
u_trace_context_init(&device->trace_context, device,
|
||||
anv_utrace_create_ts_buffer,
|
||||
anv_utrace_destroy_ts_buffer,
|
||||
anv_utrace_record_ts,
|
||||
anv_utrace_read_ts,
|
||||
anv_utrace_delete_flush_data);
|
||||
}
|
||||
|
||||
void
|
||||
anv_device_utrace_finish(struct anv_device *device)
|
||||
{
|
||||
u_trace_context_fini(&device->trace_context);
|
||||
}
|
@@ -107,5 +107,7 @@ VkResult anv_QueuePresentKHR(
|
||||
vk_semaphore_reset_temporary(&queue->device->vk, semaphore);
|
||||
}
|
||||
|
||||
u_trace_context_process(&queue->device->trace_context, true);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@@ -34,10 +34,13 @@
|
||||
#include "common/intel_l3_config.h"
|
||||
#include "blorp/blorp_genX_exec.h"
|
||||
|
||||
#include "anv_tracepoints.h"
|
||||
|
||||
static void blorp_measure_start(struct blorp_batch *_batch,
|
||||
const struct blorp_params *params)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
|
||||
trace_begin_blorp(&cmd_buffer->trace, cmd_buffer);
|
||||
anv_measure_snapshot(cmd_buffer,
|
||||
params->snapshot_type,
|
||||
NULL, 0);
|
||||
@@ -46,6 +49,14 @@ static void blorp_measure_start(struct blorp_batch *_batch,
|
||||
static void blorp_measure_end(struct blorp_batch *_batch,
|
||||
const struct blorp_params *params)
|
||||
{
|
||||
struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch;
|
||||
trace_end_blorp(&cmd_buffer->trace, cmd_buffer,
|
||||
params->x1 - params->x0,
|
||||
params->y1 - params->y0,
|
||||
params->hiz_op,
|
||||
params->fast_clear_op,
|
||||
params->shader_type,
|
||||
params->shader_pipeline);
|
||||
}
|
||||
|
||||
static void *
|
||||
|
@@ -38,6 +38,8 @@
|
||||
|
||||
#include "nir/nir_xfb_info.h"
|
||||
|
||||
#include "anv_tracepoints.h"
|
||||
|
||||
/* We reserve :
|
||||
* - GPR 14 for secondary command buffer returns
|
||||
* - GPR 15 for conditional rendering
|
||||
@@ -1761,6 +1763,8 @@ genX(BeginCommandBuffer)(
|
||||
if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
|
||||
cmd_buffer->usage_flags &= ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
|
||||
|
||||
trace_begin_cmd_buffer(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
|
||||
|
||||
/* We sometimes store vertex data in the dynamic state buffer for blorp
|
||||
@@ -1934,6 +1938,8 @@ genX(EndCommandBuffer)(
|
||||
|
||||
emit_isp_disable(cmd_buffer);
|
||||
|
||||
trace_end_cmd_buffer(&cmd_buffer->trace, cmd_buffer, cmd_buffer->level);
|
||||
|
||||
anv_cmd_buffer_end_batch_buffer(cmd_buffer);
|
||||
|
||||
return VK_SUCCESS;
|
||||
@@ -2399,6 +2405,9 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
|
||||
else if (bits == 0)
|
||||
return;
|
||||
|
||||
if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS | ANV_PIPE_INVALIDATE_BITS))
|
||||
trace_stall(&cmd_buffer->trace, cmd_buffer, bits);
|
||||
|
||||
if ((GFX_VER >= 8 && GFX_VER <= 9) &&
|
||||
(bits & ANV_PIPE_CS_STALL_BIT) &&
|
||||
(bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT)) {
|
||||
@@ -3954,6 +3963,7 @@ void genX(CmdDraw)(
|
||||
anv_measure_snapshot(cmd_buffer,
|
||||
INTEL_SNAPSHOT_DRAW,
|
||||
"draw", count);
|
||||
trace_begin_draw(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
@@ -3982,6 +3992,8 @@ void genX(CmdDraw)(
|
||||
}
|
||||
|
||||
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL);
|
||||
|
||||
trace_end_draw(&cmd_buffer->trace, cmd_buffer, count);
|
||||
}
|
||||
|
||||
void genX(CmdDrawMultiEXT)(
|
||||
@@ -4006,6 +4018,7 @@ void genX(CmdDrawMultiEXT)(
|
||||
anv_measure_snapshot(cmd_buffer,
|
||||
INTEL_SNAPSHOT_DRAW,
|
||||
"draw_multi", count);
|
||||
trace_begin_draw_multi(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
@@ -4037,6 +4050,8 @@ void genX(CmdDrawMultiEXT)(
|
||||
}
|
||||
|
||||
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL);
|
||||
|
||||
trace_end_draw_multi(&cmd_buffer->trace, cmd_buffer, count);
|
||||
}
|
||||
|
||||
void genX(CmdDrawIndexed)(
|
||||
@@ -4062,6 +4077,7 @@ void genX(CmdDrawIndexed)(
|
||||
INTEL_SNAPSHOT_DRAW,
|
||||
"draw indexed",
|
||||
count);
|
||||
trace_begin_draw_indexed(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
@@ -4088,6 +4104,8 @@ void genX(CmdDrawIndexed)(
|
||||
}
|
||||
|
||||
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM);
|
||||
|
||||
trace_end_draw_indexed(&cmd_buffer->trace, cmd_buffer, count);
|
||||
}
|
||||
|
||||
void genX(CmdDrawMultiIndexedEXT)(
|
||||
@@ -4114,6 +4132,7 @@ void genX(CmdDrawMultiIndexedEXT)(
|
||||
INTEL_SNAPSHOT_DRAW,
|
||||
"draw indexed_multi",
|
||||
count);
|
||||
trace_begin_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
@@ -4200,6 +4219,8 @@ void genX(CmdDrawMultiIndexedEXT)(
|
||||
}
|
||||
|
||||
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM);
|
||||
|
||||
trace_end_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer, count);
|
||||
}
|
||||
|
||||
/* Auto-Draw / Indirect Registers */
|
||||
@@ -4235,6 +4256,7 @@ void genX(CmdDrawIndirectByteCountEXT)(
|
||||
INTEL_SNAPSHOT_DRAW,
|
||||
"draw indirect byte count",
|
||||
instanceCount);
|
||||
trace_begin_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
@@ -4277,6 +4299,9 @@ void genX(CmdDrawIndirectByteCountEXT)(
|
||||
}
|
||||
|
||||
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL);
|
||||
|
||||
trace_end_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer,
|
||||
instanceCount);
|
||||
#endif /* GFX_VERx10 >= 75 */
|
||||
}
|
||||
|
||||
@@ -4333,6 +4358,8 @@ void genX(CmdDrawIndirect)(
|
||||
if (anv_batch_has_error(&cmd_buffer->batch))
|
||||
return;
|
||||
|
||||
trace_begin_draw_indirect(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
if (cmd_buffer->state.conditional_render_enabled)
|
||||
@@ -4365,6 +4392,8 @@ void genX(CmdDrawIndirect)(
|
||||
|
||||
offset += stride;
|
||||
}
|
||||
|
||||
trace_end_draw_indirect(&cmd_buffer->trace, cmd_buffer, drawCount);
|
||||
}
|
||||
|
||||
void genX(CmdDrawIndexedIndirect)(
|
||||
@@ -4382,6 +4411,8 @@ void genX(CmdDrawIndexedIndirect)(
|
||||
if (anv_batch_has_error(&cmd_buffer->batch))
|
||||
return;
|
||||
|
||||
trace_begin_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
if (cmd_buffer->state.conditional_render_enabled)
|
||||
@@ -4415,6 +4446,8 @@ void genX(CmdDrawIndexedIndirect)(
|
||||
|
||||
offset += stride;
|
||||
}
|
||||
|
||||
trace_end_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer, drawCount);
|
||||
}
|
||||
|
||||
static struct mi_value
|
||||
@@ -4541,6 +4574,8 @@ void genX(CmdDrawIndirectCount)(
|
||||
if (anv_batch_has_error(&cmd_buffer->batch))
|
||||
return;
|
||||
|
||||
trace_begin_draw_indirect_count(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
struct mi_builder b;
|
||||
@@ -4580,6 +4615,8 @@ void genX(CmdDrawIndirectCount)(
|
||||
}
|
||||
|
||||
mi_value_unref(&b, max);
|
||||
|
||||
trace_end_draw_indirect_count(&cmd_buffer->trace, cmd_buffer, maxDrawCount);
|
||||
}
|
||||
|
||||
void genX(CmdDrawIndexedIndirectCount)(
|
||||
@@ -4601,6 +4638,8 @@ void genX(CmdDrawIndexedIndirectCount)(
|
||||
if (anv_batch_has_error(&cmd_buffer->batch))
|
||||
return;
|
||||
|
||||
trace_begin_draw_indexed_indirect_count(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
struct mi_builder b;
|
||||
@@ -4641,6 +4680,9 @@ void genX(CmdDrawIndexedIndirectCount)(
|
||||
}
|
||||
|
||||
mi_value_unref(&b, max);
|
||||
|
||||
trace_end_draw_indexed_indirect_count(&cmd_buffer->trace, cmd_buffer, maxDrawCount);
|
||||
|
||||
}
|
||||
|
||||
void genX(CmdBeginTransformFeedbackEXT)(
|
||||
@@ -5016,6 +5058,8 @@ void genX(CmdDispatchBase)(
|
||||
prog_data->local_size[0] * prog_data->local_size[1] *
|
||||
prog_data->local_size[2]);
|
||||
|
||||
trace_begin_compute(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
if (prog_data->uses_num_work_groups) {
|
||||
struct anv_state state =
|
||||
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4);
|
||||
@@ -5039,6 +5083,9 @@ void genX(CmdDispatchBase)(
|
||||
|
||||
emit_cs_walker(cmd_buffer, pipeline, false, prog_data, groupCountX,
|
||||
groupCountY, groupCountZ);
|
||||
|
||||
trace_end_compute(&cmd_buffer->trace, cmd_buffer,
|
||||
groupCountX, groupCountY, groupCountZ);
|
||||
}
|
||||
|
||||
#define GPGPU_DISPATCHDIMX 0x2500
|
||||
@@ -5072,6 +5119,7 @@ void genX(CmdDispatchIndirect)(
|
||||
INTEL_SNAPSHOT_COMPUTE,
|
||||
"compute indirect",
|
||||
0);
|
||||
trace_begin_compute(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
if (prog_data->uses_num_work_groups) {
|
||||
cmd_buffer->state.compute.num_workgroups = addr;
|
||||
@@ -5145,6 +5193,8 @@ void genX(CmdDispatchIndirect)(
|
||||
#endif
|
||||
|
||||
emit_cs_walker(cmd_buffer, pipeline, true, prog_data, 0, 0, 0);
|
||||
|
||||
trace_end_compute(&cmd_buffer->trace, cmd_buffer, 0, 0, 0);
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
@@ -6750,6 +6800,7 @@ void genX(CmdBeginRenderPass2)(
|
||||
cmd_buffer->state.render_area = pRenderPassBeginInfo->renderArea;
|
||||
|
||||
anv_measure_beginrenderpass(cmd_buffer);
|
||||
trace_begin_render_pass(&cmd_buffer->trace, cmd_buffer);
|
||||
|
||||
result = genX(cmd_buffer_setup_attachments)(cmd_buffer, pass,
|
||||
framebuffer,
|
||||
@@ -6792,6 +6843,14 @@ void genX(CmdEndRenderPass2)(
|
||||
|
||||
cmd_buffer_end_subpass(cmd_buffer);
|
||||
|
||||
trace_end_render_pass(&cmd_buffer->trace, cmd_buffer,
|
||||
cmd_buffer->state.render_area.extent.width,
|
||||
cmd_buffer->state.render_area.extent.height,
|
||||
cmd_buffer->state.pass->attachment_count,
|
||||
cmd_buffer->state.pass->attachment_count > 0 ?
|
||||
cmd_buffer->state.pass->attachments[0].samples : 0,
|
||||
cmd_buffer->state.pass->subpass_count);
|
||||
|
||||
cmd_buffer->state.hiz_enabled = false;
|
||||
|
||||
/* Remove references to render pass specific state. This enables us to
|
||||
@@ -7030,13 +7089,21 @@ VkResult genX(CmdSetPerformanceStreamMarkerINTEL)(
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
#define TIMESTAMP 0x2358
|
||||
|
||||
void genX(cmd_emit_timestamp)(struct anv_batch *batch,
|
||||
struct anv_bo *bo,
|
||||
uint32_t offset) {
|
||||
anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.CommandStreamerStallEnable = true;
|
||||
pc.PostSyncOperation = WriteTimestamp;
|
||||
pc.Address = (struct anv_address) {bo, offset};
|
||||
anv_debug_dump_pc(pc);
|
||||
struct anv_device *device,
|
||||
struct anv_address addr,
|
||||
bool end_of_pipe) {
|
||||
if (end_of_pipe) {
|
||||
anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.PostSyncOperation = WriteTimestamp;
|
||||
pc.Address = addr;
|
||||
anv_debug_dump_pc(pc);
|
||||
}
|
||||
} else {
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, &device->info, batch);
|
||||
mi_store(&b, mi_mem64(addr), mi_reg64(TIMESTAMP));
|
||||
}
|
||||
}
|
||||
|
@@ -33,6 +33,19 @@ anv_entrypoints = custom_target(
|
||||
depend_files : vk_entrypoints_gen_depend_files,
|
||||
)
|
||||
|
||||
anv_tracepoints = custom_target(
|
||||
'anv_tracepoints.[ch]',
|
||||
input: 'anv_tracepoints.py',
|
||||
output: ['anv_tracepoints.h', 'anv_tracepoints.c'],
|
||||
command: [
|
||||
prog_python, '@INPUT@',
|
||||
'-p', join_paths(meson.source_root(), 'src/util/perf/'),
|
||||
'--utrace-hdr', '@OUTPUT0@',
|
||||
'--utrace-src', '@OUTPUT1@',
|
||||
],
|
||||
depend_files: u_trace_py,
|
||||
)
|
||||
|
||||
intel_icd = custom_target(
|
||||
'intel_icd',
|
||||
input : [vk_icd_gen, vk_api_xml],
|
||||
@@ -65,7 +78,7 @@ foreach g : [['70', ['gfx7_cmd_buffer.c']], ['75', ['gfx7_cmd_buffer.c']],
|
||||
_gfx_ver = g[0]
|
||||
libanv_per_hw_ver_libs += static_library(
|
||||
'anv_per_hw_ver@0@'.format(_gfx_ver),
|
||||
[anv_per_hw_ver_files, g[1], anv_entrypoints[0]],
|
||||
[anv_per_hw_ver_files, g[1], anv_entrypoints[0], anv_tracepoints[0]],
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel,
|
||||
],
|
||||
@@ -111,6 +124,7 @@ libanv_files = files(
|
||||
'anv_private.h',
|
||||
'anv_queue.c',
|
||||
'anv_util.c',
|
||||
'anv_utrace.c',
|
||||
'anv_wsi.c',
|
||||
)
|
||||
|
||||
@@ -154,7 +168,7 @@ libanv_common = static_library(
|
||||
'anv_common',
|
||||
[
|
||||
libanv_files, anv_entrypoints, sha1_h,
|
||||
gen_xml_pack,
|
||||
gen_xml_pack
|
||||
],
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
|
||||
@@ -167,7 +181,7 @@ libanv_common = static_library(
|
||||
|
||||
libvulkan_intel = shared_library(
|
||||
'vulkan_intel',
|
||||
[files('anv_gem.c'), anv_entrypoints[0]],
|
||||
[files('anv_gem.c'), anv_entrypoints[0], anv_tracepoints],
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
|
||||
],
|
||||
@@ -202,7 +216,7 @@ endif
|
||||
if with_tests
|
||||
libvulkan_intel_test = static_library(
|
||||
'vulkan_intel_test',
|
||||
[files('anv_gem_stubs.c'), anv_entrypoints[0]],
|
||||
[files('anv_gem_stubs.c'), anv_entrypoints[0], anv_tracepoints[0]],
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler,
|
||||
],
|
||||
@@ -227,7 +241,7 @@ if with_tests
|
||||
'anv_@0@'.format(t),
|
||||
executable(
|
||||
t,
|
||||
['tests/@0@.c'.format(t), anv_entrypoints[0]],
|
||||
['tests/@0@.c'.format(t), anv_entrypoints[0], anv_tracepoints[0]],
|
||||
c_args : [ c_sse2_args ],
|
||||
link_with : libvulkan_intel_test,
|
||||
dependencies : [
|
||||
|
Reference in New Issue
Block a user