From cc5843a573bd0412c547b4f2af3cce18263ecfd4 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 18 Nov 2021 17:45:57 +0200 Subject: [PATCH] anv: implement u_trace support Signed-off-by: Lionel Landwerlin Reviewed-by: Rohan Garg Acked-by: Antonio Caggiano Part-of: --- src/intel/vulkan/anv_batch_chain.c | 115 +++++++++++- src/intel/vulkan/anv_cmd_buffer.c | 8 + src/intel/vulkan/anv_device.c | 4 + src/intel/vulkan/anv_genX.h | 5 +- src/intel/vulkan/anv_measure.c | 12 +- src/intel/vulkan/anv_private.h | 114 +++++++----- src/intel/vulkan/anv_tracepoints.py | 163 ++++++++++++++++ src/intel/vulkan/anv_utrace.c | 279 ++++++++++++++++++++++++++++ src/intel/vulkan/anv_wsi.c | 2 + src/intel/vulkan/genX_blorp_exec.c | 11 ++ src/intel/vulkan/genX_cmd_buffer.c | 81 +++++++- src/intel/vulkan/meson.build | 24 ++- 12 files changed, 759 insertions(+), 59 deletions(-) create mode 100644 src/intel/vulkan/anv_tracepoints.py create mode 100644 src/intel/vulkan/anv_utrace.c diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index c7808c8419b..85062f1ea87 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -37,6 +37,7 @@ #include "perf/intel_perf.h" #include "util/debug.h" +#include "util/perf/u_trace.h" /** \file anv_batch_chain.c * @@ -1956,6 +1957,94 @@ setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue) return VK_SUCCESS; } +static VkResult +setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue, + struct anv_utrace_flush_copy *flush) +{ + struct anv_device *device = queue->device; + VkResult result = anv_execbuf_add_bo(device, execbuf, + flush->batch_bo, + &flush->relocs, 0); + if (result != VK_SUCCESS) + return result; + + result = anv_execbuf_add_sync(device, execbuf, flush->sync, + true /* is_signal */, 0 /* value */); + if (result != VK_SUCCESS) + return result; + + if (flush->batch_bo->exec_obj_index != execbuf->bo_count - 1) { + uint32_t idx = 
flush->batch_bo->exec_obj_index; + uint32_t last_idx = execbuf->bo_count - 1; + + struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx]; + assert(execbuf->bos[idx] == flush->batch_bo); + + execbuf->objects[idx] = execbuf->objects[last_idx]; + execbuf->bos[idx] = execbuf->bos[last_idx]; + execbuf->bos[idx]->exec_obj_index = idx; + + execbuf->objects[last_idx] = tmp_obj; + execbuf->bos[last_idx] = flush->batch_bo; + flush->batch_bo->exec_obj_index = last_idx; + } + + if (!device->info.has_llc) { + __builtin_ia32_mfence(); + for (uint32_t i = 0; i < flush->batch_bo->size; i += CACHELINE_SIZE) + __builtin_ia32_clflush(flush->batch_bo->map + i); + } + + execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) execbuf->objects, + .buffer_count = execbuf->bo_count, + .batch_start_offset = 0, + .batch_len = flush->batch.next - flush->batch.start, + .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_FENCE_ARRAY | queue->exec_flags | + (execbuf->has_relocs ? 0 : I915_EXEC_NO_RELOC), + .rsvd1 = device->context_id, + .rsvd2 = 0, + .num_cliprects = execbuf->syncobj_count, + .cliprects_ptr = (uintptr_t)execbuf->syncobjs, + }; + + return VK_SUCCESS; +} + +static VkResult +anv_queue_exec_utrace_locked(struct anv_queue *queue, + struct anv_utrace_flush_copy *flush) +{ + assert(flush->batch_bo); + + struct anv_device *device = queue->device; + struct anv_execbuf execbuf; + anv_execbuf_init(&execbuf); + execbuf.alloc = &device->vk.alloc; + execbuf.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE; + + VkResult result = setup_utrace_execbuf(&execbuf, queue, flush); + if (result != VK_SUCCESS) + goto error; + + int ret = queue->device->info.no_hw ? 
0 : + anv_gem_execbuffer(queue->device, &execbuf.execbuf); + if (ret) + result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m"); + + struct drm_i915_gem_exec_object2 *objects = execbuf.objects; + for (uint32_t k = 0; k < execbuf.bo_count; k++) { + if (anv_bo_is_pinned(execbuf.bos[k])) + assert(execbuf.bos[k]->offset == objects[k].offset); + execbuf.bos[k]->offset = objects[k].offset; + } + + error: + anv_execbuf_finish(&execbuf); + + return result; +} + /* We lock around execbuf for three main reasons: * * 1) When a block pool is resized, we create a new gem handle with a @@ -1992,16 +2081,37 @@ anv_queue_exec_locked(struct anv_queue *queue, uint32_t perf_query_pass) { struct anv_device *device = queue->device; + struct anv_utrace_flush_copy *utrace_flush_data = NULL; struct anv_execbuf execbuf; anv_execbuf_init(&execbuf); execbuf.alloc = &queue->device->vk.alloc; execbuf.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE; execbuf.perf_query_pass = perf_query_pass; + /* Flush the trace points first, they need to be moved */ + VkResult result = + anv_device_utrace_flush_cmd_buffers(queue, + cmd_buffer_count, + cmd_buffers, + &utrace_flush_data); + if (result != VK_SUCCESS) + goto error; + + if (utrace_flush_data && !utrace_flush_data->batch_bo) { + result = anv_execbuf_add_sync(device, &execbuf, + utrace_flush_data->sync, + true /* is_signal */, + 0); + if (result != VK_SUCCESS) + goto error; + + utrace_flush_data = NULL; + } + /* Always add the workaround BO as it includes a driver identifier for the * error_state. 
*/ - VkResult result = + result = anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0); if (result != VK_SUCCESS) goto error; @@ -2148,6 +2258,9 @@ anv_queue_exec_locked(struct anv_queue *queue, error: anv_execbuf_finish(&execbuf); + if (result == VK_SUCCESS && utrace_flush_data) + result = anv_queue_exec_utrace_locked(queue, utrace_flush_data); + return result; } diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 48571622707..d404b3a305b 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -302,6 +302,8 @@ static VkResult anv_create_cmd_buffer( anv_measure_init(cmd_buffer); + u_trace_init(&cmd_buffer->trace, &device->trace_context); + *pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer); return VK_SUCCESS; @@ -343,6 +345,8 @@ VkResult anv_AllocateCommandBuffers( static void anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer) { + u_trace_fini(&cmd_buffer->trace); + anv_measure_destroy(cmd_buffer); list_del(&cmd_buffer->pool_link); @@ -401,6 +405,10 @@ anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer) &cmd_buffer->device->general_state_pool, 16384); anv_measure_reset(cmd_buffer); + + u_trace_fini(&cmd_buffer->trace); + u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->trace_context); + return VK_SUCCESS; } diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 5591dfa294c..a11f35f0dd9 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -3297,6 +3297,8 @@ VkResult anv_CreateDevice( anv_device_perf_init(device); + anv_device_utrace_init(device); + *pDevice = anv_device_to_handle(device); return VK_SUCCESS; @@ -3364,6 +3366,8 @@ void anv_DestroyDevice( if (!device) return; + anv_device_utrace_finish(device); + anv_device_finish_blorp(device); anv_device_finish_rt_shaders(device); diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 45c3b7cc4c5..88b45a889ee 100644 --- 
a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -143,8 +143,9 @@ void genX(blorp_exec)(struct blorp_batch *batch, const struct blorp_params *params); void genX(cmd_emit_timestamp)(struct anv_batch *batch, - struct anv_bo *bo, - uint32_t offset); + struct anv_device *device, + struct anv_address addr, + bool end_of_pipe); void genX(rasterization_mode)(VkPolygonMode raster_mode, diff --git a/src/intel/vulkan/anv_measure.c b/src/intel/vulkan/anv_measure.c index 2ac654b7c05..2855b43bb38 100644 --- a/src/intel/vulkan/anv_measure.c +++ b/src/intel/vulkan/anv_measure.c @@ -158,7 +158,11 @@ anv_measure_start_snapshot(struct anv_cmd_buffer *cmd_buffer, unsigned index = measure->base.index++; - (*device->cmd_emit_timestamp)(batch, measure->bo, index * sizeof(uint64_t)); + (*device->cmd_emit_timestamp)(batch, cmd_buffer->device, + (struct anv_address) { + .bo = measure->bo, + .offset = index * sizeof(uint64_t) }, + true /* end_of_pipe */); if (event_name == NULL) event_name = intel_measure_snapshot_string(type); @@ -195,7 +199,11 @@ anv_measure_end_snapshot(struct anv_cmd_buffer *cmd_buffer, unsigned index = measure->base.index++; assert(index % 2 == 1); - (*device->cmd_emit_timestamp)(batch, measure->bo, index * sizeof(uint64_t)); + (*device->cmd_emit_timestamp)(batch, cmd_buffer->device, + (struct anv_address) { + .bo = measure->bo, + .offset = index * sizeof(uint64_t) }, + true /* end_of_pipe */); struct intel_measure_snapshot *snapshot = &(measure->base.snapshots[index]); memset(snapshot, 0, sizeof(*snapshot)); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 74b9144afa6..5e028f1dfbb 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -57,6 +57,7 @@ #include "util/macros.h" #include "util/hash_table.h" #include "util/list.h" +#include "util/perf/u_trace.h" #include "util/sparse_array.h" #include "util/u_atomic.h" #include "util/u_vector.h" @@ -552,6 +553,46 @@ anv_bo_is_pinned(struct 
anv_bo *bo) #endif } +struct anv_address { + struct anv_bo *bo; + int64_t offset; +}; + +#define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 }) + +static inline struct anv_address +anv_address_from_u64(uint64_t addr_u64) +{ + assert(addr_u64 == intel_canonical_address(addr_u64)); + return (struct anv_address) { + .bo = NULL, + .offset = addr_u64, + }; +} + +static inline bool +anv_address_is_null(struct anv_address addr) +{ + return addr.bo == NULL && addr.offset == 0; +} + +static inline uint64_t +anv_address_physical(struct anv_address addr) +{ + if (addr.bo && anv_bo_is_pinned(addr.bo)) { + return intel_canonical_address(addr.bo->offset + addr.offset); + } else { + return intel_canonical_address(addr.offset); + } +} + +static inline struct anv_address +anv_address_add(struct anv_address addr, uint64_t offset) +{ + addr.offset += offset; + return addr; +} + /* Represents a lock-free linked list of "free" things. This is used by * both the block pool and the state pools. Unfortunately, in order to * solve the ABA problem, we can't use a single uint32_t head. 
@@ -986,7 +1027,7 @@ struct anv_physical_device { int64_t master_minor; struct drm_i915_query_engine_info * engine_info; - void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_bo *, uint32_t ); + void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address, bool); struct intel_measure_device measure_device; }; @@ -1094,11 +1135,6 @@ anv_device_upload_nir(struct anv_device *device, const struct nir_shader *nir, unsigned char sha1_key[20]); -struct anv_address { - struct anv_bo *bo; - int64_t offset; -}; - struct anv_device { struct vk_device vk; @@ -1179,6 +1215,8 @@ struct anv_device { const struct intel_l3_config *l3_config; struct intel_debug_block_frame *debug_frame_desc; + + struct u_trace_context trace_context; }; #if defined(GFX_VERx10) && GFX_VERx10 >= 90 @@ -1506,42 +1544,6 @@ anv_batch_emit_reloc(struct anv_batch *batch, return address_u64; } - -#define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 }) - -static inline struct anv_address -anv_address_from_u64(uint64_t addr_u64) -{ - assert(addr_u64 == intel_canonical_address(addr_u64)); - return (struct anv_address) { - .bo = NULL, - .offset = addr_u64, - }; -} - -static inline bool -anv_address_is_null(struct anv_address addr) -{ - return addr.bo == NULL && addr.offset == 0; -} - -static inline uint64_t -anv_address_physical(struct anv_address addr) -{ - if (addr.bo && anv_bo_is_pinned(addr.bo)) { - return intel_canonical_address(addr.bo->offset + addr.offset); - } else { - return intel_canonical_address(addr.offset); - } -} - -static inline struct anv_address -anv_address_add(struct anv_address addr, uint64_t offset) -{ - addr.offset += offset; - return addr; -} - static inline void write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush) { @@ -3088,6 +3090,11 @@ struct anv_cmd_buffer { * Used to increase allocation size for long command buffers. 
*/ uint32_t total_batch_size; + + /** + * + */ + struct u_trace trace; }; /* Determine whether we can chain a given cmd_buffer to another one. We need @@ -4541,6 +4548,29 @@ struct anv_memcpy_state { struct anv_vb_cache_range vb_dirty; }; +struct anv_utrace_flush_copy { + struct u_trace trace; + + struct anv_reloc_list relocs; + struct anv_batch batch; + struct anv_bo *batch_bo; + + struct anv_bo *trace_bo; + + struct vk_sync *sync; + + struct anv_memcpy_state memcpy_state; +}; + +void anv_device_utrace_init(struct anv_device *device); +void anv_device_utrace_finish(struct anv_device *device); +VkResult +anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, + uint32_t cmd_buffer_count, + struct anv_cmd_buffer **cmd_buffers, + struct anv_utrace_flush_copy **out_flush_data); + + #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ VK_FROM_HANDLE(__anv_type, __name, __handle) diff --git a/src/intel/vulkan/anv_tracepoints.py b/src/intel/vulkan/anv_tracepoints.py new file mode 100644 index 00000000000..ef9a373aed6 --- /dev/null +++ b/src/intel/vulkan/anv_tracepoints.py @@ -0,0 +1,163 @@ +# +# Copyright © 2021 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# + +import argparse +import sys + +# +# Tracepoint definitions: +# +def define_tracepoints(args): + from u_trace import Header, HeaderScope + from u_trace import ForwardDecl + from u_trace import Tracepoint + from u_trace import TracepointArg as Arg + from u_trace import TracepointArgStruct as ArgStruct + + Header('anv_private.h', scope=HeaderScope.SOURCE) + Header('blorp/blorp_priv.h', scope=HeaderScope.HEADER) + + def begin_end_tp(name, tp_args=[], tp_struct=None, end_pipelined=True): + Tracepoint('begin_{0}'.format(name)) + Tracepoint('end_{0}'.format(name), + args=tp_args, + tp_struct=tp_struct, + end_of_pipe=end_pipelined) + + + begin_end_tp('cmd_buffer', + tp_args=[ArgStruct(type='uint8_t', var='level'),], + tp_struct=[Arg(type='uint8_t', name='level', var='level', c_format='%hhu'),], + end_pipelined=False) + + begin_end_tp('render_pass', + tp_args=[ArgStruct(type='uint16_t', var='width'), + ArgStruct(type='uint16_t', var='height'), + ArgStruct(type='uint8_t', var='att_count'), + ArgStruct(type='uint8_t', var='msaa'), + ArgStruct(type='uint32_t', var='subpass_count'),], + tp_struct=[Arg(type='uint16_t', name='width', var='width', c_format='%hu'), + Arg(type='uint16_t', name='height', var='height', c_format='%hu'), + Arg(type='uint8_t', name='att_count', var='att_count', c_format='%hhu'), + Arg(type='uint8_t', name='msaa', var='msaa', c_format='%hhu'), + Arg(type='uint32_t', name='subpass_count', var='subpass_count', c_format='%u'),]) + + begin_end_tp('blorp', + tp_args=[ArgStruct(type='uint32_t', var='width'), + ArgStruct(type='uint32_t', var='height'), + ArgStruct(type='enum isl_aux_op', var='hiz_op'), + ArgStruct(type='enum isl_aux_op', var='fast_clear_op'), + ArgStruct(type='enum 
blorp_shader_type', var='shader_type'), + ArgStruct(type='enum blorp_shader_pipeline', var='shader_pipe'),], + tp_struct=[Arg(type='uint32_t', name='width', var='width', c_format='%u'), + Arg(type='uint32_t', name='height', var='height', c_format='%u'), + Arg(type='enum isl_aux_op', name='hiz_op', var='hiz_op', c_format='%s', to_prim_type='isl_aux_op_to_name({})'), + Arg(type='enum isl_aux_op', name='fast_clear_op', var='fast_clear_op', c_format='%s', to_prim_type='isl_aux_op_to_name({})'), + Arg(type='enum blorp_shader_type', name='type', var='shader_type', c_format='%s', to_prim_type='blorp_shader_type_to_name({})'), + Arg(type='enum blorp_shader_pipeline', name='pipe', var='shader_pipe', c_format='%s', to_prim_type='blorp_shader_pipeline_to_name({})'),]) + + begin_end_tp('draw', + tp_args=[ArgStruct(type='uint32_t', var='count'),], + tp_struct=[Arg(type='uint32_t', name='count', var='count', c_format='%u'),]) + begin_end_tp('draw_multi', + tp_args=[ArgStruct(type='uint32_t', var='count'),], + tp_struct=[Arg(type='uint32_t', name='count', var='count', c_format='%u'),]) + begin_end_tp('draw_indexed', + tp_args=[ArgStruct(type='uint32_t', var='count'),], + tp_struct=[Arg(type='uint32_t', name='count', var='count', c_format='%u'),]) + begin_end_tp('draw_indexed_multi', + tp_args=[ArgStruct(type='uint32_t', var='count'),], + tp_struct=[Arg(type='uint32_t', name='count', var='count', c_format='%u'),]) + begin_end_tp('draw_indirect_byte_count', + tp_args=[ArgStruct(type='uint32_t', var='instance_count'),], + tp_struct=[Arg(type='uint32_t', name='instance_count', var='instance_count', c_format='%u'),]) + begin_end_tp('draw_indirect', + tp_args=[ArgStruct(type='uint32_t', var='draw_count'),], + tp_struct=[Arg(type='uint32_t', name='draw_count', var='draw_count', c_format='%u'),]) + begin_end_tp('draw_indexed_indirect', + tp_args=[ArgStruct(type='uint32_t', var='draw_count'),], + tp_struct=[Arg(type='uint32_t', name='draw_count', var='draw_count', c_format='%u'),]) + 
begin_end_tp('draw_indirect_count', + tp_args=[ArgStruct(type='uint32_t', var='max_draw_count'),], + tp_struct=[Arg(type='uint32_t', name='max_draw_count', var='max_draw_count', c_format='%u'),]) + begin_end_tp('draw_indexed_indirect_count', + tp_args=[ArgStruct(type='uint32_t', var='max_draw_count'),], + tp_struct=[Arg(type='uint32_t', name='max_draw_count', var='max_draw_count', c_format='%u'),]) + + begin_end_tp('compute', + tp_args=[ArgStruct(type='uint32_t', var='group_x'), + ArgStruct(type='uint32_t', var='group_y'), + ArgStruct(type='uint32_t', var='group_z'),], + tp_struct=[Arg(type='uint32_t', name='group_x', var='group_x', c_format='%u'), + Arg(type='uint32_t', name='group_y', var='group_y', c_format='%u'), + Arg(type='uint32_t', name='group_z', var='group_z', c_format='%u'),]) + + def stall_args(args): + fmt = '' + exprs = [] + for a in args: + fmt += '%s' + exprs.append('(__entry->flags & ANV_PIPE_{0}_BIT) ? "+{1}" : ""'.format(a[0], a[1])) + fmt = [fmt] + fmt += exprs + return fmt + + Tracepoint('stall', + args=[ArgStruct(type='uint32_t', var='flags'),], + tp_struct=[Arg(type='uint32_t', name='flags', var='flags', c_format='0x%x'),], + tp_print=stall_args([['DEPTH_CACHE_FLUSH', 'depth_flush'], + ['DATA_CACHE_FLUSH', 'dc_flush'], + ['HDC_PIPELINE_FLUSH', 'hdc_flush'], + ['RENDER_TARGET_CACHE_FLUSH', 'rt_flush'], + ['TILE_CACHE_FLUSH', 'tile_flush'], + ['STATE_CACHE_INVALIDATE', 'state_inval'], + ['CONSTANT_CACHE_INVALIDATE', 'const_inval'], + ['VF_CACHE_INVALIDATE', 'vf_inval'], + ['TEXTURE_CACHE_INVALIDATE', 'tex_inval'], + ['INSTRUCTION_CACHE_INVALIDATE', 'ic_inval'], + ['STALL_AT_SCOREBOARD', 'pb_stall'], + ['DEPTH_STALL', 'depth_stall'], + ['CS_STALL', 'cs_stall'], + ])) + + + +def generate_code(args): + from u_trace import utrace_generate + + utrace_generate(cpath=args.utrace_src, hpath=args.utrace_hdr, ctx_param='struct anv_device *dev') + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--import-path', 
required=True) + parser.add_argument('--utrace-src', required=True) + parser.add_argument('--utrace-hdr', required=True) + args = parser.parse_args() + sys.path.insert(0, args.import_path) + define_tracepoints(args) + generate_code(args) + + +if __name__ == '__main__': + main() diff --git a/src/intel/vulkan/anv_utrace.c b/src/intel/vulkan/anv_utrace.c new file mode 100644 index 00000000000..349fa554ffb --- /dev/null +++ b/src/intel/vulkan/anv_utrace.c @@ -0,0 +1,279 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_private.h" + +#include "perf/intel_perf.h" + +static uint32_t +command_buffers_count_utraces(struct anv_device *device, + uint32_t cmd_buffer_count, + struct anv_cmd_buffer **cmd_buffers, + uint32_t *utrace_copies) +{ + if (!u_trace_context_actively_tracing(&device->trace_context)) + return 0; + + uint32_t utraces = 0; + for (uint32_t i = 0; i < cmd_buffer_count; i++) { + if (u_trace_has_points(&cmd_buffers[i]->trace)) { + utraces++; + if (!(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) + *utrace_copies += list_length(&cmd_buffers[i]->trace.trace_chunks); + } + } + + return utraces; +} + +static void +anv_utrace_delete_flush_data(struct u_trace_context *utctx, + void *flush_data) +{ + struct anv_device *device = + container_of(utctx, struct anv_device, trace_context); + struct anv_utrace_flush_copy *flush = flush_data; + + u_trace_fini(&flush->trace); + + if (flush->trace_bo) { + assert(flush->batch_bo); + anv_reloc_list_finish(&flush->relocs, &device->vk.alloc); + anv_device_release_bo(device, flush->batch_bo); + anv_device_release_bo(device, flush->trace_bo); + } + + vk_sync_destroy(&device->vk, flush->sync); + + vk_free(&device->vk.alloc, flush); +} + +static void +anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx, + void *cmdstream, + void *ts_from, uint32_t from_offset, + void *ts_to, uint32_t to_offset, + uint32_t count) +{ + struct anv_device *device = + container_of(utctx, struct anv_device, trace_context); + struct anv_utrace_flush_copy *flush = cmdstream; + struct anv_address from_addr = (struct anv_address) { + .bo = ts_from, .offset = from_offset * sizeof(uint64_t) }; + struct anv_address to_addr = (struct anv_address) { + .bo = ts_to, .offset = to_offset * sizeof(uint64_t) }; + + anv_genX(&device->info, emit_so_memcpy)(&flush->memcpy_state, + to_addr, from_addr, count * sizeof(uint64_t)); +} + +VkResult +anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, + uint32_t 
cmd_buffer_count, + struct anv_cmd_buffer **cmd_buffers, + struct anv_utrace_flush_copy **out_flush_data) +{ + struct anv_device *device = queue->device; + uint32_t utrace_copies = 0; + uint32_t utraces = command_buffers_count_utraces(device, + cmd_buffer_count, + cmd_buffers, + &utrace_copies); + if (!utraces) { + *out_flush_data = NULL; + return VK_SUCCESS; + } + + VkResult result; + struct anv_utrace_flush_copy *flush = + vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_flush_copy), + 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!flush) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + u_trace_init(&flush->trace, &device->trace_context); + + result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type, + 0, 0, &flush->sync); + if (result != VK_SUCCESS) + goto error_sync; + + if (utrace_copies > 0) { + result = + anv_device_alloc_bo(device, "utrace-copy-buf", utrace_copies * 4096, + ANV_BO_ALLOC_MAPPED, 0 /* explicit_address */, + &flush->trace_bo); + if (result != VK_SUCCESS) + goto error_trace_buf; + + result = + anv_device_alloc_bo(device, "utrace-copy-batch", + /* 128 dwords of setup + 64 dwords per copy */ + align_u32(512 + 64 * utrace_copies, 4096), + ANV_BO_ALLOC_MAPPED, 0 /* explicit_address */, + &flush->batch_bo); + if (result != VK_SUCCESS) + goto error_batch_buf; + + result = anv_reloc_list_init(&flush->relocs, &device->vk.alloc); + if (result != VK_SUCCESS) + goto error_reloc_list; + + flush->batch.alloc = &device->vk.alloc; + flush->batch.relocs = &flush->relocs; + anv_batch_set_storage(&flush->batch, + (struct anv_address) { .bo = flush->batch_bo, }, + flush->batch_bo->map, flush->batch_bo->size); + + /* Emit the copies */ + anv_genX(&device->info, emit_so_memcpy_init)(&flush->memcpy_state, + device, + &flush->batch); + for (uint32_t i = 0; i < cmd_buffer_count; i++) { + if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) { + u_trace_flush(&cmd_buffers[i]->trace, flush, false); + } else { + 
u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace), + u_trace_end_iterator(&cmd_buffers[i]->trace), + &flush->trace, + flush, + anv_device_utrace_emit_copy_ts_buffer); + } + } + anv_genX(&device->info, emit_so_memcpy_fini)(&flush->memcpy_state); + + u_trace_flush(&flush->trace, flush, true); + + if (flush->batch.status != VK_SUCCESS) { + result = flush->batch.status; + goto error_batch; + } + } else { + for (uint32_t i = 0; i < cmd_buffer_count; i++) { + assert(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); + u_trace_flush(&cmd_buffers[i]->trace, flush, i == (cmd_buffer_count - 1)); + } + } + + *out_flush_data = flush; + + return VK_SUCCESS; + + error_batch: + anv_reloc_list_finish(&flush->relocs, &device->vk.alloc); + error_reloc_list: + anv_device_release_bo(device, flush->batch_bo); + error_batch_buf: + anv_device_release_bo(device, flush->trace_bo); + error_trace_buf: + vk_sync_destroy(&device->vk, flush->sync); + error_sync: + vk_free(&device->vk.alloc, flush); + return result; +} + +static void * +anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b) +{ + struct anv_device *device = + container_of(utctx, struct anv_device, trace_context); + + struct anv_bo *bo = NULL; + UNUSED VkResult result = + anv_device_alloc_bo(device, "utrace-ts", align_u32(size_b, 4096), + ANV_BO_ALLOC_MAPPED, 0, &bo); + assert(result == VK_SUCCESS); + + return bo; +} + +static void +anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps) +{ + struct anv_device *device = + container_of(utctx, struct anv_device, trace_context); + struct anv_bo *bo = timestamps; + + anv_device_release_bo(device, bo); +} + +static void +anv_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps, unsigned idx, + bool end_of_pipe) +{ + struct anv_cmd_buffer *cmd_buffer = cs; + struct anv_device *device = cmd_buffer->device; + struct anv_bo *bo = timestamps; + + 
device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device, + (struct anv_address) { + .bo = bo, + .offset = idx * sizeof(uint64_t) }, + end_of_pipe); +} + +static uint64_t +anv_utrace_read_ts(struct u_trace_context *utctx, + void *timestamps, unsigned idx, void *flush_data) +{ + struct anv_device *device = + container_of(utctx, struct anv_device, trace_context); + struct anv_bo *bo = timestamps; + struct anv_utrace_flush_copy *flush = flush_data; + + /* Only need to stall on results for the first entry: */ + if (idx == 0) { + UNUSED VkResult result = + vk_sync_wait(&device->vk, + flush->sync, + 0, + VK_SYNC_WAIT_COMPLETE, + os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE)); + assert(result == VK_SUCCESS); + } + + uint64_t *ts = bo->map; + + /* Don't translate the no-timestamp marker: */ + if (ts[idx] == U_TRACE_NO_TIMESTAMP) + return U_TRACE_NO_TIMESTAMP; + + return intel_device_info_timebase_scale(&device->info, ts[idx]); +} + +void +anv_device_utrace_init(struct anv_device *device) +{ + u_trace_context_init(&device->trace_context, device, + anv_utrace_create_ts_buffer, + anv_utrace_destroy_ts_buffer, + anv_utrace_record_ts, + anv_utrace_read_ts, + anv_utrace_delete_flush_data); +} + +void +anv_device_utrace_finish(struct anv_device *device) +{ + u_trace_context_fini(&device->trace_context); +} diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c index df973b65237..037965a4ec8 100644 --- a/src/intel/vulkan/anv_wsi.c +++ b/src/intel/vulkan/anv_wsi.c @@ -107,5 +107,7 @@ VkResult anv_QueuePresentKHR( vk_semaphore_reset_temporary(&queue->device->vk, semaphore); } + u_trace_context_process(&queue->device->trace_context, true); + return result; } diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index 85ea5f65170..bbb90b4ace4 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -34,10 +34,13 @@ #include "common/intel_l3_config.h" #include "blorp/blorp_genX_exec.h" +#include 
"anv_tracepoints.h" + static void blorp_measure_start(struct blorp_batch *_batch, const struct blorp_params *params) { struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch; + trace_begin_blorp(&cmd_buffer->trace, cmd_buffer); anv_measure_snapshot(cmd_buffer, params->snapshot_type, NULL, 0); @@ -46,6 +49,14 @@ static void blorp_measure_start(struct blorp_batch *_batch, static void blorp_measure_end(struct blorp_batch *_batch, const struct blorp_params *params) { + struct anv_cmd_buffer *cmd_buffer = _batch->driver_batch; + trace_end_blorp(&cmd_buffer->trace, cmd_buffer, + params->x1 - params->x0, + params->y1 - params->y0, + params->hiz_op, + params->fast_clear_op, + params->shader_type, + params->shader_pipeline); } static void * diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 57c398f6b4b..1aa79f3805a 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -38,6 +38,8 @@ #include "nir/nir_xfb_info.h" +#include "anv_tracepoints.h" + /* We reserve : * - GPR 14 for secondary command buffer returns * - GPR 15 for conditional rendering @@ -1761,6 +1763,8 @@ genX(BeginCommandBuffer)( if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) cmd_buffer->usage_flags &= ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; + trace_begin_cmd_buffer(&cmd_buffer->trace, cmd_buffer); + genX(cmd_buffer_emit_state_base_address)(cmd_buffer); /* We sometimes store vertex data in the dynamic state buffer for blorp @@ -1934,6 +1938,8 @@ genX(EndCommandBuffer)( emit_isp_disable(cmd_buffer); + trace_end_cmd_buffer(&cmd_buffer->trace, cmd_buffer, cmd_buffer->level); + anv_cmd_buffer_end_batch_buffer(cmd_buffer); return VK_SUCCESS; @@ -2399,6 +2405,9 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) else if (bits == 0) return; + if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS | ANV_PIPE_INVALIDATE_BITS)) + trace_stall(&cmd_buffer->trace, cmd_buffer, bits); + if ((GFX_VER >= 8 && 
GFX_VER <= 9) && (bits & ANV_PIPE_CS_STALL_BIT) && (bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT)) { @@ -3954,6 +3963,7 @@ void genX(CmdDraw)( anv_measure_snapshot(cmd_buffer, INTEL_SNAPSHOT_DRAW, "draw", count); + trace_begin_draw(&cmd_buffer->trace, cmd_buffer); genX(cmd_buffer_flush_state)(cmd_buffer); @@ -3982,6 +3992,8 @@ void genX(CmdDraw)( } update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL); + + trace_end_draw(&cmd_buffer->trace, cmd_buffer, count); } void genX(CmdDrawMultiEXT)( @@ -4006,6 +4018,7 @@ void genX(CmdDrawMultiEXT)( anv_measure_snapshot(cmd_buffer, INTEL_SNAPSHOT_DRAW, "draw_multi", count); + trace_begin_draw_multi(&cmd_buffer->trace, cmd_buffer); genX(cmd_buffer_flush_state)(cmd_buffer); @@ -4037,6 +4050,8 @@ void genX(CmdDrawMultiEXT)( } update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL); + + trace_end_draw_multi(&cmd_buffer->trace, cmd_buffer, count); } void genX(CmdDrawIndexed)( @@ -4062,6 +4077,7 @@ void genX(CmdDrawIndexed)( INTEL_SNAPSHOT_DRAW, "draw indexed", count); + trace_begin_draw_indexed(&cmd_buffer->trace, cmd_buffer); genX(cmd_buffer_flush_state)(cmd_buffer); @@ -4088,6 +4104,8 @@ void genX(CmdDrawIndexed)( } update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM); + + trace_end_draw_indexed(&cmd_buffer->trace, cmd_buffer, count); } void genX(CmdDrawMultiIndexedEXT)( @@ -4114,6 +4132,7 @@ void genX(CmdDrawMultiIndexedEXT)( INTEL_SNAPSHOT_DRAW, "draw indexed_multi", count); + trace_begin_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer); genX(cmd_buffer_flush_state)(cmd_buffer); @@ -4200,6 +4219,8 @@ void genX(CmdDrawMultiIndexedEXT)( } update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM); + + trace_end_draw_indexed_multi(&cmd_buffer->trace, cmd_buffer, count); } /* Auto-Draw / Indirect Registers */ @@ -4235,6 +4256,7 @@ void genX(CmdDrawIndirectByteCountEXT)( INTEL_SNAPSHOT_DRAW, "draw indirect byte count", instanceCount); + trace_begin_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer); 
genX(cmd_buffer_flush_state)(cmd_buffer); @@ -4277,6 +4299,9 @@ void genX(CmdDrawIndirectByteCountEXT)( } update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL); + + trace_end_draw_indirect_byte_count(&cmd_buffer->trace, cmd_buffer, + instanceCount); #endif /* GFX_VERx10 >= 75 */ } @@ -4333,6 +4358,8 @@ void genX(CmdDrawIndirect)( if (anv_batch_has_error(&cmd_buffer->batch)) return; + trace_begin_draw_indirect(&cmd_buffer->trace, cmd_buffer); + genX(cmd_buffer_flush_state)(cmd_buffer); if (cmd_buffer->state.conditional_render_enabled) @@ -4365,6 +4392,8 @@ void genX(CmdDrawIndirect)( offset += stride; } + + trace_end_draw_indirect(&cmd_buffer->trace, cmd_buffer, drawCount); } void genX(CmdDrawIndexedIndirect)( @@ -4382,6 +4411,8 @@ void genX(CmdDrawIndexedIndirect)( if (anv_batch_has_error(&cmd_buffer->batch)) return; + trace_begin_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer); + genX(cmd_buffer_flush_state)(cmd_buffer); if (cmd_buffer->state.conditional_render_enabled) @@ -4415,6 +4446,8 @@ void genX(CmdDrawIndexedIndirect)( offset += stride; } + + trace_end_draw_indexed_indirect(&cmd_buffer->trace, cmd_buffer, drawCount); } static struct mi_value @@ -4541,6 +4574,8 @@ void genX(CmdDrawIndirectCount)( if (anv_batch_has_error(&cmd_buffer->batch)) return; + trace_begin_draw_indirect_count(&cmd_buffer->trace, cmd_buffer); + genX(cmd_buffer_flush_state)(cmd_buffer); struct mi_builder b; @@ -4580,6 +4615,8 @@ void genX(CmdDrawIndirectCount)( } mi_value_unref(&b, max); + + trace_end_draw_indirect_count(&cmd_buffer->trace, cmd_buffer, maxDrawCount); } void genX(CmdDrawIndexedIndirectCount)( @@ -4601,6 +4638,8 @@ void genX(CmdDrawIndexedIndirectCount)( if (anv_batch_has_error(&cmd_buffer->batch)) return; + trace_begin_draw_indexed_indirect_count(&cmd_buffer->trace, cmd_buffer); + genX(cmd_buffer_flush_state)(cmd_buffer); struct mi_builder b; @@ -4641,6 +4680,9 @@ void genX(CmdDrawIndexedIndirectCount)( } mi_value_unref(&b, max); + + 
trace_end_draw_indexed_indirect_count(&cmd_buffer->trace, cmd_buffer, maxDrawCount); + } void genX(CmdBeginTransformFeedbackEXT)( @@ -5016,6 +5058,8 @@ void genX(CmdDispatchBase)( prog_data->local_size[0] * prog_data->local_size[1] * prog_data->local_size[2]); + trace_begin_compute(&cmd_buffer->trace, cmd_buffer); + if (prog_data->uses_num_work_groups) { struct anv_state state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); @@ -5039,6 +5083,9 @@ void genX(CmdDispatchBase)( emit_cs_walker(cmd_buffer, pipeline, false, prog_data, groupCountX, groupCountY, groupCountZ); + + trace_end_compute(&cmd_buffer->trace, cmd_buffer, + groupCountX, groupCountY, groupCountZ); } #define GPGPU_DISPATCHDIMX 0x2500 @@ -5072,6 +5119,7 @@ void genX(CmdDispatchIndirect)( INTEL_SNAPSHOT_COMPUTE, "compute indirect", 0); + trace_begin_compute(&cmd_buffer->trace, cmd_buffer); if (prog_data->uses_num_work_groups) { cmd_buffer->state.compute.num_workgroups = addr; @@ -5145,6 +5193,8 @@ void genX(CmdDispatchIndirect)( #endif emit_cs_walker(cmd_buffer, pipeline, true, prog_data, 0, 0, 0); + + trace_end_compute(&cmd_buffer->trace, cmd_buffer, 0, 0, 0); } #if GFX_VERx10 >= 125 @@ -6750,6 +6800,7 @@ void genX(CmdBeginRenderPass2)( cmd_buffer->state.render_area = pRenderPassBeginInfo->renderArea; anv_measure_beginrenderpass(cmd_buffer); + trace_begin_render_pass(&cmd_buffer->trace, cmd_buffer); result = genX(cmd_buffer_setup_attachments)(cmd_buffer, pass, framebuffer, @@ -6792,6 +6843,14 @@ void genX(CmdEndRenderPass2)( cmd_buffer_end_subpass(cmd_buffer); + trace_end_render_pass(&cmd_buffer->trace, cmd_buffer, + cmd_buffer->state.render_area.extent.width, + cmd_buffer->state.render_area.extent.height, + cmd_buffer->state.pass->attachment_count, + cmd_buffer->state.pass->attachment_count > 0 ? + cmd_buffer->state.pass->attachments[0].samples : 0, + cmd_buffer->state.pass->subpass_count); + cmd_buffer->state.hiz_enabled = false; /* Remove references to render pass specific state. 
This enables us to @@ -7030,13 +7089,21 @@ VkResult genX(CmdSetPerformanceStreamMarkerINTEL)( return VK_SUCCESS; } +#define TIMESTAMP 0x2358 + void genX(cmd_emit_timestamp)(struct anv_batch *batch, - struct anv_bo *bo, - uint32_t offset) { - anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) { - pc.CommandStreamerStallEnable = true; - pc.PostSyncOperation = WriteTimestamp; - pc.Address = (struct anv_address) {bo, offset}; - anv_debug_dump_pc(pc); + struct anv_device *device, + struct anv_address addr, + bool end_of_pipe) { + if (end_of_pipe) { + anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) { + pc.PostSyncOperation = WriteTimestamp; + pc.Address = addr; + anv_debug_dump_pc(pc); + } + } else { + struct mi_builder b; + mi_builder_init(&b, &device->info, batch); + mi_store(&b, mi_mem64(addr), mi_reg64(TIMESTAMP)); } } diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index fa693551030..be0308cacbe 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -33,6 +33,19 @@ anv_entrypoints = custom_target( depend_files : vk_entrypoints_gen_depend_files, ) +anv_tracepoints = custom_target( + 'anv_tracepoints.[ch]', + input: 'anv_tracepoints.py', + output: ['anv_tracepoints.h', 'anv_tracepoints.c'], + command: [ + prog_python, '@INPUT@', + '-p', join_paths(meson.source_root(), 'src/util/perf/'), + '--utrace-hdr', '@OUTPUT0@', + '--utrace-src', '@OUTPUT1@', + ], + depend_files: u_trace_py, +) + intel_icd = custom_target( 'intel_icd', input : [vk_icd_gen, vk_api_xml], @@ -65,7 +78,7 @@ foreach g : [['70', ['gfx7_cmd_buffer.c']], ['75', ['gfx7_cmd_buffer.c']], _gfx_ver = g[0] libanv_per_hw_ver_libs += static_library( 'anv_per_hw_ver@0@'.format(_gfx_ver), - [anv_per_hw_ver_files, g[1], anv_entrypoints[0]], + [anv_per_hw_ver_files, g[1], anv_entrypoints[0], anv_tracepoints[0]], include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_compiler, inc_intel, ], @@ -111,6 +124,7 @@ libanv_files = files( 
'anv_private.h', 'anv_queue.c', 'anv_util.c', + 'anv_utrace.c', 'anv_wsi.c', ) @@ -154,7 +168,7 @@ libanv_common = static_library( 'anv_common', [ libanv_files, anv_entrypoints, sha1_h, - gen_xml_pack, + gen_xml_pack ], include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler, @@ -167,7 +181,7 @@ libanv_common = static_library( libvulkan_intel = shared_library( 'vulkan_intel', - [files('anv_gem.c'), anv_entrypoints[0]], + [files('anv_gem.c'), anv_entrypoints[0], anv_tracepoints], include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler, ], @@ -202,7 +216,7 @@ endif if with_tests libvulkan_intel_test = static_library( 'vulkan_intel_test', - [files('anv_gem_stubs.c'), anv_entrypoints[0]], + [files('anv_gem_stubs.c'), anv_entrypoints[0], anv_tracepoints[0]], include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_compiler, ], @@ -227,7 +241,7 @@ if with_tests 'anv_@0@'.format(t), executable( t, - ['tests/@0@.c'.format(t), anv_entrypoints[0]], + ['tests/@0@.c'.format(t), anv_entrypoints[0], anv_tracepoints[0]], c_args : [ c_sse2_args ], link_with : libvulkan_intel_test, dependencies : [