diff --git a/src/amd/common/ac_rgp.h b/src/amd/common/ac_rgp.h index 205e06e9a4f..ce31b9417db 100644 --- a/src/amd/common/ac_rgp.h +++ b/src/amd/common/ac_rgp.h @@ -42,6 +42,8 @@ struct rgp_shader_data { uint32_t elf_symbol_offset; uint32_t hw_stage; uint32_t is_combined; + char rt_shader_name[32]; + uint32_t rt_stack_size; }; struct rgp_code_object_record { @@ -49,6 +51,8 @@ struct rgp_code_object_record { struct rgp_shader_data shader_data[MESA_VULKAN_SHADER_STAGES]; uint32_t num_shaders_combined; /* count combined shaders as one count */ uint64_t pipeline_hash[2]; + + bool is_rt; struct list_head list; }; diff --git a/src/amd/common/ac_rgp_elf_object_pack.c b/src/amd/common/ac_rgp_elf_object_pack.c index f55fd6b1e50..7e83df24def 100644 --- a/src/amd/common/ac_rgp_elf_object_pack.c +++ b/src/amd/common/ac_rgp_elf_object_pack.c @@ -27,15 +27,6 @@ #define EM_AMDGPU 224 #endif -char shader_stage_api_string[6][10] = { - ".vertex", /* vertex */ - ".hull", /* tessellation control */ - ".domain", /* tessellation evaluation */ - ".geometry", /* geometry */ - ".pixel", /* fragment */ - ".compute" /* compute */ -}; - char hw_stage_string[RGP_HW_STAGE_MAX][4] = { ".vs", ".ls", @@ -56,6 +47,57 @@ char hw_stage_symbol_string[RGP_HW_STAGE_MAX][16] = { "_amdgpu_cs_main" }; +static const char * +get_api_stage_string(gl_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_VERTEX: + return".vertex"; + case MESA_SHADER_TESS_CTRL: + return".hull"; + case MESA_SHADER_TESS_EVAL: + return".domain"; + case MESA_SHADER_GEOMETRY: + return".geometry"; + case MESA_SHADER_FRAGMENT: + return".pixel"; + default: + /* RT shaders are implemented using compute HW stages, so use ".compute" + for any stage other than graphics stages */ + return".compute"; + } +} + +static const char * +get_hw_stage_symbol(struct rgp_code_object_record *record, unsigned index) +{ + if (record->is_rt) + return record->shader_data[index].rt_shader_name; + else + return hw_stage_symbol_string[record->shader_data[index].hw_stage]; +} + +static const char * +rt_subtype_from_stage(gl_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_RAYGEN: + return "RayGeneration"; + case MESA_SHADER_MISS: + return "Miss"; + case MESA_SHADER_CLOSEST_HIT: + return "ClosestHit"; + case MESA_SHADER_CALLABLE: + return "Callable"; + case MESA_SHADER_INTERSECTION: + return "Traversal"; + /* There are also AnyHit and Intersection subtypes, but on RADV + * these are inlined into the traversal shader */ + default: + return "Unknown"; + } +} + /** * rgp profiler requires data for few variables stored in msgpack format * in notes section. This function writes the data from @@ -85,7 +127,7 @@ ac_rgp_write_msgpack(FILE *output, ac_msgpack_add_fixstr(&msgpack, "amdpal.pipelines"); ac_msgpack_add_fixarray_op(&msgpack, 1); - ac_msgpack_add_fixmap_op(&msgpack, 6); + ac_msgpack_add_fixmap_op(&msgpack, 6 + record->is_rt); /* 1 * This not used in RGP but data needs to be present @@ -105,8 +147,7 @@ ac_rgp_write_msgpack(FILE *output, mask = record->shader_stages_mask; while(mask) { i = u_bit_scan(&mask); - ac_msgpack_add_fixstr(&msgpack, - shader_stage_api_string[i]); + ac_msgpack_add_fixstr(&msgpack, get_api_stage_string(i)); ac_msgpack_add_fixmap_op(&msgpack, 2); ac_msgpack_add_fixstr(&msgpack, ".api_shader_hash"); ac_msgpack_add_fixarray_op(&msgpack, 2); @@ -134,8 +175,7 @@ ac_rgp_write_msgpack(FILE *output, record->shader_data[i].hw_stage]); ac_msgpack_add_fixmap_op(&msgpack, 6); ac_msgpack_add_fixstr(&msgpack, ".entry_point"); - ac_msgpack_add_fixstr(&msgpack, hw_stage_symbol_string[ - record->shader_data[i].hw_stage]); + ac_msgpack_add_fixstr(&msgpack, get_hw_stage_symbol(record, i)); ac_msgpack_add_fixstr(&msgpack, ".sgpr_count"); ac_msgpack_add_uint(&msgpack, @@ -167,6 +207,39 @@ ac_rgp_write_msgpack(FILE *output, ac_msgpack_add_fixstr(&msgpack, ".api"); ac_msgpack_add_fixstr(&msgpack, "Vulkan"); + if (record->is_rt) { + /* 7 */ + ac_msgpack_add_fixstr(&msgpack, ".shader_functions"); + ac_msgpack_add_fixmap_op(&msgpack, num_shaders); + mask = record->shader_stages_mask; + while (mask) { + i = u_bit_scan(&mask); + ac_msgpack_add_fixstr(&msgpack, record->shader_data[i].rt_shader_name); + ac_msgpack_add_fixmap_op(&msgpack, 7); + ac_msgpack_add_fixstr(&msgpack, ".stack_frame_size_in_bytes"); + ac_msgpack_add_uint(&msgpack, record->shader_data[i].rt_stack_size); + + ac_msgpack_add_fixstr(&msgpack, ".shader_subtype"); + ac_msgpack_add_fixstr(&msgpack, rt_subtype_from_stage(i)); + ac_msgpack_add_fixstr(&msgpack, ".api_shader_hash"); + ac_msgpack_add_fixarray_op(&msgpack, 2); + ac_msgpack_add_uint(&msgpack, record->pipeline_hash[0]); + ac_msgpack_add_uint(&msgpack, record->pipeline_hash[1]); + + ac_msgpack_add_fixstr(&msgpack, ".sgpr_count"); + ac_msgpack_add_uint(&msgpack, record->shader_data[i].sgpr_count); + + ac_msgpack_add_fixstr(&msgpack, ".vgpr_count"); + ac_msgpack_add_uint(&msgpack, record->shader_data[i].vgpr_count); + + ac_msgpack_add_fixstr(&msgpack, ".lds_size"); + ac_msgpack_add_uint(&msgpack, record->shader_data[i].lds_size); + + ac_msgpack_add_fixstr(&msgpack, ".scratch_memory_size"); + ac_msgpack_add_uint(&msgpack, + record->shader_data[i].scratch_memory_size); + } + } ac_msgpack_resize_if_required(&msgpack, 4 - (msgpack.offset % 4)); msgpack.offset = ALIGN(msgpack.offset, 4); fwrite(msgpack.mem, 1, msgpack.offset, output); @@ -316,13 +389,18 @@ ac_rgp_file_write_elf_symbol_table(FILE *output, uint32_t *elf_size_calc, memset(&elf_sym, 0x00, sizeof(elf_sym)); fwrite(&elf_sym, 1, sizeof(elf_sym), output); + uint32_t rt_name_offset = 0; + while(mask) { i = u_bit_scan(&mask); if (record->shader_data[i].is_combined) continue; - elf_sym.st_name = rgp_elf_hw_stage_string_offset - [record->shader_data[i].hw_stage]; + if (record->is_rt) { + elf_sym.st_name = sizeof(rgp_elf_strtab) + rt_name_offset; + rt_name_offset += strlen(record->shader_data[i].rt_shader_name) + 1; + } else + elf_sym.st_name = rgp_elf_hw_stage_string_offset[record->shader_data[i].hw_stage]; elf_sym.st_info = STT_FUNC; elf_sym.st_other = 0x0; elf_sym.st_shndx = RGP_ELF_TEXT_SEC_HEADER_INDEX; @@ -369,6 +447,7 @@ ac_rgp_file_write_elf_object(FILE *output, size_t file_elf_start, uint32_t msgpack_size = 0; size_t note_sec_start; uint32_t sh_offset; + uint32_t strtab_size = sizeof(rgp_elf_strtab); /* Give space for header in file. It will be written to file at the end */ fseek(output, sizeof(Elf64_Ehdr), SEEK_CUR); @@ -391,7 +470,19 @@ ac_rgp_file_write_elf_object(FILE *output, size_t file_elf_start, /* write hardcoded string table */ fwrite(&rgp_elf_strtab, 1, sizeof(rgp_elf_strtab), output); - elf_size_calc += sizeof(rgp_elf_strtab); + if (record->is_rt) { + uint32_t mask = record->shader_stages_mask; + while (mask) { + int i = u_bit_scan(&mask); + + char *name = record->shader_data[i].rt_shader_name; + uint32_t name_len = strlen(name); + + fwrite(name, 1, name_len + 1, output); + strtab_size += name_len + 1; + } + } + elf_size_calc += strtab_size; /* write shader code as .text code */ ac_rgp_file_write_elf_text(output, &elf_size_calc, record, &text_size); @@ -424,7 +515,7 @@ ac_rgp_file_write_elf_object(FILE *output, size_t file_elf_start, sec_hdr[1].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->strtab; sec_hdr[1].sh_type = SHT_STRTAB; sec_hdr[1].sh_offset = sizeof(Elf64_Ehdr); - sec_hdr[1].sh_size = sizeof(rgp_elf_strtab); + sec_hdr[1].sh_size = strtab_size; /* text must be at index 2 as used in other places*/ sec_hdr[2].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->text; diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c index 02b2f70b03f..0080dfed0c1 100644 --- a/src/amd/vulkan/layers/radv_sqtt_layer.c +++ b/src/amd/vulkan/layers/radv_sqtt_layer.c @@ -1118,6 +1118,7 @@ radv_add_code_object(struct radv_device *device, struct radv_pipeline *pipeline) record->num_shaders_combined = 0; record->pipeline_hash[0] = pipeline->pipeline_hash; record->pipeline_hash[1] = pipeline->pipeline_hash; + record->is_rt = false; for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) { struct radv_shader *shader = pipeline->shaders[i];