diff --git a/src/amd/vulkan/nir/radv_nir_lower_abi.c b/src/amd/vulkan/nir/radv_nir_lower_abi.c index fb9c7406c51..eb4114f247d 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_abi.c +++ b/src/amd/vulkan/nir/radv_nir_lower_abi.c @@ -287,7 +287,7 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state) replacement = ac_nir_load_arg(b, &s->args->ac, s->args->vgt_esgs_ring_itemsize); } else { const unsigned stride = - s->info->is_ngg ? s->info->ngg_info.vgt_esgs_ring_itemsize : s->info->gs_ring_info.vgt_esgs_ring_itemsize; + s->info->is_ngg ? s->info->ngg_info.vgt_esgs_ring_itemsize : s->info->gs_ring_info.esgs_itemsize; replacement = nir_imm_int(b, stride); } break; diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index ced13dadbd0..8e90285742c 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -5593,7 +5593,7 @@ gfx10_emit_ge_cntl(struct radv_cmd_buffer *cmd_buffer) } } else if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY)) { const struct radv_legacy_gs_info *gs_state = &cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info; - primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(gs_state->vgt_gs_onchip_cntl); + primgroup_size = gs_state->gs_prims_per_subgroup; } else { primgroup_size = 128; /* recommended without a GS and tess */ } diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index a8be95717ed..e258dbf0584 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -3218,7 +3218,7 @@ radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, /* GFX6-8: ESGS offchip ring buffer is allocated according to VGT_ESGS_RING_ITEMSIZE. * GFX9+: Only used to set the GS input VGPRs, emulated in shaders. */ - radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, gs_state->vgt_esgs_ring_itemsize); + radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, gs->info.regs.gs.vgt_esgs_ring_itemsize); } va = radv_shader_get_va(gs); @@ -3237,8 +3237,9 @@ radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, radeon_emit(cs, gs->config.rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size)); } - radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl); - radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, gs_state->vgt_gs_max_prims_per_subgroup); + radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs->info.regs.gs.vgt_gs_onchip_cntl); + radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, + gs->info.regs.gs.vgt_gs_max_prims_per_subgroup); } else { radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4); radeon_emit(cs, va >> 8); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index f02f9928d6b..82357a1c088 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1461,6 +1461,21 @@ radv_open_rtld_binary(struct radv_device *device, const struct radv_shader_binar } #endif +static void +radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_binary *binary) +{ + struct radv_shader_info *info = &binary->info; + + info->regs.gs.vgt_esgs_ring_itemsize = info->gs_ring_info.esgs_itemsize; + + info->regs.gs.vgt_gs_max_prims_per_subgroup = + S_028A94_MAX_PRIMS_PER_SUBGROUP(info->gs_ring_info.gs_inst_prims_in_subgroup); + + info->regs.gs.vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(info->gs_ring_info.es_verts_per_subgroup) | + S_028A44_GS_PRIMS_PER_SUBGRP(info->gs_ring_info.gs_prims_per_subgroup) | + S_028A44_GS_INST_PRIMS_IN_SUBGRP(info->gs_ring_info.gs_inst_prims_in_subgroup); +} + static void radv_precompute_registers_hw_cs(struct radv_device *device, struct radv_shader_binary *binary) { @@ -1479,6 +1494,10 @@ radv_precompute_registers(struct radv_device *device, struct radv_shader_binary const struct radv_shader_info *info = &binary->info; switch (info->stage) { + case MESA_SHADER_GEOMETRY: + if (!info->is_ngg) + radv_precompute_registers_hw_gs(device, binary); + break; case MESA_SHADER_COMPUTE: case MESA_SHADER_TASK: radv_precompute_registers_hw_cs(device, binary); diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index eb07b3a59f9..6641a285282 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -624,11 +624,9 @@ radv_init_legacy_gs_ring_info(const struct radv_device *device, struct radv_shad unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se; /* Calculate the minimum size. */ - unsigned min_esgs_ring_size = - align(gs_ring_info->vgt_esgs_ring_itemsize * 4 * gs_vertex_reuse * wave_size, alignment); + unsigned min_esgs_ring_size = align(gs_ring_info->esgs_itemsize * 4 * gs_vertex_reuse * wave_size, alignment); /* These are recommended sizes, not minimum sizes. */ - unsigned esgs_ring_size = - max_gs_waves * 2 * wave_size * gs_ring_info->vgt_esgs_ring_itemsize * 4 * gs_info->gs.vertices_in; + unsigned esgs_ring_size = max_gs_waves * 2 * wave_size * gs_ring_info->esgs_itemsize * 4 * gs_info->gs.vertices_in; unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size * gs_info->gs.max_gsvs_emit_size; min_esgs_ring_size = align(min_esgs_ring_size, alignment); @@ -731,12 +729,12 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf const uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out; const uint32_t lds_granularity = pdev->info.lds_encode_granularity; const uint32_t total_lds_bytes = align(esgs_lds_size * 4, lds_granularity); + + out->gs_inst_prims_in_subgroup = gs_inst_prims_in_subgroup; + out->es_verts_per_subgroup = es_verts_per_subgroup; + out->gs_prims_per_subgroup = gs_prims_per_subgroup; + out->esgs_itemsize = esgs_itemsize; out->lds_size = total_lds_bytes / lds_granularity; - out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) | - S_028A44_GS_PRIMS_PER_SUBGRP(gs_prims_per_subgroup) | - S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_inst_prims_in_subgroup); - out->vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup); - out->vgt_esgs_ring_itemsize = esgs_itemsize; assert(max_prims_per_subgroup <= max_out_prims); radv_init_legacy_gs_ring_info(device, gs_info); @@ -1339,8 +1337,8 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n break; case MESA_SHADER_GEOMETRY: if (!info->is_ngg) { - unsigned es_verts_per_subgroup = G_028A44_ES_VERTS_PER_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl); - unsigned gs_inst_prims_in_subgroup = G_028A44_GS_INST_PRIMS_IN_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl); + unsigned es_verts_per_subgroup = info->gs_ring_info.es_verts_per_subgroup; + unsigned gs_inst_prims_in_subgroup = info->gs_ring_info.gs_inst_prims_in_subgroup; info->workgroup_size = ac_compute_esgs_workgroup_size(pdev->info.gfx_level, info->wave_size, es_verts_per_subgroup, gs_inst_prims_in_subgroup); diff --git a/src/amd/vulkan/radv_shader_info.h b/src/amd/vulkan/radv_shader_info.h index 56601e463d5..dd0c1ef881f 100644 --- a/src/amd/vulkan/radv_shader_info.h +++ b/src/amd/vulkan/radv_shader_info.h @@ -54,9 +54,10 @@ struct radv_streamout_info { }; struct radv_legacy_gs_info { - uint32_t vgt_gs_onchip_cntl; - uint32_t vgt_gs_max_prims_per_subgroup; - uint32_t vgt_esgs_ring_itemsize; + uint32_t gs_inst_prims_in_subgroup; + uint32_t es_verts_per_subgroup; + uint32_t gs_prims_per_subgroup; + uint32_t esgs_itemsize; uint32_t lds_size; uint32_t esgs_ring_size; uint32_t gsvs_ring_size; @@ -252,6 +253,12 @@ struct radv_shader_info { /* Precomputed register values. */ struct { + struct { + uint32_t vgt_esgs_ring_itemsize; + uint32_t vgt_gs_max_prims_per_subgroup; + uint32_t vgt_gs_onchip_cntl; + } gs; + struct { uint32_t compute_num_thread_x; uint32_t compute_num_thread_y;