From 15640e58d96c5db0cd78769a06b6b204dcd60799 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 12 Aug 2021 15:36:56 +0100 Subject: [PATCH] radv,aco: lower texture descriptor loads in NIR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fossil-db (Sienna Cichlid): Totals from 39445 (24.30% of 162293) affected shaders: MaxWaves: 875988 -> 875972 (-0.00%) Instrs: 35372561 -> 35234909 (-0.39%); split: -0.41%, +0.03% CodeSize: 190237480 -> 189379240 (-0.45%); split: -0.47%, +0.02% VGPRs: 1889856 -> 1889928 (+0.00%); split: -0.00%, +0.01% SpillSGPRs: 10764 -> 10857 (+0.86%); split: -2.04%, +2.91% SpillVGPRs: 1891 -> 1907 (+0.85%); split: -0.32%, +1.16% Scratch: 260096 -> 261120 (+0.39%) Latency: 477701150 -> 477578466 (-0.03%); split: -0.06%, +0.03% InvThroughput: 87819847 -> 87830346 (+0.01%); split: -0.03%, +0.04% VClause: 673353 -> 673829 (+0.07%); split: -0.04%, +0.11% SClause: 1385396 -> 1366478 (-1.37%); split: -1.65%, +0.29% Copies: 2327965 -> 2229134 (-4.25%); split: -4.58%, +0.34% Branches: 906707 -> 906434 (-0.03%); split: -0.13%, +0.10% PreSGPRs: 1874153 -> 1862698 (-0.61%); split: -1.34%, +0.73% PreVGPRs: 1691382 -> 1691383 (+0.00%); split: -0.00%, +0.00% Signed-off-by: Rhys Perry Reviewed-by: Samuel Pitoiset Reviewed-by: Timur Kristóf Part-of: --- src/amd/common/ac_shader_util.h | 11 + .../compiler/aco_instruction_selection.cpp | 88 ++------ src/amd/llvm/ac_nir_to_llvm.c | 8 +- src/amd/llvm/ac_shader_abi.h | 17 +- .../vulkan/radv_nir_apply_pipeline_layout.c | 211 +++++++++++++++++- src/amd/vulkan/radv_nir_to_llvm.c | 111 +-------- src/amd/vulkan/radv_shader.c | 1 - src/amd/vulkan/radv_shader.h | 1 - src/gallium/drivers/radeonsi/si_shader_llvm.c | 1 - 9 files changed, 257 insertions(+), 192 deletions(-) diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h index 5d7ee8f7e73..129f6ebe93d 100644 --- a/src/amd/common/ac_shader_util.h +++ b/src/amd/common/ac_shader_util.h @@ -79,6 +79,17 @@ enum ac_fetch_format AC_FETCH_FORMAT_NONE, }; +enum ac_descriptor_type +{ + AC_DESC_IMAGE, + AC_DESC_FMASK, + AC_DESC_SAMPLER, + AC_DESC_BUFFER, + AC_DESC_PLANE_0, + AC_DESC_PLANE_1, + AC_DESC_PLANE_2, +}; + unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask); unsigned ac_get_cb_shader_mask(unsigned spi_shader_col_format); diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 504b54253fe..cf5f1ade632 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -1355,7 +1355,9 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) case nir_op_vec2: case nir_op_vec3: case nir_op_vec4: - case nir_op_vec5: { + case nir_op_vec5: + case nir_op_vec8: + case nir_op_vec16: { std::array elems; unsigned num = instr->dest.dest.ssa.num_components; for (unsigned i = 0; i < num; ++i) @@ -8967,70 +8969,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) } } -void -tex_fetch_ptrs(isel_context* ctx, nir_tex_instr* instr, Temp* res_ptr, Temp* samp_ptr, - enum glsl_base_type* stype) -{ - nir_deref_instr* texture_deref_instr = NULL; - nir_deref_instr* sampler_deref_instr = NULL; - int plane = -1; - - for (unsigned i = 0; i < instr->num_srcs; i++) { - switch (instr->src[i].src_type) { - case nir_tex_src_texture_deref: - texture_deref_instr = nir_src_as_deref(instr->src[i].src); - break; - case nir_tex_src_sampler_deref: - sampler_deref_instr = nir_src_as_deref(instr->src[i].src); - break; - case nir_tex_src_plane: plane = nir_src_as_int(instr->src[i].src); break; - default: break; - } - } - - *stype = glsl_get_sampler_result_type(texture_deref_instr->type); - - if (!sampler_deref_instr) - sampler_deref_instr = texture_deref_instr; - - if (plane >= 0) { - assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF); - *res_ptr = get_sampler_desc(ctx, texture_deref_instr, - (aco_descriptor_type)(ACO_DESC_PLANE_0 + plane), instr, false); - } else if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { - *res_ptr = get_sampler_desc(ctx, texture_deref_instr, ACO_DESC_BUFFER, instr, false); - } else if (instr->op == nir_texop_fragment_mask_fetch_amd) { - *res_ptr = get_sampler_desc(ctx, texture_deref_instr, ACO_DESC_FMASK, instr, false); - } else { - *res_ptr = get_sampler_desc(ctx, texture_deref_instr, ACO_DESC_IMAGE, instr, false); - } - if (samp_ptr) { - *samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, ACO_DESC_SAMPLER, instr, false); - - if (ctx->options->disable_aniso_single_level && - instr->sampler_dim < GLSL_SAMPLER_DIM_RECT && ctx->options->chip_class < GFX8) { - /* fix sampler aniso on SI/CI: samp[0] = samp[0] & img[7] */ - Builder bld(ctx->program, ctx->block); - - /* to avoid unnecessary moves, we split and recombine sampler and image */ - Temp img[8] = {bld.tmp(s1), bld.tmp(s1), bld.tmp(s1), bld.tmp(s1), - bld.tmp(s1), bld.tmp(s1), bld.tmp(s1), bld.tmp(s1)}; - Temp samp[4] = {bld.tmp(s1), bld.tmp(s1), bld.tmp(s1), bld.tmp(s1)}; - bld.pseudo(aco_opcode::p_split_vector, Definition(img[0]), Definition(img[1]), - Definition(img[2]), Definition(img[3]), Definition(img[4]), Definition(img[5]), - Definition(img[6]), Definition(img[7]), *res_ptr); - bld.pseudo(aco_opcode::p_split_vector, Definition(samp[0]), Definition(samp[1]), - Definition(samp[2]), Definition(samp[3]), *samp_ptr); - - samp[0] = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), samp[0], img[7]); - *res_ptr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s8), img[0], img[1], img[2], - img[3], img[4], img[5], img[6], img[7]); - *samp_ptr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), samp[0], samp[1], samp[2], - samp[3]); - } - } -} - void build_cube_select(isel_context* ctx, Temp ma, Temp id, Temp deriv, Temp* out_ma, Temp* out_sc, Temp* out_tc) @@ -9178,11 +9116,21 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) std::vector coords; std::vector derivs; nir_const_value* const_offset[4] = {NULL, NULL, NULL, NULL}; - enum glsl_base_type stype; - tex_fetch_ptrs(ctx, instr, &resource, &sampler, &stype); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + switch (instr->src[i].src_type) { + case nir_tex_src_texture_handle: + resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa)); + break; + case nir_tex_src_sampler_handle: + sampler = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa)); + break; + default: break; + } + } bool tg4_integer_workarounds = ctx->options->chip_class <= GFX8 && instr->op == nir_texop_tg4 && - (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT); + (instr->dest_type & (nir_type_int | nir_type_uint)); bool tg4_integer_cube_workaround = tg4_integer_workarounds && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE; @@ -9476,7 +9424,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) Operand::c32(V_008F14_IMG_DATA_FORMAT_8_8_8_8)); Temp nfmt; - if (stype == GLSL_TYPE_UINT) { + if (instr->dest_type & nir_type_uint) { nfmt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(V_008F14_IMG_NUM_FORMAT_USCALED), Operand::c32(V_008F14_IMG_NUM_FORMAT_UINT), bld.scc(compare_cube_wa)); @@ -9753,7 +9701,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) for (unsigned i = 0; i < 4; i++) { val[i] = emit_extract_vector(ctx, tmp_dst, i, v1); Temp cvt_val; - if (stype == GLSL_TYPE_UINT) + if (instr->dest_type & nir_type_uint) cvt_val = bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), val[i]); else cvt_val = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), val[i]); diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 7b1e593d663..37e76f34f81 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -559,7 +559,7 @@ static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx, struct waterfall_ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) { - LLVMValueRef src[4], result = NULL; + LLVMValueRef src[16], result = NULL; unsigned num_components = instr->dest.dest.ssa.num_components; unsigned src_components; LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa); @@ -570,6 +570,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_vec3: case nir_op_vec4: case nir_op_vec5: + case nir_op_vec8: + case nir_op_vec16: case nir_op_unpack_32_2x16: case nir_op_unpack_64_2x32: case nir_op_unpack_64_4x16: @@ -957,6 +959,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_vec3: case nir_op_vec4: case nir_op_vec5: + case nir_op_vec8: + case nir_op_vec16: for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) src[i] = ac_to_integer(&ctx->ac, src[i]); result = ac_build_gather_values(&ctx->ac, src, num_components); @@ -4486,7 +4490,7 @@ static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, LLVMValue LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef img7, samp0; - if (ctx->ac.chip_class >= GFX8 || !ctx->abi->disable_aniso_single_level) + if (ctx->ac.chip_class >= GFX8) return samp; img7 = LLVMBuildExtractElement(builder, res, LLVMConstInt(ctx->ac.i32, 7, 0), ""); diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index 0e7126831e4..2d7e553bd02 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -25,6 +25,7 @@ #define AC_SHADER_ABI_H #include "ac_shader_args.h" +#include "ac_shader_util.h" #include "compiler/shader_enums.h" #include @@ -34,17 +35,6 @@ #define AC_MAX_INLINE_PUSH_CONSTS 8 -enum ac_descriptor_type -{ - AC_DESC_IMAGE, - AC_DESC_FMASK, - AC_DESC_SAMPLER, - AC_DESC_BUFFER, - AC_DESC_PLANE_0, - AC_DESC_PLANE_1, - AC_DESC_PLANE_2, -}; - /* Document the shader ABI during compilation. This is what allows radeonsi and * radv to share a compiler backend. */ @@ -159,11 +149,6 @@ struct ac_shader_abi { */ bool adjust_frag_coord_z; - /* Whether anisotropic filtering should be disabled for single level - * images. - */ - bool disable_aniso_single_level; - /* Whether to inline the compute dispatch size in user sgprs. */ bool load_grid_size_from_user_sgpr; }; diff --git a/src/amd/vulkan/radv_nir_apply_pipeline_layout.c b/src/amd/vulkan/radv_nir_apply_pipeline_layout.c index 3901972be64..038fc561bd9 100644 --- a/src/amd/vulkan/radv_nir_apply_pipeline_layout.c +++ b/src/amd/vulkan/radv_nir_apply_pipeline_layout.c @@ -21,6 +21,7 @@ * IN THE SOFTWARE. * */ +#include "ac_shader_util.h" #include "nir.h" #include "nir_builder.h" #include "radv_private.h" @@ -30,6 +31,7 @@ typedef struct { enum chip_class chip_class; uint32_t address32_hi; + bool disable_aniso_single_level; const struct radv_shader_args *args; const struct radv_shader_info *info; @@ -218,6 +220,122 @@ visit_get_ssbo_size(nir_builder *b, apply_layout_state *state, nir_intrinsic_ins nir_instr_remove(&intrin->instr); } +static nir_ssa_def * +get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *deref, + enum ac_descriptor_type desc_type, bool non_uniform, nir_tex_instr *tex) +{ + nir_variable *var = nir_deref_instr_get_variable(deref); + assert(var); + unsigned desc_set = var->data.descriptor_set; + unsigned binding_index = var->data.binding; + bool indirect = nir_deref_instr_has_indirect(deref); + + struct radv_descriptor_set_layout *layout = state->pipeline_layout->set[desc_set].layout; + struct radv_descriptor_set_binding_layout *binding = &layout->binding[binding_index]; + + /* Handle immutable (compile-time) samplers (VkDescriptorSetLayoutBinding::pImmutableSamplers) + * We can only do this for constant array index or if all samplers in the array are the same. + */ + if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset && + (!indirect || binding->immutable_samplers_equal)) { + unsigned constant_index = 0; + if (!binding->immutable_samplers_equal) { + while (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1); + constant_index += nir_src_as_uint(deref->arr.index) * array_size; + deref = nir_deref_instr_parent(deref); + } + } + + const uint32_t *samplers = radv_immutable_samplers(layout, binding); + return nir_imm_ivec4(b, samplers[constant_index * 4 + 0], samplers[constant_index * 4 + 1], + samplers[constant_index * 4 + 2], samplers[constant_index * 4 + 3]); + } + + unsigned size = 8; + unsigned offset = binding->offset; + switch (desc_type) { + case AC_DESC_IMAGE: + case AC_DESC_PLANE_0: + break; + case AC_DESC_FMASK: + case AC_DESC_PLANE_1: + offset += 32; + break; + case AC_DESC_SAMPLER: + size = 4; + if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + offset += radv_combined_image_descriptor_sampler_offset(binding); + break; + case AC_DESC_BUFFER: + size = 4; + break; + case AC_DESC_PLANE_2: + size = 4; + offset += 64; + break; + } + + nir_ssa_def *index = NULL; + while (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1); + array_size *= binding->size; + + nir_ssa_def *tmp = nir_imul_imm(b, deref->arr.index.ssa, array_size); + if (tmp != deref->arr.index.ssa) + nir_instr_as_alu(tmp->parent_instr)->no_unsigned_wrap = true; + + if (index) { + index = nir_iadd(b, tmp, index); + nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true; + } else { + index = tmp; + } + + deref = nir_deref_instr_parent(deref); + } + + nir_ssa_def *index_offset = index ? nir_iadd_imm(b, index, offset) : nir_imm_int(b, offset); + if (index && index_offset != index) + nir_instr_as_alu(index_offset->parent_instr)->no_unsigned_wrap = true; + + if (non_uniform) + return nir_iadd(b, load_desc_ptr(b, state, desc_set), index_offset); + + nir_ssa_def *addr = convert_pointer_to_64_bit(b, state, load_desc_ptr(b, state, desc_set)); + nir_ssa_def *desc = nir_load_smem_amd(b, size, addr, index_offset, .align_mul = size * 4u); + + /* 3 plane formats always have same size and format for plane 1 & 2, so + * use the tail from plane 1 so that we can store only the first 16 bytes + * of the last plane. */ + if (desc_type == AC_DESC_PLANE_2) { + nir_ssa_def *desc2 = get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex); + + nir_ssa_def *comp[8]; + for (unsigned i = 0; i < 4; i++) + comp[i] = nir_channel(b, desc, i); + for (unsigned i = 4; i < 8; i++) + comp[i] = nir_channel(b, desc2, i); + + return nir_vec(b, comp, 8); + } else if (desc_type == AC_DESC_SAMPLER && tex->op == nir_texop_tg4) { + nir_ssa_def *comp[4]; + for (unsigned i = 0; i < 4; i++) + comp[i] = nir_channel(b, desc, i); + + /* We want to always use the linear filtering truncation behaviour for + * nir_texop_tg4, even if the sampler uses nearest/point filtering. + */ + comp[0] = nir_iand_imm(b, comp[0], C_008F30_TRUNC_COORD); + + return nir_vec(b, comp, 4); + } + + return desc; +} + static void apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin) { @@ -263,6 +381,94 @@ apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_ } } +static void +apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *tex) +{ + b->cursor = nir_before_instr(&tex->instr); + + nir_deref_instr *texture_deref_instr = NULL; + nir_deref_instr *sampler_deref_instr = NULL; + int plane = -1; + + for (unsigned i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_texture_deref: + texture_deref_instr = nir_src_as_deref(tex->src[i].src); + break; + case nir_tex_src_sampler_deref: + sampler_deref_instr = nir_src_as_deref(tex->src[i].src); + break; + case nir_tex_src_plane: + plane = nir_src_as_int(tex->src[i].src); + break; + default: + break; + } + } + + nir_ssa_def *image = NULL; + nir_ssa_def *sampler = NULL; + if (plane >= 0) { + assert(tex->op != nir_texop_txf_ms && tex->op != nir_texop_samples_identical); + assert(tex->sampler_dim != GLSL_SAMPLER_DIM_BUF); + image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_PLANE_0 + plane, + tex->texture_non_uniform, tex); + } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) { + image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_BUFFER, + tex->texture_non_uniform, tex); + } else if (tex->op == nir_texop_fragment_mask_fetch_amd || + tex->op == nir_texop_samples_identical) { + image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_FMASK, + tex->texture_non_uniform, tex); + } else { + image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_IMAGE, + tex->texture_non_uniform, tex); + } + + if (sampler_deref_instr) { + sampler = get_sampler_desc(b, state, sampler_deref_instr, AC_DESC_SAMPLER, + tex->sampler_non_uniform, tex); + + if (state->disable_aniso_single_level && tex->sampler_dim < GLSL_SAMPLER_DIM_RECT && + state->chip_class < GFX8) { + /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. + * + * GFX6-GFX7: + * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic + * filtering manually. The driver sets img7 to a mask clearing + * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do: + * s_and_b32 samp0, samp0, img7 + * + * GFX8: + * The ANISO_OVERRIDE sampler field enables this fix in TA. + */ + /* TODO: This is unnecessary for combined image+sampler. + * We can do this when updating the desc set. */ + nir_ssa_def *comp[4]; + for (unsigned i = 0; i < 4; i++) + comp[i] = nir_channel(b, sampler, i); + comp[0] = nir_iand(b, comp[0], nir_channel(b, image, 7)); + + sampler = nir_vec(b, comp, 4); + } + } + + for (unsigned i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_texture_deref: + tex->src[i].src_type = nir_tex_src_texture_handle; + nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, image); + break; + case nir_tex_src_sampler_deref: + tex->src[i].src_type = nir_tex_src_sampler_handle; + nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, sampler); + break; + default: + break; + } + } +} + void radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device, const struct radv_pipeline_layout *layout, @@ -272,6 +478,7 @@ radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device, apply_layout_state state = { .chip_class = device->physical_device->rad_info.chip_class, .address32_hi = device->physical_device->rad_info.address32_hi, + .disable_aniso_single_level = device->instance->disable_aniso_single_level, .args = args, .info = info, .pipeline_layout = layout, @@ -291,7 +498,9 @@ radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device, */ nir_foreach_block_reverse (block, function->impl) { nir_foreach_instr_reverse_safe (instr, block) { - if (instr->type == nir_instr_type_intrinsic) + if (instr->type == nir_instr_type_tex) + apply_layout_to_tex(&b, &state, nir_instr_as_tex(instr)); + else if (instr->type == nir_instr_type_intrinsic) apply_layout_to_intrin(&b, &state, nir_instr_as_intrinsic(instr)); } } diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index ea93ed5b7fa..2d27b944547 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -403,118 +403,30 @@ radv_get_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set, unsign enum ac_descriptor_type desc_type, bool image, bool write, bool bindless) { struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef list = ctx->descriptor_sets[descriptor_set]; - struct radv_descriptor_set_layout *layout = - ctx->options->layout->set[descriptor_set].layout; - struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index; - unsigned offset = binding->offset; - unsigned stride = binding->size; - unsigned type_size; - LLVMBuilderRef builder = ctx->ac.builder; - LLVMTypeRef type; - assert(base_index < layout->binding_count); - - if (binding->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && desc_type == AC_DESC_FMASK) + if (image && desc_type == AC_DESC_FMASK) return NULL; - switch (desc_type) { - case AC_DESC_IMAGE: - type = ctx->ac.v8i32; - type_size = 32; - break; - case AC_DESC_FMASK: - type = ctx->ac.v8i32; - offset += 32; - type_size = 32; - break; - case AC_DESC_SAMPLER: - type = ctx->ac.v4i32; - if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - offset += radv_combined_image_descriptor_sampler_offset(binding); - } - - type_size = 16; - break; - case AC_DESC_BUFFER: - type = ctx->ac.v4i32; - type_size = 16; - break; - case AC_DESC_PLANE_0: - case AC_DESC_PLANE_1: - case AC_DESC_PLANE_2: - type = ctx->ac.v8i32; - type_size = 32; - offset += 32 * (desc_type - AC_DESC_PLANE_0); - break; - default: - unreachable("invalid desc_type\n"); - } - - offset += constant_index * stride; - - if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset && - (!index || binding->immutable_samplers_equal)) { - if (binding->immutable_samplers_equal) - constant_index = 0; - - const uint32_t *samplers = radv_immutable_samplers(layout, binding); - - LLVMValueRef constants[] = { - LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0), - LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0), - LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0), - LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0), - }; - return ac_build_gather_values(&ctx->ac, constants, 4); - } - - assert(stride % type_size == 0); - - LLVMValueRef adjusted_index = index; - if (!adjusted_index) - adjusted_index = ctx->ac.i32_0; - - adjusted_index = - LLVMBuildMul(builder, adjusted_index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), ""); - - LLVMValueRef val_offset = LLVMConstInt(ctx->ac.i32, offset, 0); - list = LLVMBuildGEP(builder, list, &val_offset, 1, ""); - list = LLVMBuildPointerCast(builder, list, ac_array_in_const32_addr_space(type), ""); - - LLVMValueRef descriptor = ac_build_load_to_sgpr(&ctx->ac, list, adjusted_index); - /* 3 plane formats always have same size and format for plane 1 & 2, so * use the tail from plane 1 so that we can store only the first 16 bytes * of the last plane. */ - if (desc_type == AC_DESC_PLANE_2) { - LLVMValueRef descriptor2 = - radv_get_sampler_desc(abi, descriptor_set, base_index, constant_index, index, - AC_DESC_PLANE_1, image, write, bindless); + if (desc_type == AC_DESC_PLANE_2 && index && LLVMTypeOf(index) == ctx->ac.i32) { + LLVMValueRef plane1_addr = + LLVMBuildSub(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 32, false), ""); + LLVMValueRef descriptor1 = radv_load_rsrc(ctx, plane1_addr, ctx->ac.v8i32); + LLVMValueRef descriptor2 = radv_load_rsrc(ctx, index, ctx->ac.v4i32); LLVMValueRef components[8]; for (unsigned i = 0; i < 4; ++i) - components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor, i); + components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i); for (unsigned i = 4; i < 8; ++i) - components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i); - descriptor = ac_build_gather_values(&ctx->ac, components, 8); - } else if (desc_type == AC_DESC_IMAGE && - ctx->options->has_image_load_dcc_bug && - image && !write) { - LLVMValueRef components[8]; - - for (unsigned i = 0; i < 8; i++) - components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor, i); - - /* WRITE_COMPRESS_ENABLE must be 0 for all image loads to workaround a hardware bug. */ - components[6] = LLVMBuildAnd(ctx->ac.builder, components[6], - LLVMConstInt(ctx->ac.i32, C_00A018_WRITE_COMPRESS_ENABLE, false), ""); - - descriptor = ac_build_gather_values(&ctx->ac, components, 8); + components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor1, i); + return ac_build_gather_values(&ctx->ac, components, 8); } - return descriptor; + bool v4 = desc_type == AC_DESC_BUFFER || desc_type == AC_DESC_SAMPLER; + return radv_load_rsrc(ctx, index, v4 ? ctx->ac.v4i32 : ctx->ac.v8i32); } static LLVMValueRef @@ -2223,7 +2135,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ctx.abi.clamp_shadow_reference = false; ctx.abi.adjust_frag_coord_z = options->adjust_frag_coord_z; ctx.abi.robust_buffer_access = options->robust_buffer_access; - ctx.abi.disable_aniso_single_level = options->disable_aniso_single_level; ctx.abi.load_grid_size_from_user_sgpr = args->load_grid_size_from_user_sgpr; bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && info->is_ngg; diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index d3532aec404..045f5ba17a3 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1889,7 +1889,6 @@ shader_compile(struct radv_device *device, struct vk_shader_module *module, options->enable_mrt_output_nan_fixup = module && !is_meta_shader(module->nir) && options->key.ps.enable_mrt_output_nan_fixup; options->adjust_frag_coord_z = options->key.adjust_frag_coord_z; - options->disable_aniso_single_level = options->key.disable_aniso_single_level; options->has_image_load_dcc_bug = device->physical_device->rad_info.has_image_load_dcc_bug; options->debug.func = radv_compiler_debug; options->debug.private_data = &debug_data; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index bb6e1332728..e6b981b0912 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -125,7 +125,6 @@ struct radv_nir_compiler_options { bool has_image_load_dcc_bug; bool enable_mrt_output_nan_fixup; bool wgp_mode; - bool disable_aniso_single_level; enum radeon_family family; enum chip_class chip_class; const struct radeon_info *info; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 62d24451817..0526b33737e 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -520,7 +520,6 @@ static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader * ctx->abi.robust_buffer_access = true; ctx->abi.convert_undef_to_zero = true; ctx->abi.adjust_frag_coord_z = false; - ctx->abi.disable_aniso_single_level = true; ctx->abi.load_grid_size_from_user_sgpr = true; const struct si_shader_info *info = &ctx->shader->selector->info;