radv,aco: lower vulkan_resource_index in NIR
fossil-db (Sienna Cichlid):
Totals from 31338 (19.31% of 162293) affected shaders:
MaxWaves: 758634 -> 758616 (-0.00%)
Instrs: 26398289 -> 26378282 (-0.08%); split: -0.09%, +0.01%
CodeSize: 141048208 -> 140971060 (-0.05%); split: -0.07%, +0.01%
VGPRs: 1373656 -> 1373736 (+0.01%)
SpillSGPRs: 9944 -> 9924 (-0.20%); split: -0.24%, +0.04%
SpillVGPRs: 1892 -> 1898 (+0.32%); split: -0.95%, +1.27%
Latency: 308570144 -> 308528462 (-0.01%); split: -0.03%, +0.02%
InvThroughput: 57698072 -> 57684901 (-0.02%); split: -0.07%, +0.04%
VClause: 440357 -> 440602 (+0.06%); split: -0.02%, +0.08%
SClause: 974724 -> 973315 (-0.14%); split: -0.18%, +0.04%
Copies: 1944925 -> 1945103 (+0.01%); split: -0.11%, +0.12%
Branches: 799444 -> 799461 (+0.00%); split: -0.00%, +0.00%
PreSGPRs: 1619860 -> 1619233 (-0.04%); split: -0.05%, +0.02%
PreVGPRs: 1252813 -> 1252863 (+0.00%); split: -0.00%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12773>
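What the new pass emits, in rough terms: each vulkan_resource_index intrinsic becomes plain NIR ALU ops with the pipeline layout already applied, instead of being translated separately by the ACO and LLVM backends. A minimal sketch for a regular (non-dynamic) buffer binding, assuming `b`, `state`, `desc_set`, `index`, and the layout-derived constants `offset`/`stride` are in scope (see visit_vulkan_resource_index() in the new pass below for the real code):

   /* (set pointer, byte offset into the set, stride) replaces the intrinsic */
   nir_ssa_def *set_ptr = load_desc_ptr(b, state, desc_set);
   nir_ssa_def *binding_ptr = nir_imul_imm(b, index, stride);   /* index * stride */
   binding_ptr = nir_iadd_imm(b, binding_ptr, offset);          /* + binding offset */
   nir_ssa_def *res = nir_vec3(b, set_ptr, binding_ptr, nir_imm_int(b, stride));

Acceleration-structure bindings are instead packed into a single 64-bit value (nir_pack_64_2x32_split of set_ptr and binding_ptr) rather than a vec3.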
@@ -5401,59 +5401,6 @@ load_desc_ptr(isel_context* ctx, unsigned desc_set)
   return get_arg(ctx, ctx->args->descriptor_sets[desc_set]);
}

void
visit_load_resource(isel_context* ctx, nir_intrinsic_instr* instr)
{
   Builder bld(ctx->program, ctx->block);
   Temp index = get_ssa_temp(ctx, instr->src[0].ssa);
   if (!nir_dest_is_divergent(instr->dest))
      index = bld.as_uniform(index);
   unsigned desc_set = nir_intrinsic_desc_set(instr);
   unsigned binding = nir_intrinsic_binding(instr);

   Temp desc_ptr;
   radv_pipeline_layout* pipeline_layout = ctx->options->layout;
   radv_descriptor_set_layout* layout = pipeline_layout->set[desc_set].layout;
   unsigned offset = layout->binding[binding].offset;
   unsigned stride;
   if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
       layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
      unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
                     layout->binding[binding].dynamic_offset_offset;
      desc_ptr = get_arg(ctx, ctx->args->ac.push_constants);
      offset = pipeline_layout->push_constant_size + 16 * idx;
      stride = 16;
   } else {
      desc_ptr = load_desc_ptr(ctx, desc_set);
      stride = layout->binding[binding].size;
   }

   if (nir_src_is_const(instr->src[0])) {
      index =
         bld.copy(bld.def(s1), Operand::c32((offset + nir_src_as_uint(instr->src[0]) * stride)));
   } else if (index.type() == RegType::vgpr) {
      if (stride != 1) {
         bool index24bit = layout->binding[binding].array_size <= 0x1000000;
         index = bld.v_mul_imm(bld.def(v1), index, stride, index24bit);
      }
      if (offset)
         index = bld.vadd32(bld.def(v1), Operand::c32(offset), index);
   } else {
      if (stride != 1)
         index = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand::c32(stride), index);
      if (offset)
         index = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc),
                          Operand::c32(offset), index);
   }

   Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
   std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
   elems[0] = desc_ptr;
   elems[1] = index;
   ctx->allocated_vec.emplace(dst.id(), elems);
   bld.pseudo(aco_opcode::p_create_vector, Definition(dst), desc_ptr, index, Operand::zero());
}

void
load_buffer(isel_context* ctx, unsigned num_components, unsigned component_size, Temp dst,
            Temp rsrc, Temp offset, unsigned align_mul, unsigned align_offset, bool glc = false,
@@ -5494,17 +5441,6 @@ load_buffer_rsrc(isel_context* ctx, Temp rsrc)
   return bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), set_ptr, binding);
}

bool
is_inline_ubo(isel_context* ctx, nir_src rsrc)
{
   nir_binding binding = nir_chase_binding(rsrc);
   if (!binding.success)
      return false;

   radv_descriptor_set_layout* layout = ctx->options->layout->set[binding.desc_set].layout;
   return layout->binding[binding.binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT;
}

void
visit_load_ubo(isel_context* ctx, nir_intrinsic_instr* instr)
{
@@ -5513,28 +5449,11 @@ visit_load_ubo(isel_context* ctx, nir_intrinsic_instr* instr)

   Builder bld(ctx->program, ctx->block);

   if (is_inline_ubo(ctx, instr->src[0])) {
      Temp set_ptr = bld.as_uniform(emit_extract_vector(ctx, rsrc, 0, RegClass(rsrc.type(), 1)));
      Temp binding_off =
         bld.as_uniform(emit_extract_vector(ctx, rsrc, 1, RegClass(rsrc.type(), 1)));
      rsrc = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), set_ptr, binding_off);

      uint32_t desc_type =
         S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
         S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
      if (ctx->options->chip_class >= GFX10) {
         desc_type |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                      S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
      } else {
         desc_type |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                      S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
      }
      rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), rsrc,
                        Operand::c32(S_008F04_BASE_ADDRESS_HI(ctx->options->address32_hi)),
                        Operand::c32(0xFFFFFFFFu), Operand::c32(desc_type));
   } else {
      if (rsrc.bytes() == 16)
         rsrc = bld.as_uniform(rsrc); /* for VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT */
      else
         rsrc = load_buffer_rsrc(ctx, rsrc);
   }

   unsigned size = instr->dest.ssa.bit_size / 8;
   load_buffer(ctx, instr->num_components, size, dst, rsrc, get_ssa_temp(ctx, instr->src[1].ssa),
               nir_intrinsic_align_mul(instr), nir_intrinsic_align_offset(instr));
@@ -8110,7 +8029,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
   case nir_intrinsic_load_ubo: visit_load_ubo(ctx, instr); break;
   case nir_intrinsic_load_push_constant: visit_load_push_constant(ctx, instr); break;
   case nir_intrinsic_load_constant: visit_load_constant(ctx, instr); break;
   case nir_intrinsic_vulkan_resource_index: visit_load_resource(ctx, instr); break;
   case nir_intrinsic_load_shared: visit_load_shared(ctx, instr); break;
   case nir_intrinsic_store_shared: visit_store_shared(ctx, instr); break;
   case nir_intrinsic_shared_atomic_add:
@@ -749,7 +749,6 @@ init_context(isel_context* ctx, nir_shader* shader)
            case nir_intrinsic_load_ssbo:
            case nir_intrinsic_load_global:
            case nir_intrinsic_load_global_constant:
            case nir_intrinsic_vulkan_resource_index:
            case nir_intrinsic_get_ssbo_size:
               type = nir_dest_is_divergent(intrinsic->dest) ? RegType::vgpr : RegType::sgpr;
               break;
@@ -3722,14 +3722,6 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
   case nir_intrinsic_load_push_constant:
      result = visit_load_push_constant(ctx, instr);
      break;
   case nir_intrinsic_vulkan_resource_index: {
      LLVMValueRef index = get_src(ctx, instr->src[0]);
      unsigned desc_set = nir_intrinsic_desc_set(instr);
      unsigned binding = nir_intrinsic_binding(instr);

      result = ctx->abi->load_resource(ctx->abi, index, desc_set, binding);
      break;
   }
   case nir_intrinsic_store_ssbo:
      visit_store_ssbo(ctx, instr);
      break;
@@ -129,16 +129,6 @@ struct ac_shader_abi {
                                      LLVMValueRef index, enum ac_descriptor_type desc_type,
                                      bool image, bool write, bool bindless);

   /**
    * Load a Vulkan-specific resource.
    *
    * \param index resource index
    * \param desc_set descriptor set
    * \param binding descriptor set binding
    */
   LLVMValueRef (*load_resource)(struct ac_shader_abi *abi, LLVMValueRef index, unsigned desc_set,
                                 unsigned binding);

   LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi, LLVMValueRef sample_id);

   LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi);
@@ -69,6 +69,7 @@ libradv_files = files(
  'radv_meta_resolve.c',
  'radv_meta_resolve_cs.c',
  'radv_meta_resolve_fs.c',
  'radv_nir_apply_pipeline_layout.c',
  'radv_nir_lower_ray_queries.c',
  'radv_nir_lower_ycbcr_textures.c',
  'radv_pass.c',
src/amd/vulkan/radv_nir_apply_pipeline_layout.c (new file, 252 lines)
@@ -0,0 +1,252 @@
/*
 * Copyright © 2020 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
#include "nir.h"
#include "nir_builder.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_shader_args.h"

typedef struct {
   enum chip_class chip_class;
   uint32_t address32_hi;

   const struct radv_shader_args *args;
   const struct radv_shader_info *info;
   const struct radv_pipeline_layout *pipeline_layout;
} apply_layout_state;

static nir_ssa_def *
get_scalar_arg(nir_builder *b, unsigned size, struct ac_arg arg)
{
   return nir_load_scalar_arg_amd(b, size, .base = arg.arg_index);
}

static nir_ssa_def *
convert_pointer_to_64_bit(nir_builder *b, apply_layout_state *state, nir_ssa_def *ptr)
{
   return nir_pack_64_2x32_split(b, ptr, nir_imm_int(b, state->address32_hi));
}

static nir_ssa_def *
load_desc_ptr(nir_builder *b, apply_layout_state *state, unsigned set)
{
   const struct radv_userdata_locations *user_sgprs_locs = &state->info->user_sgprs_locs;
   if (user_sgprs_locs->shader_data[AC_UD_INDIRECT_DESCRIPTOR_SETS].sgpr_idx != -1) {
      nir_ssa_def *addr = get_scalar_arg(b, 1, state->args->descriptor_sets[0]);
      addr = convert_pointer_to_64_bit(b, state, addr);
      return nir_load_smem_amd(b, 1, addr, nir_imm_int(b, set * 4));
   }

   assert(state->args->descriptor_sets[set].used);
   return get_scalar_arg(b, 1, state->args->descriptor_sets[set]);
}

static void
visit_vulkan_resource_index(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
   unsigned desc_set = nir_intrinsic_desc_set(intrin);
   unsigned binding = nir_intrinsic_binding(intrin);
   struct radv_descriptor_set_layout *layout = state->pipeline_layout->set[desc_set].layout;
   unsigned offset = layout->binding[binding].offset;
   unsigned stride;

   nir_ssa_def *set_ptr;
   if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
       layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
      unsigned idx = state->pipeline_layout->set[desc_set].dynamic_offset_start +
                     layout->binding[binding].dynamic_offset_offset;
      set_ptr = get_scalar_arg(b, 1, state->args->ac.push_constants);
      offset = state->pipeline_layout->push_constant_size + idx * 16;
      stride = 16;
   } else {
      set_ptr = load_desc_ptr(b, state, desc_set);
      stride = layout->binding[binding].size;
   }

   nir_ssa_def *binding_ptr = nir_imul_imm(b, intrin->src[0].ssa, stride);
   nir_instr_as_alu(binding_ptr->parent_instr)->no_unsigned_wrap = true;

   binding_ptr = nir_iadd_imm(b, binding_ptr, offset);
   nir_instr_as_alu(binding_ptr->parent_instr)->no_unsigned_wrap = true;

   if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
      assert(stride == 16);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
   } else {
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                               nir_vec3(b, set_ptr, binding_ptr, nir_imm_int(b, stride)));
   }
   nir_instr_remove(&intrin->instr);
}

static void
visit_vulkan_resource_reindex(nir_builder *b, apply_layout_state *state,
                              nir_intrinsic_instr *intrin)
{
   VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
   if (desc_type == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
      nir_ssa_def *set_ptr = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
      nir_ssa_def *binding_ptr = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);

      nir_ssa_def *index = nir_imul_imm(b, intrin->src[1].ssa, 16);
      nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;

      binding_ptr = nir_iadd_nuw(b, binding_ptr, index);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
   } else {
      assert(desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
             desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

      nir_ssa_def *binding_ptr = nir_channel(b, intrin->src[0].ssa, 1);
      nir_ssa_def *stride = nir_channel(b, intrin->src[0].ssa, 2);

      nir_ssa_def *index = nir_imul(b, intrin->src[1].ssa, stride);
      nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;

      binding_ptr = nir_iadd_nuw(b, binding_ptr, index);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                               nir_vector_insert_imm(b, intrin->src[0].ssa, binding_ptr, 1));
   }
   nir_instr_remove(&intrin->instr);
}

static void
visit_load_vulkan_descriptor(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
   if (nir_intrinsic_desc_type(intrin) == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
      nir_ssa_def *addr = convert_pointer_to_64_bit(
         b, state,
         nir_iadd(b, nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa),
                  nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa)));
      nir_ssa_def *desc = nir_build_load_global(b, 1, 64, addr, .access = ACCESS_NON_WRITEABLE);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
   } else {
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                               nir_vector_insert_imm(b, intrin->src[0].ssa, nir_imm_int(b, 0), 2));
   }
   nir_instr_remove(&intrin->instr);
}

static nir_ssa_def *
load_inline_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc)
{
   uint32_t desc_type =
      S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
      S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
   if (state->chip_class >= GFX10) {
      desc_type |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
   } else {
      desc_type |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
   }

   return nir_vec4(b, rsrc, nir_imm_int(b, S_008F04_BASE_ADDRESS_HI(state->address32_hi)),
                   nir_imm_int(b, 0xffffffff), nir_imm_int(b, desc_type));
}

static nir_ssa_def *
load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc)
{
   nir_binding binding = nir_chase_binding(nir_src_for_ssa(rsrc));

   /* If binding.success=false, then this is a variable pointer, which we don't support with
    * VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT.
    */
   if (binding.success) {
      struct radv_descriptor_set_layout *layout =
         state->pipeline_layout->set[binding.desc_set].layout;
      if (layout->binding[binding.binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
         rsrc = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
         return load_inline_buffer_descriptor(b, state, rsrc);
      }
   }

   return rsrc;
}

static void
apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *rsrc;
   switch (intrin->intrinsic) {
   case nir_intrinsic_vulkan_resource_index:
      visit_vulkan_resource_index(b, state, intrin);
      break;
   case nir_intrinsic_vulkan_resource_reindex:
      visit_vulkan_resource_reindex(b, state, intrin);
      break;
   case nir_intrinsic_load_vulkan_descriptor:
      visit_load_vulkan_descriptor(b, state, intrin);
      break;
   case nir_intrinsic_load_ubo:
      rsrc = load_buffer_descriptor(b, state, intrin->src[0].ssa);
      nir_instr_rewrite_src_ssa(&intrin->instr, &intrin->src[0], rsrc);
      break;
   default:
      break;
   }
}

void
radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
                               const struct radv_pipeline_layout *layout,
                               const struct radv_shader_info *info,
                               const struct radv_shader_args *args)
{
   apply_layout_state state = {
      .chip_class = device->physical_device->rad_info.chip_class,
      .address32_hi = device->physical_device->rad_info.address32_hi,
      .args = args,
      .info = info,
      .pipeline_layout = layout,
   };

   nir_builder b;

   nir_foreach_function (function, shader) {
      if (!function->impl)
         continue;

      nir_builder_init(&b, function->impl);

      /* Iterate in reverse so load_ubo lowering can look at
       * the vulkan_resource_index to tell if it's an inline
       * ubo.
       */
      nir_foreach_block_reverse (block, function->impl) {
         nir_foreach_instr_reverse_safe (instr, block) {
            if (instr->type == nir_instr_type_intrinsic)
               apply_layout_to_intrin(&b, &state, nir_instr_as_intrinsic(instr));
         }
      }

      nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance);
   }
}
@@ -198,39 +198,6 @@ create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has
   }
}

static LLVMValueRef
radv_load_resource(struct ac_shader_abi *abi, LLVMValueRef index, unsigned desc_set,
                   unsigned binding)
{
   struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
   LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
   struct radv_pipeline_layout *pipeline_layout = ctx->options->layout;
   struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
   unsigned base_offset = layout->binding[binding].offset;
   LLVMValueRef offset, stride;

   if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
       layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
      unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
                     layout->binding[binding].dynamic_offset_offset;
      desc_ptr = ac_get_arg(&ctx->ac, ctx->args->ac.push_constants);
      base_offset = pipeline_layout->push_constant_size + 16 * idx;
      stride = LLVMConstInt(ctx->ac.i32, 16, false);
   } else
      stride = LLVMConstInt(ctx->ac.i32, layout->binding[binding].size, false);

   offset = LLVMConstInt(ctx->ac.i32, base_offset, false);

   if (layout->binding[binding].type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
      offset = ac_build_imad(&ctx->ac, index, stride, offset);
   }

   desc_ptr = LLVMBuildPtrToInt(ctx->ac.builder, desc_ptr, ctx->ac.i32, "");

   LLVMValueRef res[] = {desc_ptr, offset, ctx->ac.i32_0};
   return ac_build_gather_values(&ctx->ac, res, 3);
}

static uint32_t
radv_get_sample_pos_offset(uint32_t num_samples)
{
@@ -2303,7 +2270,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
   ctx.abi.load_ubo = radv_load_ubo;
   ctx.abi.load_ssbo = radv_load_ssbo;
   ctx.abi.load_sampler_desc = radv_get_sampler_desc;
   ctx.abi.load_resource = radv_load_resource;
   ctx.abi.load_ring_tess_factors = load_ring_tess_factors;
   ctx.abi.load_ring_tess_offchip = load_ring_tess_offchip;
   ctx.abi.load_ring_esgs = load_ring_esgs;
@@ -4067,8 +4067,6 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
      }
      NIR_PASS_V(nir[i], nir_lower_memory_model);

      bool lower_to_scalar = false;

      nir_load_store_vectorize_options vectorize_opts = {
         .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const |
                  nir_var_mem_shared | nir_var_mem_global,
@@ -4084,16 +4082,26 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
      if (nir_opt_load_store_vectorize(nir[i], &vectorize_opts)) {
         NIR_PASS_V(nir[i], nir_copy_prop);
         nir_opt_shrink_stores(nir[i], !device->instance->disable_shrink_image_store);
         lower_to_scalar = true;

         /* Gather info again, to update whether 8/16-bit are used. */
         nir_shader_gather_info(nir[i], nir_shader_get_entrypoint(nir[i]));
      }

      struct radv_shader_info *info = &infos[i];
      if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
         if (i == MESA_SHADER_VERTEX && nir[MESA_SHADER_TESS_CTRL])
            info = &infos[MESA_SHADER_TESS_CTRL];
         else if (i == MESA_SHADER_VERTEX && nir[MESA_SHADER_GEOMETRY])
            info = &infos[MESA_SHADER_GEOMETRY];
         else if (i == MESA_SHADER_TESS_EVAL && nir[MESA_SHADER_GEOMETRY])
            info = &infos[MESA_SHADER_GEOMETRY];
      }
      NIR_PASS_V(nir[i], radv_nir_apply_pipeline_layout, device, pipeline_layout, info,
                 &args[i]);

      nir_opt_shrink_vectors(nir[i]);

      if (lower_to_scalar)
         nir_lower_alu_to_scalar(nir[i], NULL, NULL);

      /* lower ALU operations */
      nir_lower_int64(nir[i]);
@@ -290,13 +290,6 @@ radv_compiler_debug(void *private_data, enum radv_compiler_debug_level level, co
                       &debug_data->module->base, 0, 0, "radv", message);
}

static nir_ssa_def *
convert_pointer_to_64(nir_builder *b, const struct radv_physical_device *pdev, nir_ssa_def *ptr)
{
   nir_ssa_def *comp[] = {ptr, nir_imm_int(b, pdev->rad_info.address32_hi)};
   return nir_pack_64_2x32(b, nir_vec(b, comp, 2));
}

static bool
lower_intrinsics(nir_shader *nir, const struct radv_pipeline_key *key,
                 const struct radv_pipeline_layout *layout, const struct radv_physical_device *pdev)
@@ -317,44 +310,6 @@ lower_intrinsics(nir_shader *nir, const struct radv_pipeline_key *key,

      nir_ssa_def *def = NULL;
      switch (intrin->intrinsic) {
      case nir_intrinsic_load_vulkan_descriptor:
         if (nir_intrinsic_desc_type(intrin) == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
            nir_ssa_def *addr =
               convert_pointer_to_64(&b, pdev,
                                     nir_iadd(&b, nir_channel(&b, intrin->src[0].ssa, 0),
                                              nir_channel(&b, intrin->src[0].ssa, 1)));

            def = nir_build_load_global(&b, 1, 64, addr, .access = ACCESS_NON_WRITEABLE);
         } else {
            def = nir_vector_insert_imm(&b, intrin->src[0].ssa, nir_imm_int(&b, 0), 2);
         }
         break;
      case nir_intrinsic_vulkan_resource_index: {
         unsigned desc_set = nir_intrinsic_desc_set(intrin);
         unsigned binding = nir_intrinsic_binding(intrin);
         struct radv_descriptor_set_layout *desc_layout = layout->set[desc_set].layout;

         nir_ssa_def *new_res = nir_vulkan_resource_index(
            &b, 3, 32, intrin->src[0].ssa, .desc_set = desc_set, .binding = binding,
            .desc_type = nir_intrinsic_desc_type(intrin));

         nir_ssa_def *stride;
         if (desc_layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
             desc_layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
            stride = nir_imm_int(&b, 16);
         } else {
            stride = nir_imm_int(&b, desc_layout->binding[binding].size);
         }
         def = nir_vector_insert_imm(&b, new_res, stride, 2);
         break;
      }
      case nir_intrinsic_vulkan_resource_reindex: {
         nir_ssa_def *binding_ptr = nir_channel(&b, intrin->src[0].ssa, 1);
         nir_ssa_def *stride = nir_channel(&b, intrin->src[0].ssa, 2);
         binding_ptr = nir_iadd(&b, binding_ptr, nir_imul(&b, intrin->src[1].ssa, stride));
         def = nir_vector_insert_imm(&b, intrin->src[0].ssa, binding_ptr, 1);
         break;
      }
      case nir_intrinsic_is_sparse_texels_resident:
         def = nir_ieq_imm(&b, intrin->src[0].ssa, 0);
         break;
@@ -510,6 +510,11 @@ bool radv_nir_lower_ycbcr_textures(nir_shader *shader, const struct radv_pipelin

bool radv_nir_lower_ray_queries(nir_shader *shader, struct radv_device *device);

void radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
                                    const struct radv_pipeline_layout *layout,
                                    const struct radv_shader_info *info,
                                    const struct radv_shader_args *args);

nir_shader *radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module,
                                       const char *entrypoint_name, gl_shader_stage stage,
                                       const VkSpecializationInfo *spec_info,