anv: reduce push constant size for descriptor sets

Now that descriptor sets are located in a 1GB area, we can avoid
storing the whole address of the descriptor set and instead add the
base address of the area to a 32-bit offset.

Replayed a bunch of fossils with this change; the differences are not
really significant one way or another:

Totals:
Instrs: 9278246 -> 9277148 (-0.01%); split: -0.01%, +0.00%
Cycles: 3547598421 -> 3547579435 (-0.00%); split: -0.00%, +0.00%

Totals from 353 (1.14% of 31021) affected shaders:
Instrs: 581546 -> 580448 (-0.19%); split: -0.23%, +0.04%
Cycles: 25885422 -> 25866436 (-0.07%); split: -0.31%, +0.24%

No difference on send messages or spills/fills.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21645>
This commit is contained in:
Lionel Landwerlin
2023-03-15 16:10:25 +02:00
committed by Marge Bot
parent d2c0147228
commit 3f1ff326e0
6 changed files with 51 additions and 34 deletions

View File

@@ -737,6 +737,7 @@ enum brw_shader_reloc_id {
BRW_SHADER_RELOC_SHADER_START_OFFSET, BRW_SHADER_RELOC_SHADER_START_OFFSET,
BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW, BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH, BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
}; };
enum brw_shader_reloc_type { enum brw_shader_reloc_type {

View File

@@ -455,10 +455,10 @@ void anv_CmdBindPipeline(
assert(layout->set[s].dynamic_offset_start < MAX_DYNAMIC_BUFFERS); assert(layout->set[s].dynamic_offset_start < MAX_DYNAMIC_BUFFERS);
if (layout->set[s].layout->dynamic_offset_count > 0 && if (layout->set[s].layout->dynamic_offset_count > 0 &&
(push->desc_sets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) != layout->set[s].dynamic_offset_start) { (push->desc_offsets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) != layout->set[s].dynamic_offset_start) {
push->desc_sets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK; push->desc_offsets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
push->desc_sets[s] |= (layout->set[s].dynamic_offset_start & push->desc_offsets[s] |= (layout->set[s].dynamic_offset_start &
ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK); ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
modified = true; modified = true;
} }
} }
@@ -586,15 +586,17 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
if (update_desc_sets) { if (update_desc_sets) {
struct anv_push_constants *push = &pipe_state->push_constants; struct anv_push_constants *push = &pipe_state->push_constants;
struct anv_address addr = anv_descriptor_set_address(set); struct anv_address set_addr = anv_descriptor_set_address(set);
push->desc_sets[set_index] &= ~ANV_DESCRIPTOR_SET_ADDRESS_MASK; uint64_t addr = anv_address_physical(set_addr);
push->desc_sets[set_index] |= (anv_address_physical(addr) & uint32_t offset = addr & 0xffffffff;
ANV_DESCRIPTOR_SET_ADDRESS_MASK); assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0);
push->desc_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK;
push->desc_offsets[set_index] |= offset;
if (addr.bo) { if (set_addr.bo) {
anv_reloc_list_add_bo(cmd_buffer->batch.relocs, anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
cmd_buffer->batch.alloc, cmd_buffer->batch.alloc,
addr.bo); set_addr.bo);
} }
} }

View File

@@ -1017,10 +1017,11 @@ VkResult anv_CreateDescriptorPool(
} }
} else { } else {
VkResult result = anv_device_alloc_bo(device, VkResult result = anv_device_alloc_bo(device,
"descriptors", "indirect descriptors",
descriptor_bo_size, descriptor_bo_size,
ANV_BO_ALLOC_MAPPED | ANV_BO_ALLOC_MAPPED |
ANV_BO_ALLOC_SNOOPED, ANV_BO_ALLOC_SNOOPED |
ANV_BO_ALLOC_DESCRIPTOR_POOL,
0 /* explicit_address */, 0 /* explicit_address */,
&pool->bo); &pool->bo);
if (result != VK_SUCCESS) { if (result != VK_SUCCESS) {

View File

@@ -70,10 +70,10 @@ anv_nir_compute_push_layout(nir_shader *nir,
case nir_intrinsic_load_desc_set_address_intel: case nir_intrinsic_load_desc_set_address_intel:
case nir_intrinsic_load_desc_set_dynamic_index_intel: { case nir_intrinsic_load_desc_set_dynamic_index_intel: {
unsigned base = offsetof(struct anv_push_constants, desc_sets); unsigned base = offsetof(struct anv_push_constants, desc_offsets);
push_start = MIN2(push_start, base); push_start = MIN2(push_start, base);
push_end = MAX2(push_end, base + push_end = MAX2(push_end, base +
sizeof_field(struct anv_push_constants, desc_sets)); sizeof_field(struct anv_push_constants, desc_offsets));
break; break;
} }
@@ -177,27 +177,30 @@ anv_nir_compute_push_layout(nir_shader *nir,
case nir_intrinsic_load_desc_set_address_intel: { case nir_intrinsic_load_desc_set_address_intel: {
b->cursor = nir_before_instr(&intrin->instr); b->cursor = nir_before_instr(&intrin->instr);
nir_ssa_def *pc_load = nir_load_uniform(b, 1, 64, nir_ssa_def *pc_load = nir_load_uniform(b, 1, 32,
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint64_t)), nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)),
.base = offsetof(struct anv_push_constants, desc_sets), .base = offsetof(struct anv_push_constants, desc_offsets),
.range = sizeof_field(struct anv_push_constants, desc_sets), .range = sizeof_field(struct anv_push_constants, desc_offsets),
.dest_type = nir_type_uint64); .dest_type = nir_type_uint32);
pc_load = nir_iand_imm(b, pc_load, ANV_DESCRIPTOR_SET_ADDRESS_MASK); pc_load = nir_iand_imm(b, pc_load, ANV_DESCRIPTOR_SET_OFFSET_MASK);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load); nir_ssa_def *desc_addr =
nir_pack_64_2x32_split(
b, pc_load,
nir_load_reloc_const_intel(
b, BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH));
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc_addr);
break; break;
} }
case nir_intrinsic_load_desc_set_dynamic_index_intel: { case nir_intrinsic_load_desc_set_dynamic_index_intel: {
b->cursor = nir_before_instr(&intrin->instr); b->cursor = nir_before_instr(&intrin->instr);
nir_ssa_def *pc_load = nir_load_uniform(b, 1, 64, nir_ssa_def *pc_load = nir_load_uniform(b, 1, 32,
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint64_t)), nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)),
.base = offsetof(struct anv_push_constants, desc_sets), .base = offsetof(struct anv_push_constants, desc_offsets),
.range = sizeof_field(struct anv_push_constants, desc_sets), .range = sizeof_field(struct anv_push_constants, desc_offsets),
.dest_type = nir_type_uint64); .dest_type = nir_type_uint32);
pc_load = nir_i2i32( pc_load = nir_iand_imm(
b, b, pc_load, ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
nir_iand_imm(
b, pc_load, ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK));
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load);
break; break;
} }

View File

@@ -122,7 +122,14 @@ anv_shader_bin_create(struct anv_device *device,
prog_data_in->const_data_offset; prog_data_in->const_data_offset;
int rv_count = 0; int rv_count = 0;
struct brw_shader_reloc_value reloc_values[5]; struct brw_shader_reloc_value reloc_values[6];
assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
.value = device->physical->indirect_descriptors ?
(device->physical->va.descriptor_pool.addr >> 32) :
(device->physical->va.binding_table_pool.addr >> 32),
};
reloc_values[rv_count++] = (struct brw_shader_reloc_value) { reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW, .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
.value = shader_data_addr, .value = shader_data_addr,

View File

@@ -2444,15 +2444,18 @@ struct anv_push_constants {
/** Ray query globals (RT_DISPATCH_GLOBALS) */ /** Ray query globals (RT_DISPATCH_GLOBALS) */
uint64_t ray_query_globals; uint64_t ray_query_globals;
#define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint64_t)ANV_UBO_ALIGNMENT - 1) #define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint32_t)ANV_UBO_ALIGNMENT - 1)
#define ANV_DESCRIPTOR_SET_ADDRESS_MASK (~(uint64_t)(ANV_UBO_ALIGNMENT - 1)) #define ANV_DESCRIPTOR_SET_OFFSET_MASK (~(uint32_t)(ANV_UBO_ALIGNMENT - 1))
/** /**
* Base offsets for descriptor sets from
* INDIRECT_DESCRIPTOR_POOL_MIN_ADDRESS
*
* In bits [0:5] : dynamic offset index in dynamic_offsets[] for the set * In bits [0:5] : dynamic offset index in dynamic_offsets[] for the set
* *
* In bits [6:63] : descriptor set address * In bits [6:63] : descriptor set address
*/ */
uint64_t desc_sets[MAX_SETS]; uint32_t desc_offsets[MAX_SETS];
union { union {
struct { struct {