anv: reduce push constant size for descriptor sets

Now that descriptor sets are located in a 1GB area, we can avoid
storing the whole address of the descriptor set and instead add the
base address of the area to a 32-bit offset.

Replayed a bunch of fossils with this change; the differences are not
really significant one way or another:

Totals:
Instrs: 9278246 -> 9277148 (-0.01%); split: -0.01%, +0.00%
Cycles: 3547598421 -> 3547579435 (-0.00%); split: -0.00%, +0.00%

Totals from 353 (1.14% of 31021) affected shaders:
Instrs: 581546 -> 580448 (-0.19%); split: -0.23%, +0.04%
Cycles: 25885422 -> 25866436 (-0.07%); split: -0.31%, +0.24%

No difference on send messages or spills/fills.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21645>
This commit is contained in:
Lionel Landwerlin
2023-03-15 16:10:25 +02:00
committed by Marge Bot
parent d2c0147228
commit 3f1ff326e0
6 changed files with 51 additions and 34 deletions

View File

@@ -737,6 +737,7 @@ enum brw_shader_reloc_id {
BRW_SHADER_RELOC_SHADER_START_OFFSET,
BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
};
enum brw_shader_reloc_type {

View File

@@ -455,9 +455,9 @@ void anv_CmdBindPipeline(
assert(layout->set[s].dynamic_offset_start < MAX_DYNAMIC_BUFFERS);
if (layout->set[s].layout->dynamic_offset_count > 0 &&
(push->desc_sets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) != layout->set[s].dynamic_offset_start) {
push->desc_sets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
push->desc_sets[s] |= (layout->set[s].dynamic_offset_start &
(push->desc_offsets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) != layout->set[s].dynamic_offset_start) {
push->desc_offsets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
push->desc_offsets[s] |= (layout->set[s].dynamic_offset_start &
ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
modified = true;
}
@@ -586,15 +586,17 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
if (update_desc_sets) {
struct anv_push_constants *push = &pipe_state->push_constants;
struct anv_address addr = anv_descriptor_set_address(set);
push->desc_sets[set_index] &= ~ANV_DESCRIPTOR_SET_ADDRESS_MASK;
push->desc_sets[set_index] |= (anv_address_physical(addr) &
ANV_DESCRIPTOR_SET_ADDRESS_MASK);
struct anv_address set_addr = anv_descriptor_set_address(set);
uint64_t addr = anv_address_physical(set_addr);
uint32_t offset = addr & 0xffffffff;
assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0);
push->desc_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK;
push->desc_offsets[set_index] |= offset;
if (addr.bo) {
if (set_addr.bo) {
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
cmd_buffer->batch.alloc,
addr.bo);
set_addr.bo);
}
}

View File

@@ -1017,10 +1017,11 @@ VkResult anv_CreateDescriptorPool(
}
} else {
VkResult result = anv_device_alloc_bo(device,
"descriptors",
"indirect descriptors",
descriptor_bo_size,
ANV_BO_ALLOC_MAPPED |
ANV_BO_ALLOC_SNOOPED,
ANV_BO_ALLOC_SNOOPED |
ANV_BO_ALLOC_DESCRIPTOR_POOL,
0 /* explicit_address */,
&pool->bo);
if (result != VK_SUCCESS) {

View File

@@ -70,10 +70,10 @@ anv_nir_compute_push_layout(nir_shader *nir,
case nir_intrinsic_load_desc_set_address_intel:
case nir_intrinsic_load_desc_set_dynamic_index_intel: {
unsigned base = offsetof(struct anv_push_constants, desc_sets);
unsigned base = offsetof(struct anv_push_constants, desc_offsets);
push_start = MIN2(push_start, base);
push_end = MAX2(push_end, base +
sizeof_field(struct anv_push_constants, desc_sets));
sizeof_field(struct anv_push_constants, desc_offsets));
break;
}
@@ -177,27 +177,30 @@ anv_nir_compute_push_layout(nir_shader *nir,
case nir_intrinsic_load_desc_set_address_intel: {
b->cursor = nir_before_instr(&intrin->instr);
nir_ssa_def *pc_load = nir_load_uniform(b, 1, 64,
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint64_t)),
.base = offsetof(struct anv_push_constants, desc_sets),
.range = sizeof_field(struct anv_push_constants, desc_sets),
.dest_type = nir_type_uint64);
pc_load = nir_iand_imm(b, pc_load, ANV_DESCRIPTOR_SET_ADDRESS_MASK);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load);
nir_ssa_def *pc_load = nir_load_uniform(b, 1, 32,
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)),
.base = offsetof(struct anv_push_constants, desc_offsets),
.range = sizeof_field(struct anv_push_constants, desc_offsets),
.dest_type = nir_type_uint32);
pc_load = nir_iand_imm(b, pc_load, ANV_DESCRIPTOR_SET_OFFSET_MASK);
nir_ssa_def *desc_addr =
nir_pack_64_2x32_split(
b, pc_load,
nir_load_reloc_const_intel(
b, BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH));
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc_addr);
break;
}
case nir_intrinsic_load_desc_set_dynamic_index_intel: {
b->cursor = nir_before_instr(&intrin->instr);
nir_ssa_def *pc_load = nir_load_uniform(b, 1, 64,
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint64_t)),
.base = offsetof(struct anv_push_constants, desc_sets),
.range = sizeof_field(struct anv_push_constants, desc_sets),
.dest_type = nir_type_uint64);
pc_load = nir_i2i32(
b,
nir_iand_imm(
b, pc_load, ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK));
nir_ssa_def *pc_load = nir_load_uniform(b, 1, 32,
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)),
.base = offsetof(struct anv_push_constants, desc_offsets),
.range = sizeof_field(struct anv_push_constants, desc_offsets),
.dest_type = nir_type_uint32);
pc_load = nir_iand_imm(
b, pc_load, ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load);
break;
}

View File

@@ -122,7 +122,14 @@ anv_shader_bin_create(struct anv_device *device,
prog_data_in->const_data_offset;
int rv_count = 0;
struct brw_shader_reloc_value reloc_values[5];
struct brw_shader_reloc_value reloc_values[6];
assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
.value = device->physical->indirect_descriptors ?
(device->physical->va.descriptor_pool.addr >> 32) :
(device->physical->va.binding_table_pool.addr >> 32),
};
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
.value = shader_data_addr,

View File

@@ -2444,15 +2444,18 @@ struct anv_push_constants {
/** Ray query globals (RT_DISPATCH_GLOBALS) */
uint64_t ray_query_globals;
#define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint64_t)ANV_UBO_ALIGNMENT - 1)
#define ANV_DESCRIPTOR_SET_ADDRESS_MASK (~(uint64_t)(ANV_UBO_ALIGNMENT - 1))
#define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint32_t)ANV_UBO_ALIGNMENT - 1)
#define ANV_DESCRIPTOR_SET_OFFSET_MASK (~(uint32_t)(ANV_UBO_ALIGNMENT - 1))
/**
* Base offsets for descriptor sets from
* INDIRECT_DESCRIPTOR_POOL_MIN_ADDRESS
*
* In bits [0:5] : dynamic offset index in dynamic_offsets[] for the set
*
* In bits [6:31] : descriptor set offset
*/
uint64_t desc_sets[MAX_SETS];
uint32_t desc_offsets[MAX_SETS];
union {
struct {