anv: reduce push constant size for descriptor sets
Now that descriptor sets are located in a 1GB area, we can avoid storing the whole address of each descriptor set and instead add the base address of the area to a 32-bit offset. Replayed a bunch of fossils with this; the changes are not really significant one way or another: Totals: Instrs: 9278246 -> 9277148 (-0.01%); split: -0.01%, +0.00% Cycles: 3547598421 -> 3547579435 (-0.00%); split: -0.00%, +0.00% Totals from 353 (1.14% of 31021) affected shaders: Instrs: 581546 -> 580448 (-0.19%); split: -0.23%, +0.04% Cycles: 25885422 -> 25866436 (-0.07%); split: -0.31%, +0.24% No difference on send messages or spills/fills. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21645>
This commit is contained in:

committed by
Marge Bot

parent
d2c0147228
commit
3f1ff326e0
@@ -737,6 +737,7 @@ enum brw_shader_reloc_id {
|
|||||||
BRW_SHADER_RELOC_SHADER_START_OFFSET,
|
BRW_SHADER_RELOC_SHADER_START_OFFSET,
|
||||||
BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
|
BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
|
||||||
BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
|
BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
|
||||||
|
BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum brw_shader_reloc_type {
|
enum brw_shader_reloc_type {
|
||||||
|
@@ -455,9 +455,9 @@ void anv_CmdBindPipeline(
|
|||||||
|
|
||||||
assert(layout->set[s].dynamic_offset_start < MAX_DYNAMIC_BUFFERS);
|
assert(layout->set[s].dynamic_offset_start < MAX_DYNAMIC_BUFFERS);
|
||||||
if (layout->set[s].layout->dynamic_offset_count > 0 &&
|
if (layout->set[s].layout->dynamic_offset_count > 0 &&
|
||||||
(push->desc_sets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) != layout->set[s].dynamic_offset_start) {
|
(push->desc_offsets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) != layout->set[s].dynamic_offset_start) {
|
||||||
push->desc_sets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
|
push->desc_offsets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
|
||||||
push->desc_sets[s] |= (layout->set[s].dynamic_offset_start &
|
push->desc_offsets[s] |= (layout->set[s].dynamic_offset_start &
|
||||||
ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
|
ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
|
||||||
modified = true;
|
modified = true;
|
||||||
}
|
}
|
||||||
@@ -586,15 +586,17 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
if (update_desc_sets) {
|
if (update_desc_sets) {
|
||||||
struct anv_push_constants *push = &pipe_state->push_constants;
|
struct anv_push_constants *push = &pipe_state->push_constants;
|
||||||
|
|
||||||
struct anv_address addr = anv_descriptor_set_address(set);
|
struct anv_address set_addr = anv_descriptor_set_address(set);
|
||||||
push->desc_sets[set_index] &= ~ANV_DESCRIPTOR_SET_ADDRESS_MASK;
|
uint64_t addr = anv_address_physical(set_addr);
|
||||||
push->desc_sets[set_index] |= (anv_address_physical(addr) &
|
uint32_t offset = addr & 0xffffffff;
|
||||||
ANV_DESCRIPTOR_SET_ADDRESS_MASK);
|
assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0);
|
||||||
|
push->desc_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK;
|
||||||
|
push->desc_offsets[set_index] |= offset;
|
||||||
|
|
||||||
if (addr.bo) {
|
if (set_addr.bo) {
|
||||||
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
|
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
|
||||||
cmd_buffer->batch.alloc,
|
cmd_buffer->batch.alloc,
|
||||||
addr.bo);
|
set_addr.bo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1017,10 +1017,11 @@ VkResult anv_CreateDescriptorPool(
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
VkResult result = anv_device_alloc_bo(device,
|
VkResult result = anv_device_alloc_bo(device,
|
||||||
"descriptors",
|
"indirect descriptors",
|
||||||
descriptor_bo_size,
|
descriptor_bo_size,
|
||||||
ANV_BO_ALLOC_MAPPED |
|
ANV_BO_ALLOC_MAPPED |
|
||||||
ANV_BO_ALLOC_SNOOPED,
|
ANV_BO_ALLOC_SNOOPED |
|
||||||
|
ANV_BO_ALLOC_DESCRIPTOR_POOL,
|
||||||
0 /* explicit_address */,
|
0 /* explicit_address */,
|
||||||
&pool->bo);
|
&pool->bo);
|
||||||
if (result != VK_SUCCESS) {
|
if (result != VK_SUCCESS) {
|
||||||
|
@@ -70,10 +70,10 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||||||
|
|
||||||
case nir_intrinsic_load_desc_set_address_intel:
|
case nir_intrinsic_load_desc_set_address_intel:
|
||||||
case nir_intrinsic_load_desc_set_dynamic_index_intel: {
|
case nir_intrinsic_load_desc_set_dynamic_index_intel: {
|
||||||
unsigned base = offsetof(struct anv_push_constants, desc_sets);
|
unsigned base = offsetof(struct anv_push_constants, desc_offsets);
|
||||||
push_start = MIN2(push_start, base);
|
push_start = MIN2(push_start, base);
|
||||||
push_end = MAX2(push_end, base +
|
push_end = MAX2(push_end, base +
|
||||||
sizeof_field(struct anv_push_constants, desc_sets));
|
sizeof_field(struct anv_push_constants, desc_offsets));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -177,27 +177,30 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||||||
|
|
||||||
case nir_intrinsic_load_desc_set_address_intel: {
|
case nir_intrinsic_load_desc_set_address_intel: {
|
||||||
b->cursor = nir_before_instr(&intrin->instr);
|
b->cursor = nir_before_instr(&intrin->instr);
|
||||||
nir_ssa_def *pc_load = nir_load_uniform(b, 1, 64,
|
nir_ssa_def *pc_load = nir_load_uniform(b, 1, 32,
|
||||||
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint64_t)),
|
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)),
|
||||||
.base = offsetof(struct anv_push_constants, desc_sets),
|
.base = offsetof(struct anv_push_constants, desc_offsets),
|
||||||
.range = sizeof_field(struct anv_push_constants, desc_sets),
|
.range = sizeof_field(struct anv_push_constants, desc_offsets),
|
||||||
.dest_type = nir_type_uint64);
|
.dest_type = nir_type_uint32);
|
||||||
pc_load = nir_iand_imm(b, pc_load, ANV_DESCRIPTOR_SET_ADDRESS_MASK);
|
pc_load = nir_iand_imm(b, pc_load, ANV_DESCRIPTOR_SET_OFFSET_MASK);
|
||||||
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load);
|
nir_ssa_def *desc_addr =
|
||||||
|
nir_pack_64_2x32_split(
|
||||||
|
b, pc_load,
|
||||||
|
nir_load_reloc_const_intel(
|
||||||
|
b, BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH));
|
||||||
|
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc_addr);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case nir_intrinsic_load_desc_set_dynamic_index_intel: {
|
case nir_intrinsic_load_desc_set_dynamic_index_intel: {
|
||||||
b->cursor = nir_before_instr(&intrin->instr);
|
b->cursor = nir_before_instr(&intrin->instr);
|
||||||
nir_ssa_def *pc_load = nir_load_uniform(b, 1, 64,
|
nir_ssa_def *pc_load = nir_load_uniform(b, 1, 32,
|
||||||
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint64_t)),
|
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)),
|
||||||
.base = offsetof(struct anv_push_constants, desc_sets),
|
.base = offsetof(struct anv_push_constants, desc_offsets),
|
||||||
.range = sizeof_field(struct anv_push_constants, desc_sets),
|
.range = sizeof_field(struct anv_push_constants, desc_offsets),
|
||||||
.dest_type = nir_type_uint64);
|
.dest_type = nir_type_uint32);
|
||||||
pc_load = nir_i2i32(
|
pc_load = nir_iand_imm(
|
||||||
b,
|
b, pc_load, ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
|
||||||
nir_iand_imm(
|
|
||||||
b, pc_load, ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK));
|
|
||||||
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load);
|
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@@ -122,7 +122,14 @@ anv_shader_bin_create(struct anv_device *device,
|
|||||||
prog_data_in->const_data_offset;
|
prog_data_in->const_data_offset;
|
||||||
|
|
||||||
int rv_count = 0;
|
int rv_count = 0;
|
||||||
struct brw_shader_reloc_value reloc_values[5];
|
struct brw_shader_reloc_value reloc_values[6];
|
||||||
|
assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
|
||||||
|
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||||
|
.id = BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
|
||||||
|
.value = device->physical->indirect_descriptors ?
|
||||||
|
(device->physical->va.descriptor_pool.addr >> 32) :
|
||||||
|
(device->physical->va.binding_table_pool.addr >> 32),
|
||||||
|
};
|
||||||
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||||
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
|
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
|
||||||
.value = shader_data_addr,
|
.value = shader_data_addr,
|
||||||
|
@@ -2444,15 +2444,18 @@ struct anv_push_constants {
|
|||||||
/** Ray query globals (RT_DISPATCH_GLOBALS) */
|
/** Ray query globals (RT_DISPATCH_GLOBALS) */
|
||||||
uint64_t ray_query_globals;
|
uint64_t ray_query_globals;
|
||||||
|
|
||||||
#define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint64_t)ANV_UBO_ALIGNMENT - 1)
|
#define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint32_t)ANV_UBO_ALIGNMENT - 1)
|
||||||
#define ANV_DESCRIPTOR_SET_ADDRESS_MASK (~(uint64_t)(ANV_UBO_ALIGNMENT - 1))
|
#define ANV_DESCRIPTOR_SET_OFFSET_MASK (~(uint32_t)(ANV_UBO_ALIGNMENT - 1))
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* Base offsets for descriptor sets from
|
||||||
|
* INDIRECT_DESCRIPTOR_POOL_MIN_ADDRESS
|
||||||
|
*
|
||||||
* In bits [0:5] : dynamic offset index in dynamic_offsets[] for the set
|
* In bits [0:5] : dynamic offset index in dynamic_offsets[] for the set
|
||||||
*
|
*
|
||||||
* In bits [6:63] : descriptor set address
|
* In bits [6:31] : descriptor set offset
|
||||||
*/
|
*/
|
||||||
uint64_t desc_sets[MAX_SETS];
|
uint32_t desc_offsets[MAX_SETS];
|
||||||
|
|
||||||
union {
|
union {
|
||||||
struct {
|
struct {
|
||||||
|
Reference in New Issue
Block a user