radv: use a more relaxed alignment for upload buffer allocations

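The 256-byte alignment previously used for some upload buffer allocations was
higher than necessary. Allocations are now only aligned to the scalar cache
line size (64 bytes on GFX10+, 32 bytes otherwise), and only when doing so
places the allocation in fewer cache lines.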

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8833>
This commit is contained in:
Rhys Perry
2021-02-02 15:28:37 +00:00
committed by Marge Bot
parent 9de8745399
commit d906c007d6
4 changed files with 29 additions and 31 deletions

View File

@@ -459,7 +459,7 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
unsigned fence_offset, eop_bug_offset; unsigned fence_offset, eop_bug_offset;
void *fence_ptr; void *fence_ptr;
radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 8, &fence_offset, radv_cmd_buffer_upload_alloc(cmd_buffer, 8, &fence_offset,
&fence_ptr); &fence_ptr);
memset(fence_ptr, 0, 8); memset(fence_ptr, 0, 8);
@@ -469,7 +469,7 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) { if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
/* Allocate a buffer for the EOP bug on GFX9. */ /* Allocate a buffer for the EOP bug on GFX9. */
radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 8, radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db,
&eop_bug_offset, &fence_ptr); &eop_bug_offset, &fence_ptr);
memset(fence_ptr, 0, 16 * num_db); memset(fence_ptr, 0, 16 * num_db);
cmd_buffer->gfx9_eop_bug_va = cmd_buffer->gfx9_eop_bug_va =
@@ -548,14 +548,21 @@ radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer,
bool bool
radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
unsigned size, unsigned size, unsigned *out_offset, void **ptr)
unsigned alignment,
unsigned *out_offset,
void **ptr)
{ {
assert(util_is_power_of_two_nonzero(alignment)); assert(size % 4 == 0);
struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info;
/* Align to the scalar cache line size if it results in this allocation
* being placed in fewer of them.
*/
unsigned offset = cmd_buffer->upload.offset;
unsigned line_size = rad_info->chip_class >= GFX10 ? 64 : 32;
unsigned gap = align(offset, line_size) - offset;
if ((size & (line_size - 1)) > gap)
offset = align(offset, line_size);
uint64_t offset = align(cmd_buffer->upload.offset, alignment);
if (offset + size > cmd_buffer->upload.size) { if (offset + size > cmd_buffer->upload.size) {
if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size)) if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size))
return false; return false;
@@ -571,13 +578,11 @@ radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
bool bool
radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
unsigned size, unsigned alignment, unsigned size, const void *data, unsigned *out_offset)
const void *data, unsigned *out_offset)
{ {
uint8_t *ptr; uint8_t *ptr;
if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, alignment, if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, out_offset, (void **)&ptr))
out_offset, (void **)&ptr))
return false; return false;
if (ptr) if (ptr)
@@ -2634,7 +2639,7 @@ radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer,
(struct radv_descriptor_set *)&descriptors_state->push_set.set; (struct radv_descriptor_set *)&descriptors_state->push_set.set;
unsigned bo_offset; unsigned bo_offset;
if (!radv_cmd_buffer_upload_data(cmd_buffer, set->header.size, 32, if (!radv_cmd_buffer_upload_data(cmd_buffer, set->header.size,
set->header.mapped_ptr, set->header.mapped_ptr,
&bo_offset)) &bo_offset))
return; return;
@@ -2653,8 +2658,7 @@ radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
uint32_t offset; uint32_t offset;
void *ptr; void *ptr;
if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, &offset, &ptr))
256, &offset, &ptr))
return; return;
for (unsigned i = 0; i < MAX_SETS; i++) { for (unsigned i = 0; i < MAX_SETS; i++) {
@@ -2798,8 +2802,7 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
if (need_push_constants) { if (need_push_constants) {
if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size + if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
16 * layout->dynamic_offset_count, 16 * layout->dynamic_offset_count, &offset, &ptr))
256, &offset, &ptr))
return; return;
memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size); memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
@@ -2847,7 +2850,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
uint64_t va; uint64_t va;
/* allocate some descriptor state for vertex buffers */ /* allocate some descriptor state for vertex buffers */
if (!radv_cmd_buffer_upload_alloc(cmd_buffer, count * 16, 256, if (!radv_cmd_buffer_upload_alloc(cmd_buffer, count * 16,
&vb_offset, &vb_ptr)) &vb_offset, &vb_ptr))
return; return;
@@ -2970,7 +2973,7 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
/* Allocate some descriptor state for streamout buffers. */ /* Allocate some descriptor state for streamout buffers. */
if (!radv_cmd_buffer_upload_alloc(cmd_buffer, if (!radv_cmd_buffer_upload_alloc(cmd_buffer,
MAX_SO_BUFFERS * 16, 256, MAX_SO_BUFFERS * 16,
&so_offset, &so_ptr)) &so_offset, &so_ptr))
return; return;
@@ -4237,9 +4240,8 @@ void radv_meta_push_descriptor_set(
push_set->header.size = layout->set[set].layout->size; push_set->header.size = layout->set[set].layout->size;
push_set->header.layout = layout->set[set].layout; push_set->header.layout = layout->set[set].layout;
if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->header.size, 32, if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->header.size,
&bo_offset, &bo_offset, (void**) &push_set->header.mapped_ptr))
(void**) &push_set->header.mapped_ptr))
return; return;
push_set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); push_set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
@@ -6789,7 +6791,7 @@ void radv_CmdBeginConditionalRenderingEXT(
* Based on the conditionalrender demo, it's faster to do the * Based on the conditionalrender demo, it's faster to do the
* COPY_DATA in ME (+ sync PFP) instead of PFP. * COPY_DATA in ME (+ sync PFP) instead of PFP.
*/ */
radv_cmd_buffer_upload_data(cmd_buffer, 8, 16, &pred_value, &pred_offset); radv_cmd_buffer_upload_data(cmd_buffer, 8, &pred_value, &pred_offset);
pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset; pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;

View File

@@ -546,7 +546,7 @@ void radv_CmdUpdateBuffer(
radv_cmd_buffer_trace_emit(cmd_buffer); radv_cmd_buffer_trace_emit(cmd_buffer);
} else { } else {
uint32_t buf_offset; uint32_t buf_offset;
radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset); radv_cmd_buffer_upload_data(cmd_buffer, dataSize, pData, &buf_offset);
radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo, radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
buf_offset, dstOffset + dst_buffer->offset, dataSize); buf_offset, dstOffset + dst_buffer->offset, dataSize);
} }

View File

@@ -1504,17 +1504,13 @@ void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer); void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer);
bool bool
radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
unsigned size, unsigned size, unsigned *out_offset, void **ptr);
unsigned alignment,
unsigned *out_offset,
void **ptr);
void void
radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer, radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
const struct radv_subpass *subpass); const struct radv_subpass *subpass);
bool bool
radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
unsigned size, unsigned alignmnet, unsigned size, const void *data, unsigned *out_offset);
const void *data, unsigned *out_offset);
void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer); void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer); void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);

View File

@@ -1744,7 +1744,7 @@ static void si_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigne
assert(size < SI_CPDMA_ALIGNMENT); assert(size < SI_CPDMA_ALIGNMENT);
radv_cmd_buffer_upload_alloc(cmd_buffer, buf_size, SI_CPDMA_ALIGNMENT, &offset, &ptr); radv_cmd_buffer_upload_alloc(cmd_buffer, buf_size, &offset, &ptr);
va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
va += offset; va += offset;