ac/gpu_info: replace ib_alignment with per-IP IB base and size alignments
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25043>
This commit is contained in:
@@ -677,8 +677,8 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||||||
info->ip[AMD_IP_GFX].ver_minor = info->ip[AMD_IP_COMPUTE].ver_minor = 3;
|
info->ip[AMD_IP_GFX].ver_minor = info->ip[AMD_IP_COMPUTE].ver_minor = 3;
|
||||||
}
|
}
|
||||||
info->ip[ip_type].num_queues = util_bitcount(ip_info.available_rings);
|
info->ip[ip_type].num_queues = util_bitcount(ip_info.available_rings);
|
||||||
info->ib_alignment = MAX3(info->ib_alignment, ip_info.ib_start_alignment,
|
info->ip[ip_type].ib_base_alignment = ip_info.ib_start_alignment;
|
||||||
ip_info.ib_size_alignment);
|
info->ip[ip_type].ib_size_alignment = ip_info.ib_size_alignment;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Only require gfx or compute. */
|
/* Only require gfx or compute. */
|
||||||
@@ -690,12 +690,6 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||||||
assert(util_is_power_of_two_or_zero(info->ip[AMD_IP_COMPUTE].num_queues));
|
assert(util_is_power_of_two_or_zero(info->ip[AMD_IP_COMPUTE].num_queues));
|
||||||
assert(util_is_power_of_two_or_zero(info->ip[AMD_IP_SDMA].num_queues));
|
assert(util_is_power_of_two_or_zero(info->ip[AMD_IP_SDMA].num_queues));
|
||||||
|
|
||||||
/* The kernel pads gfx and compute IBs to 256 dwords since:
|
|
||||||
* 66f3b2d527154bd258a57c8815004b5964aa1cf5
|
|
||||||
* Do the same.
|
|
||||||
*/
|
|
||||||
info->ib_alignment = MAX2(info->ib_alignment, 1024);
|
|
||||||
|
|
||||||
r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_ME, 0, 0, &info->me_fw_version,
|
r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_ME, 0, 0, &info->me_fw_version,
|
||||||
&info->me_fw_feature);
|
&info->me_fw_feature);
|
||||||
if (r) {
|
if (r) {
|
||||||
@@ -1681,8 +1675,11 @@ void ac_print_gpu_info(const struct radeon_info *info, FILE *f)
|
|||||||
|
|
||||||
for (unsigned i = 0; i < AMD_NUM_IP_TYPES; i++) {
|
for (unsigned i = 0; i < AMD_NUM_IP_TYPES; i++) {
|
||||||
if (info->ip[i].num_queues) {
|
if (info->ip[i].num_queues) {
|
||||||
fprintf(f, " IP %-7s %2u.%u \tqueues:%u\n", ip_string[i],
|
fprintf(f, " IP %-7s %2u.%u queues:%u "
|
||||||
info->ip[i].ver_major, info->ip[i].ver_minor, info->ip[i].num_queues);
|
"align(base:%u, size:%u)\n",
|
||||||
|
ip_string[i], info->ip[i].ver_major, info->ip[i].ver_minor,
|
||||||
|
info->ip[i].num_queues, info->ip[i].ib_base_alignment,
|
||||||
|
info->ip[i].ib_size_alignment);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1756,7 +1753,6 @@ void ac_print_gpu_info(const struct radeon_info *info, FILE *f)
|
|||||||
|
|
||||||
fprintf(f, "CP info:\n");
|
fprintf(f, "CP info:\n");
|
||||||
fprintf(f, " gfx_ib_pad_with_type2 = %i\n", info->gfx_ib_pad_with_type2);
|
fprintf(f, " gfx_ib_pad_with_type2 = %i\n", info->gfx_ib_pad_with_type2);
|
||||||
fprintf(f, " ib_alignment = %u\n", info->ib_alignment);
|
|
||||||
fprintf(f, " me_fw_version = %i\n", info->me_fw_version);
|
fprintf(f, " me_fw_version = %i\n", info->me_fw_version);
|
||||||
fprintf(f, " me_fw_feature = %i\n", info->me_fw_feature);
|
fprintf(f, " me_fw_feature = %i\n", info->me_fw_feature);
|
||||||
fprintf(f, " mec_fw_version = %i\n", info->mec_fw_version);
|
fprintf(f, " mec_fw_version = %i\n", info->mec_fw_version);
|
||||||
|
@@ -26,6 +26,8 @@ struct amd_ip_info {
|
|||||||
uint8_t ver_minor;
|
uint8_t ver_minor;
|
||||||
uint8_t ver_rev;
|
uint8_t ver_rev;
|
||||||
uint8_t num_queues;
|
uint8_t num_queues;
|
||||||
|
uint16_t ib_base_alignment;
|
||||||
|
uint16_t ib_size_alignment;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct radeon_info {
|
struct radeon_info {
|
||||||
@@ -160,7 +162,6 @@ struct radeon_info {
|
|||||||
|
|
||||||
/* CP info. */
|
/* CP info. */
|
||||||
bool gfx_ib_pad_with_type2;
|
bool gfx_ib_pad_with_type2;
|
||||||
unsigned ib_alignment; /* both start and size alignment */
|
|
||||||
uint32_t me_fw_version;
|
uint32_t me_fw_version;
|
||||||
uint32_t me_fw_feature;
|
uint32_t me_fw_feature;
|
||||||
uint32_t mec_fw_version;
|
uint32_t mec_fw_version;
|
||||||
|
@@ -1371,11 +1371,14 @@ radv_GetGeneratedCommandsMemoryRequirementsNV(VkDevice _device,
|
|||||||
VkDeviceSize cmd_buf_size =
|
VkDeviceSize cmd_buf_size =
|
||||||
radv_align_cmdbuf_size(device, cmd_stride * pInfo->maxSequencesCount) + radv_dgc_preamble_cmdbuf_size(device);
|
radv_align_cmdbuf_size(device, cmd_stride * pInfo->maxSequencesCount) + radv_dgc_preamble_cmdbuf_size(device);
|
||||||
VkDeviceSize upload_buf_size = upload_stride * pInfo->maxSequencesCount;
|
VkDeviceSize upload_buf_size = upload_stride * pInfo->maxSequencesCount;
|
||||||
|
unsigned ib_base_alignment = MAX2(device->physical_device->rad_info.ip[AMD_IP_GFX].ib_base_alignment,
|
||||||
|
device->physical_device->rad_info.ip[AMD_IP_COMPUTE].ib_base_alignment);
|
||||||
|
unsigned ib_size_alignment = MAX2(device->physical_device->rad_info.ip[AMD_IP_GFX].ib_size_alignment,
|
||||||
|
device->physical_device->rad_info.ip[AMD_IP_COMPUTE].ib_size_alignment);
|
||||||
|
|
||||||
pMemoryRequirements->memoryRequirements.memoryTypeBits = device->physical_device->memory_types_32bit;
|
pMemoryRequirements->memoryRequirements.memoryTypeBits = device->physical_device->memory_types_32bit;
|
||||||
pMemoryRequirements->memoryRequirements.alignment = device->physical_device->rad_info.ib_alignment;
|
pMemoryRequirements->memoryRequirements.alignment = ib_base_alignment;
|
||||||
pMemoryRequirements->memoryRequirements.size =
|
pMemoryRequirements->memoryRequirements.size = align(cmd_buf_size + upload_buf_size, ib_size_alignment);
|
||||||
align(cmd_buf_size + upload_buf_size, pMemoryRequirements->memoryRequirements.alignment);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
VKAPI_ATTR void VKAPI_CALL
|
VKAPI_ATTR void VKAPI_CALL
|
||||||
|
@@ -248,8 +248,9 @@ radv_amdgpu_cs_bo_create(struct radv_amdgpu_cs *cs, uint32_t ib_size)
|
|||||||
const enum radeon_bo_flag flags =
|
const enum radeon_bo_flag flags =
|
||||||
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | gtt_wc_flag;
|
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | gtt_wc_flag;
|
||||||
|
|
||||||
return ws->buffer_create(ws, ib_size, cs->ws->info.ib_alignment, domain, flags, RADV_BO_PRIORITY_CS, 0,
|
ib_size = align(ib_size, cs->ws->info.ip[cs->ib.ip_type].ib_size_alignment);
|
||||||
&cs->ib_buffer);
|
return ws->buffer_create(ws, ib_size, cs->ws->info.ip[cs->ib.ip_type].ib_base_alignment, domain, flags,
|
||||||
|
RADV_BO_PRIORITY_CS, 0, &cs->ib_buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
static VkResult
|
static VkResult
|
||||||
@@ -1670,8 +1671,8 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request
|
|||||||
chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
|
chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
|
||||||
|
|
||||||
ib = &request->ibs[i];
|
ib = &request->ibs[i];
|
||||||
assert(ib->ib_mc_address && ib->ib_mc_address % ctx->ws->info.ib_alignment == 0);
|
assert(ib->ib_mc_address && ib->ib_mc_address % ctx->ws->info.ip[ib->ip_type].ib_base_alignment == 0);
|
||||||
assert(ib->size);
|
assert(ib->size && (ib->size * 4) % ctx->ws->info.ip[ib->ip_type].ib_size_alignment == 0);
|
||||||
|
|
||||||
chunk_data[i].ib_data._pad = 0;
|
chunk_data[i].ib_data._pad = 0;
|
||||||
chunk_data[i].ib_data.va_start = ib->ib_mc_address;
|
chunk_data[i].ib_data.va_start = ib->ib_mc_address;
|
||||||
|
@@ -953,9 +953,11 @@ static void amdgpu_set_ib_size(struct radeon_cmdbuf *rcs, struct amdgpu_ib *ib)
|
|||||||
static void amdgpu_ib_finalize(struct amdgpu_winsys *ws, struct radeon_cmdbuf *rcs,
|
static void amdgpu_ib_finalize(struct amdgpu_winsys *ws, struct radeon_cmdbuf *rcs,
|
||||||
struct amdgpu_ib *ib)
|
struct amdgpu_ib *ib)
|
||||||
{
|
{
|
||||||
|
struct amdgpu_cs *cs = (struct amdgpu_cs*)ib;
|
||||||
|
|
||||||
amdgpu_set_ib_size(rcs, ib);
|
amdgpu_set_ib_size(rcs, ib);
|
||||||
ib->used_ib_space += rcs->current.cdw * 4;
|
ib->used_ib_space += rcs->current.cdw * 4;
|
||||||
ib->used_ib_space = align(ib->used_ib_space, ws->info.ib_alignment);
|
ib->used_ib_space = align(ib->used_ib_space, ws->info.ip[cs->ip_type].ib_base_alignment);
|
||||||
ib->max_ib_size = MAX2(ib->max_ib_size, rcs->prev_dw + rcs->current.cdw);
|
ib->max_ib_size = MAX2(ib->max_ib_size, rcs->prev_dw + rcs->current.cdw);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1145,12 +1147,12 @@ amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_i
|
|||||||
struct amdgpu_cs *cs = amdgpu_cs(rcs);
|
struct amdgpu_cs *cs = amdgpu_cs(rcs);
|
||||||
struct amdgpu_winsys *ws = cs->ws;
|
struct amdgpu_winsys *ws = cs->ws;
|
||||||
struct amdgpu_cs_context *csc[2] = {&cs->csc1, &cs->csc2};
|
struct amdgpu_cs_context *csc[2] = {&cs->csc1, &cs->csc2};
|
||||||
unsigned size = align(preamble_num_dw * 4, ws->info.ib_alignment);
|
unsigned size = align(preamble_num_dw * 4, ws->info.ip[cs->ip_type].ib_size_alignment);
|
||||||
struct pb_buffer *preamble_bo;
|
struct pb_buffer *preamble_bo;
|
||||||
uint32_t *map;
|
uint32_t *map;
|
||||||
|
|
||||||
/* Create the preamble IB buffer. */
|
/* Create the preamble IB buffer. */
|
||||||
preamble_bo = amdgpu_bo_create(ws, size, ws->info.ib_alignment,
|
preamble_bo = amdgpu_bo_create(ws, size, ws->info.ip[cs->ip_type].ib_base_alignment,
|
||||||
RADEON_DOMAIN_VRAM,
|
RADEON_DOMAIN_VRAM,
|
||||||
RADEON_FLAG_NO_INTERPROCESS_SHARING |
|
RADEON_FLAG_NO_INTERPROCESS_SHARING |
|
||||||
RADEON_FLAG_GTT_WC |
|
RADEON_FLAG_GTT_WC |
|
||||||
@@ -1708,7 +1710,7 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
|
|||||||
|
|
||||||
if (noop && acs->ip_type == AMD_IP_GFX) {
|
if (noop && acs->ip_type == AMD_IP_GFX) {
|
||||||
/* Reduce the IB size and fill it with NOP to make it like an empty IB. */
|
/* Reduce the IB size and fill it with NOP to make it like an empty IB. */
|
||||||
unsigned noop_size = MIN2(cs->ib[IB_MAIN].ib_bytes, ws->info.ib_alignment);
|
unsigned noop_size = MIN2(cs->ib[IB_MAIN].ib_bytes, ws->info.ip[AMD_IP_GFX].ib_size_alignment);
|
||||||
|
|
||||||
cs->ib_main_addr[0] = PKT3(PKT3_NOP, noop_size / 4 - 2, 0);
|
cs->ib_main_addr[0] = PKT3(PKT3_NOP, noop_size / 4 - 2, 0);
|
||||||
cs->ib[IB_MAIN].ib_bytes = noop_size;
|
cs->ib[IB_MAIN].ib_bytes = noop_size;
|
||||||
|
@@ -551,7 +551,11 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
|||||||
(ws->info.family == CHIP_HAWAII &&
|
(ws->info.family == CHIP_HAWAII &&
|
||||||
ws->accel_working2 < 3);
|
ws->accel_working2 < 3);
|
||||||
ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */
|
ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */
|
||||||
ws->info.ib_alignment = 4096;
|
for (unsigned i = 0; i < AMD_NUM_IP_TYPES; i++) {
|
||||||
|
/* This is probably too large. */
|
||||||
|
ws->info.ip[i].ib_base_alignment = 4096;
|
||||||
|
ws->info.ip[i].ib_size_alignment = 4096;
|
||||||
|
}
|
||||||
ws->info.has_bo_metadata = false;
|
ws->info.has_bo_metadata = false;
|
||||||
ws->info.has_eqaa_surface_allocator = false;
|
ws->info.has_eqaa_surface_allocator = false;
|
||||||
ws->info.has_sparse_vm_mappings = false;
|
ws->info.has_sparse_vm_mappings = false;
|
||||||
|
Reference in New Issue
Block a user