freedreno,tu,ir3: Move threadsize_base and max_waves to fd_dev_info
Signed-off-by: Valentine Burley <valentine.burley@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29277>
This commit is contained in:

committed by
Marge Bot

parent
692e1ab2c1
commit
c4da848a1a
@@ -56,6 +56,10 @@ struct fd_dev_info {
|
||||
/* Information for private memory calculations */
|
||||
uint32_t fibers_per_sp;
|
||||
|
||||
uint32_t threadsize_base;
|
||||
|
||||
uint32_t max_waves;
|
||||
|
||||
/* number of CCU is always equal to the number of SP */
|
||||
union {
|
||||
uint32_t num_sp_cores;
|
||||
|
@@ -118,7 +118,8 @@ class GPUInfo(Struct):
|
||||
def __init__(self, chip, gmem_align_w, gmem_align_h,
|
||||
tile_align_w, tile_align_h,
|
||||
tile_max_w, tile_max_h, num_vsc_pipes,
|
||||
cs_shared_mem_size, num_sp_cores, wave_granularity, fibers_per_sp):
|
||||
cs_shared_mem_size, num_sp_cores, wave_granularity, fibers_per_sp,
|
||||
threadsize_base = 64, max_waves = 16):
|
||||
self.chip = chip.value
|
||||
self.gmem_align_w = gmem_align_w
|
||||
self.gmem_align_h = gmem_align_h
|
||||
@@ -131,6 +132,8 @@ class GPUInfo(Struct):
|
||||
self.num_sp_cores = num_sp_cores
|
||||
self.wave_granularity = wave_granularity
|
||||
self.fibers_per_sp = fibers_per_sp
|
||||
self.threadsize_base = threadsize_base
|
||||
self.max_waves = max_waves
|
||||
|
||||
s.gpu_infos.append(self)
|
||||
|
||||
@@ -143,7 +146,8 @@ class A6xxGPUInfo(GPUInfo):
|
||||
def __init__(self, chip, template, num_ccu,
|
||||
tile_align_w, tile_align_h, num_vsc_pipes,
|
||||
cs_shared_mem_size, wave_granularity, fibers_per_sp,
|
||||
magic_regs, raw_magic_regs = None):
|
||||
magic_regs, raw_magic_regs = None, threadsize_base = 64,
|
||||
max_waves = 16):
|
||||
if chip == CHIP.A6XX:
|
||||
tile_max_w = 1024 # max_bitfield_val(5, 0, 5)
|
||||
tile_max_h = max_bitfield_val(14, 8, 4) # 1008
|
||||
@@ -160,7 +164,9 @@ class A6xxGPUInfo(GPUInfo):
|
||||
cs_shared_mem_size = cs_shared_mem_size,
|
||||
num_sp_cores = num_ccu, # The # of SP cores seems to always match # of CCU
|
||||
wave_granularity = wave_granularity,
|
||||
fibers_per_sp = fibers_per_sp)
|
||||
fibers_per_sp = fibers_per_sp,
|
||||
threadsize_base = threadsize_base,
|
||||
max_waves = max_waves)
|
||||
|
||||
self.num_ccu = num_ccu
|
||||
|
||||
@@ -202,6 +208,7 @@ add_gpus([
|
||||
num_sp_cores = 0, # TODO
|
||||
wave_granularity = 2,
|
||||
fibers_per_sp = 0, # TODO
|
||||
threadsize_base = 8, # TODO: Confirm this
|
||||
))
|
||||
|
||||
add_gpus([
|
||||
@@ -221,6 +228,7 @@ add_gpus([
|
||||
num_sp_cores = 0, # TODO
|
||||
wave_granularity = 2,
|
||||
fibers_per_sp = 0, # TODO
|
||||
threadsize_base = 8,
|
||||
))
|
||||
|
||||
add_gpus([
|
||||
@@ -238,6 +246,7 @@ add_gpus([
|
||||
num_sp_cores = 0, # TODO
|
||||
wave_granularity = 2,
|
||||
fibers_per_sp = 0, # TODO
|
||||
threadsize_base = 32, # TODO: Confirm this
|
||||
))
|
||||
|
||||
add_gpus([
|
||||
@@ -255,6 +264,7 @@ add_gpus([
|
||||
num_sp_cores = 1,
|
||||
wave_granularity = 2,
|
||||
fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
|
||||
threadsize_base = 32,
|
||||
))
|
||||
|
||||
add_gpus([
|
||||
@@ -271,6 +281,7 @@ add_gpus([
|
||||
num_sp_cores = 2,
|
||||
wave_granularity = 2,
|
||||
fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
|
||||
threadsize_base = 32,
|
||||
))
|
||||
|
||||
add_gpus([
|
||||
@@ -287,6 +298,7 @@ add_gpus([
|
||||
num_sp_cores = 4,
|
||||
wave_granularity = 2,
|
||||
fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
|
||||
threadsize_base = 32,
|
||||
))
|
||||
|
||||
|
||||
|
@@ -155,7 +155,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
|
||||
/* TODO see if older GPU's were different here */
|
||||
compiler->branchstack_size = 64;
|
||||
compiler->wave_granularity = dev_info->wave_granularity;
|
||||
compiler->max_waves = 16;
|
||||
compiler->max_waves = dev_info->max_waves;
|
||||
|
||||
compiler->max_variable_workgroup_size = 1024;
|
||||
|
||||
@@ -266,16 +266,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
|
||||
compiler->reg_size_vec4 = 96;
|
||||
}
|
||||
|
||||
if (compiler->gen >= 6) {
|
||||
compiler->threadsize_base = 64;
|
||||
} else if (compiler->gen >= 4) {
|
||||
/* TODO: Confirm this for a4xx. For a5xx this is based on the Vulkan
|
||||
* 1.1 subgroupSize which is 32.
|
||||
*/
|
||||
compiler->threadsize_base = 32;
|
||||
} else {
|
||||
compiler->threadsize_base = 8;
|
||||
}
|
||||
compiler->threadsize_base = dev_info->threadsize_base;
|
||||
|
||||
if (compiler->gen >= 4) {
|
||||
/* need special handling for "flat" */
|
||||
|
@@ -661,7 +661,8 @@ tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
|
||||
p->deviceNodeMask = 0;
|
||||
p->deviceLUIDValid = false;
|
||||
|
||||
p->subgroupSize = pdevice->info->a6xx.supports_double_threadsize ? 128 : 64;
|
||||
p->subgroupSize = pdevice->info->a6xx.supports_double_threadsize ?
|
||||
pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
|
||||
p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
|
||||
VK_SUBGROUP_FEATURE_VOTE_BIT |
|
||||
@@ -778,11 +779,10 @@ static void
|
||||
tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
|
||||
struct vk_properties *p)
|
||||
{
|
||||
/* TODO move threadsize_base and max_waves to fd_dev_info and use them here */
|
||||
p->minSubgroupSize = 64; /* threadsize_base */
|
||||
p->maxSubgroupSize =
|
||||
pdevice->info->a6xx.supports_double_threadsize ? 128 : 64;
|
||||
p->maxComputeWorkgroupSubgroups = 16; /* max_waves */
|
||||
p->minSubgroupSize = pdevice->info->threadsize_base;
|
||||
p->maxSubgroupSize = pdevice->info->a6xx.supports_double_threadsize ?
|
||||
pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
|
||||
p->maxComputeWorkgroupSubgroups = pdevice->info->max_waves;
|
||||
p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;
|
||||
|
||||
p->maxInlineUniformBlockSize = MAX_INLINE_UBO_RANGE;
|
||||
@@ -902,7 +902,9 @@ tu_get_properties(struct tu_physical_device *pdevice,
|
||||
props->maxComputeWorkGroupCount[0] =
|
||||
props->maxComputeWorkGroupCount[1] =
|
||||
props->maxComputeWorkGroupCount[2] = 65535;
|
||||
props->maxComputeWorkGroupInvocations = pdevice->info->a6xx.supports_double_threadsize ? 2048 : 1024;
|
||||
props->maxComputeWorkGroupInvocations = pdevice->info->a6xx.supports_double_threadsize ?
|
||||
pdevice->info->threadsize_base * 2 * pdevice->info->max_waves :
|
||||
pdevice->info->threadsize_base * pdevice->info->max_waves;
|
||||
props->maxComputeWorkGroupSize[0] =
|
||||
props->maxComputeWorkGroupSize[1] =
|
||||
props->maxComputeWorkGroupSize[2] = 1024;
|
||||
|
Reference in New Issue
Block a user