intel: Move subslice_total into devinfo
Reworks:
 * Move asserts for subslice_total into intel_device_info.c (s-b Ken)
 * Drop now unused intel_device_info_subslice_total (s-b Ken)
 * Add comment for subslice_total (Ken)

Suggested-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12799>
This commit is contained in:
@@ -2645,9 +2645,8 @@ crocus_get_scratch_space(struct crocus_context *ice,
|
||||
|
||||
struct crocus_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
|
||||
|
||||
unsigned subslice_total = screen->subslice_total;
|
||||
subslice_total = 4 * devinfo->num_slices;
|
||||
// assert(subslice_total >= screen->subslice_total);
|
||||
/* TODO: This doesn't seem to match brw_alloc_stage_scratch */
|
||||
unsigned cs_subslices = 4 * devinfo->num_slices;
|
||||
|
||||
if (!*bop) {
|
||||
unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
|
||||
@@ -2658,7 +2657,7 @@ crocus_get_scratch_space(struct crocus_context *ice,
|
||||
[MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
|
||||
[MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
|
||||
[MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
|
||||
[MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslice_total,
|
||||
[MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * cs_subslices,
|
||||
};
|
||||
|
||||
uint32_t size = per_thread_scratch * max_threads[stage];
|
||||
|
@@ -813,9 +813,6 @@ crocus_screen_create(int fd, const struct pipe_screen_config *config)
|
||||
slab_create_parent(&screen->transfer_pool,
|
||||
sizeof(struct crocus_transfer), 64);
|
||||
|
||||
screen->subslice_total = intel_device_info_subslice_total(&screen->devinfo);
|
||||
assert(screen->subslice_total >= 1);
|
||||
|
||||
struct pipe_screen *pscreen = &screen->base;
|
||||
|
||||
crocus_init_screen_fence_functions(pscreen);
|
||||
|
@@ -201,8 +201,6 @@ struct crocus_screen {
|
||||
bool always_flush_cache;
|
||||
} driconf;
|
||||
|
||||
unsigned subslice_total;
|
||||
|
||||
uint64_t aperture_bytes;
|
||||
|
||||
struct intel_device_info devinfo;
|
||||
|
@@ -8075,7 +8075,7 @@ crocus_upload_compute_state(struct crocus_context *ice,
|
||||
}
|
||||
|
||||
vfe.MaximumNumberofThreads =
|
||||
devinfo->max_cs_threads * screen->subslice_total - 1;
|
||||
devinfo->max_cs_threads * devinfo->subslice_total - 1;
|
||||
vfe.ResetGatewayTimer =
|
||||
Resettingrelativetimerandlatchingtheglobaltimestamp;
|
||||
vfe.BypassGatewayControl = true;
|
||||
|
@@ -2334,7 +2334,7 @@ iris_get_scratch_space(struct iris_context *ice,
|
||||
* For, Gfx11+, scratch space allocation is based on the number of threads
|
||||
* in the base configuration.
|
||||
*/
|
||||
unsigned subslice_total = screen->subslice_total;
|
||||
unsigned subslice_total = devinfo->subslice_total;
|
||||
if (devinfo->verx10 == 125)
|
||||
subslice_total = 32;
|
||||
else if (devinfo->ver == 12)
|
||||
@@ -2343,7 +2343,7 @@ iris_get_scratch_space(struct iris_context *ice,
|
||||
subslice_total = 8;
|
||||
else if (devinfo->ver < 11)
|
||||
subslice_total = 4 * devinfo->num_slices;
|
||||
assert(subslice_total >= screen->subslice_total);
|
||||
assert(subslice_total >= devinfo->subslice_total);
|
||||
|
||||
if (!*bop) {
|
||||
unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
|
||||
|
@@ -872,9 +872,6 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
|
||||
slab_create_parent(&screen->transfer_pool,
|
||||
sizeof(struct iris_transfer), 64);
|
||||
|
||||
screen->subslice_total = intel_device_info_subslice_total(&screen->devinfo);
|
||||
assert(screen->subslice_total >= 1);
|
||||
|
||||
iris_detect_kernel_features(screen);
|
||||
|
||||
struct pipe_screen *pscreen = &screen->base;
|
||||
|
@@ -185,8 +185,6 @@ struct iris_screen {
|
||||
unsigned kernel_features;
|
||||
#define KERNEL_HAS_WAIT_FOR_SUBMIT (1<<0)
|
||||
|
||||
unsigned subslice_total;
|
||||
|
||||
uint64_t aperture_bytes;
|
||||
|
||||
/**
|
||||
|
@@ -6920,7 +6920,7 @@ iris_upload_compute_walker(struct iris_context *ice,
|
||||
if (stage_dirty & IRIS_STAGE_DIRTY_CS) {
|
||||
iris_emit_cmd(batch, GENX(CFE_STATE), cfe) {
|
||||
cfe.MaximumNumberofThreads =
|
||||
devinfo->max_cs_threads * screen->subslice_total - 1;
|
||||
devinfo->max_cs_threads * devinfo->subslice_total - 1;
|
||||
if (prog_data->total_scratch > 0) {
|
||||
cfe.ScratchSpaceBuffer =
|
||||
iris_get_scratch_surf(ice, prog_data->total_scratch)->offset >> 4;
|
||||
@@ -7003,7 +7003,7 @@ iris_upload_gpgpu_walker(struct iris_context *ice,
|
||||
}
|
||||
|
||||
vfe.MaximumNumberofThreads =
|
||||
devinfo->max_cs_threads * screen->subslice_total - 1;
|
||||
devinfo->max_cs_threads * devinfo->subslice_total - 1;
|
||||
#if GFX_VER < 11
|
||||
vfe.ResetGatewayTimer =
|
||||
Resettingrelativetimerandlatchingtheglobaltimestamp;
|
||||
|
@@ -1538,5 +1538,13 @@ intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo)
|
||||
intel_get_aperture_size(fd, &devinfo->aperture_bytes);
|
||||
devinfo->has_tiling_uapi = has_get_tiling(fd);
|
||||
|
||||
devinfo->subslice_total = 0;
|
||||
for (uint32_t i = 0; i < devinfo->num_slices; i++)
|
||||
devinfo->subslice_total += __builtin_popcount(devinfo->subslice_masks[i]);
|
||||
|
||||
/* Gfx7 and older do not support EU/Subslice info */
|
||||
assert(devinfo->subslice_total >= 1 || devinfo->ver <= 7);
|
||||
devinfo->subslice_total = MAX2(devinfo->subslice_total, 1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@@ -166,6 +166,12 @@ struct intel_device_info
|
||||
uint8_t subslice_masks[INTEL_DEVICE_MAX_SLICES *
|
||||
DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)];
|
||||
|
||||
/**
|
||||
* The number of enabled subslices (considering fusing). For exactly which
|
||||
* subslices are enabled, see subslice_masks[].
|
||||
*/
|
||||
unsigned subslice_total;
|
||||
|
||||
/**
|
||||
* An array of bit mask of EUs available, use eu_slice_stride &
|
||||
* eu_subslice_stride to access this array.
|
||||
@@ -332,17 +338,6 @@ intel_device_info_eu_available(const struct intel_device_info *devinfo,
|
||||
return (devinfo->eu_masks[subslice_offset + eu / 8] & (1U << eu % 8)) != 0;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
intel_device_info_subslice_total(const struct intel_device_info *devinfo)
|
||||
{
|
||||
uint32_t total = 0;
|
||||
|
||||
for (uint32_t i = 0; i < devinfo->num_slices; i++)
|
||||
total += __builtin_popcount(devinfo->subslice_masks[i]);
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
intel_device_info_eu_total(const struct intel_device_info *devinfo)
|
||||
{
|
||||
|
@@ -1464,7 +1464,7 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
|
||||
if (bo != NULL)
|
||||
return bo;
|
||||
|
||||
unsigned subslices = MAX2(device->physical->subslice_total, 1);
|
||||
unsigned subslices = devinfo->subslice_total;
|
||||
|
||||
/* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
|
||||
*
|
||||
|
@@ -940,9 +940,6 @@ anv_physical_device_try_create(struct anv_instance *instance,
|
||||
device->has_userptr_probe =
|
||||
anv_gem_get_param(fd, I915_PARAM_HAS_USERPTR_PROBE);
|
||||
|
||||
/* GENs prior to 8 do not support EU/Subslice info */
|
||||
device->subslice_total = intel_device_info_subslice_total(&device->info);
|
||||
|
||||
device->compiler = brw_compiler_create(NULL, &device->info);
|
||||
if (device->compiler == NULL) {
|
||||
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
@@ -960,8 +960,6 @@ struct anv_physical_device {
|
||||
|
||||
bool always_flush_cache;
|
||||
|
||||
uint32_t subslice_total;
|
||||
|
||||
struct {
|
||||
uint32_t family_count;
|
||||
struct anv_queue_family families[ANV_MAX_QUEUE_FAMILIES];
|
||||
|
@@ -5401,11 +5401,9 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
|
||||
* GPGPU and 3D are back-to-back and this seems to fix it. We don't
|
||||
* really know why.
|
||||
*/
|
||||
const uint32_t subslices =
|
||||
MAX2(cmd_buffer->device->physical->subslice_total, 1);
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_VFE_STATE), vfe) {
|
||||
vfe.MaximumNumberofThreads =
|
||||
devinfo->max_cs_threads * subslices - 1;
|
||||
devinfo->max_cs_threads * devinfo->subslice_total - 1;
|
||||
vfe.NumberofURBEntries = 2;
|
||||
vfe.URBEntryAllocationSize = 2;
|
||||
}
|
||||
|
@@ -2589,14 +2589,12 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
|
||||
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
||||
anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
|
||||
|
||||
const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
|
||||
|
||||
const UNUSED struct anv_shader_bin *cs_bin = pipeline->cs;
|
||||
const struct intel_device_info *devinfo = &device->info;
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(CFE_STATE), cfe) {
|
||||
cfe.MaximumNumberofThreads =
|
||||
devinfo->max_cs_threads * subslices - 1;
|
||||
devinfo->max_cs_threads * devinfo->subslice_total - 1;
|
||||
cfe.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, cs_bin);
|
||||
}
|
||||
}
|
||||
@@ -2618,8 +2616,6 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
|
||||
ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
|
||||
cs_prog_data->push.cross_thread.regs, 2);
|
||||
|
||||
const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
|
||||
|
||||
const struct anv_shader_bin *cs_bin = pipeline->cs;
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) {
|
||||
@@ -2629,7 +2625,7 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
|
||||
vfe.GPGPUMode = true;
|
||||
#endif
|
||||
vfe.MaximumNumberofThreads =
|
||||
devinfo->max_cs_threads * subslices - 1;
|
||||
devinfo->max_cs_threads * devinfo->subslice_total - 1;
|
||||
vfe.NumberofURBEntries = GFX_VER <= 7 ? 0 : 2;
|
||||
#if GFX_VER < 11
|
||||
vfe.ResetGatewayTimer = true;
|
||||
|
@@ -497,9 +497,8 @@ brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
|
||||
/* We seem to have issues with geometry flickering when 3D and compute
|
||||
* are combined in the same batch and this appears to fix it.
|
||||
*/
|
||||
const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
|
||||
const uint32_t maxNumberofThreads =
|
||||
devinfo->max_cs_threads * subslices - 1;
|
||||
devinfo->max_cs_threads * devinfo->subslice_total - 1;
|
||||
|
||||
BEGIN_BATCH(9);
|
||||
OUT_BATCH(MEDIA_VFE_STATE << 16 | (9 - 2));
|
||||
|
@@ -455,7 +455,7 @@ brw_alloc_stage_scratch(struct brw_context *brw,
|
||||
thread_count = devinfo->max_wm_threads;
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE: {
|
||||
unsigned subslices = MAX2(brw->screen->subslice_total, 1);
|
||||
unsigned subslices = devinfo->subslice_total;
|
||||
|
||||
/* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
|
||||
*
|
||||
|
@@ -2611,9 +2611,6 @@ __DRIconfig **brw_init_screen(__DRIscreen *dri_screen)
|
||||
isl_device_init(&screen->isl_dev, &screen->devinfo,
|
||||
screen->hw_has_swizzling);
|
||||
|
||||
/* GENs prior to 8 do not support EU/Subslice info */
|
||||
screen->subslice_total = intel_device_info_subslice_total(devinfo);
|
||||
|
||||
/* Gfx7-7.5 kernel requirements / command parser saga:
|
||||
*
|
||||
* - pre-v3.16:
|
||||
|
@@ -107,11 +107,6 @@ struct brw_screen
|
||||
*/
|
||||
int cmd_parser_version;
|
||||
|
||||
/**
|
||||
* Number of subslices reported by the I915_PARAM_SUBSLICE_TOTAL parameter
|
||||
*/
|
||||
int subslice_total;
|
||||
|
||||
bool mesa_format_supports_texture[MESA_FORMAT_COUNT];
|
||||
bool mesa_format_supports_render[MESA_FORMAT_COUNT];
|
||||
enum isl_format mesa_to_isl_render_format[MESA_FORMAT_COUNT];
|
||||
|
@@ -4321,15 +4321,8 @@ genX(upload_cs_state)(struct brw_context *brw)
|
||||
vfe.PerThreadScratchSpace = per_thread_scratch_value;
|
||||
}
|
||||
|
||||
/* If brw->screen->subslice_total is greater than one, then
|
||||
* devinfo->max_cs_threads stores number of threads per sub-slice;
|
||||
* thus we need to multiply by that number by subslices to get
|
||||
* the actual maximum number of threads; the -1 is because the HW
|
||||
* has a bias of 1 (would not make sense to say the maximum number
|
||||
* of threads is 0).
|
||||
*/
|
||||
const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
|
||||
vfe.MaximumNumberofThreads = devinfo->max_cs_threads * subslices - 1;
|
||||
vfe.MaximumNumberofThreads =
|
||||
devinfo->max_cs_threads * devinfo->subslice_total - 1;
|
||||
vfe.NumberofURBEntries = GFX_VER >= 8 ? 2 : 0;
|
||||
#if GFX_VER < 11
|
||||
vfe.ResetGatewayTimer =
|
||||
|
Reference in New Issue
Block a user