diff --git a/src/gallium/drivers/crocus/crocus_program.c b/src/gallium/drivers/crocus/crocus_program.c index 4252c1e03cb..03f11a27481 100644 --- a/src/gallium/drivers/crocus/crocus_program.c +++ b/src/gallium/drivers/crocus/crocus_program.c @@ -2645,9 +2645,8 @@ crocus_get_scratch_space(struct crocus_context *ice, struct crocus_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage]; - unsigned subslice_total = screen->subslice_total; - subslice_total = 4 * devinfo->num_slices; - // assert(subslice_total >= screen->subslice_total); + /* TODO: This doesn't seem to match brw_alloc_stage_scratch */ + unsigned cs_subslices = 4 * devinfo->num_slices; if (!*bop) { unsigned scratch_ids_per_subslice = devinfo->max_cs_threads; @@ -2658,7 +2657,7 @@ crocus_get_scratch_space(struct crocus_context *ice, [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads, [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, - [MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslice_total, + [MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * cs_subslices, }; uint32_t size = per_thread_scratch * max_threads[stage]; diff --git a/src/gallium/drivers/crocus/crocus_screen.c b/src/gallium/drivers/crocus/crocus_screen.c index ebd8af75c88..c427fff0121 100644 --- a/src/gallium/drivers/crocus/crocus_screen.c +++ b/src/gallium/drivers/crocus/crocus_screen.c @@ -813,9 +813,6 @@ crocus_screen_create(int fd, const struct pipe_screen_config *config) slab_create_parent(&screen->transfer_pool, sizeof(struct crocus_transfer), 64); - screen->subslice_total = intel_device_info_subslice_total(&screen->devinfo); - assert(screen->subslice_total >= 1); - struct pipe_screen *pscreen = &screen->base; crocus_init_screen_fence_functions(pscreen); diff --git a/src/gallium/drivers/crocus/crocus_screen.h b/src/gallium/drivers/crocus/crocus_screen.h index 5e6c4179647..d8fc1f2d155 100644 --- a/src/gallium/drivers/crocus/crocus_screen.h +++ b/src/gallium/drivers/crocus/crocus_screen.h @@ -201,8 +201,6 @@ struct crocus_screen { bool always_flush_cache; } driconf; - unsigned subslice_total; - uint64_t aperture_bytes; struct intel_device_info devinfo; diff --git a/src/gallium/drivers/crocus/crocus_state.c b/src/gallium/drivers/crocus/crocus_state.c index 3d440bdd0f1..7eb3dc74245 100644 --- a/src/gallium/drivers/crocus/crocus_state.c +++ b/src/gallium/drivers/crocus/crocus_state.c @@ -8075,7 +8075,7 @@ crocus_upload_compute_state(struct crocus_context *ice, } vfe.MaximumNumberofThreads = - devinfo->max_cs_threads * screen->subslice_total - 1; + devinfo->max_cs_threads * devinfo->subslice_total - 1; vfe.ResetGatewayTimer = Resettingrelativetimerandlatchingtheglobaltimestamp; vfe.BypassGatewayControl = true; diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 6ced0b942cc..2730add768f 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -2334,7 +2334,7 @@ iris_get_scratch_space(struct iris_context *ice, * For, Gfx11+, scratch space allocation is based on the number of threads * in the base configuration. */ - unsigned subslice_total = screen->subslice_total; + unsigned subslice_total = devinfo->subslice_total; if (devinfo->verx10 == 125) subslice_total = 32; else if (devinfo->ver == 12) @@ -2343,7 +2343,7 @@ iris_get_scratch_space(struct iris_context *ice, subslice_total = 8; else if (devinfo->ver < 11) subslice_total = 4 * devinfo->num_slices; - assert(subslice_total >= screen->subslice_total); + assert(subslice_total >= devinfo->subslice_total); if (!*bop) { unsigned scratch_ids_per_subslice = devinfo->max_cs_threads; diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c index 55db58c40ee..ffd1dc6e096 100644 --- a/src/gallium/drivers/iris/iris_screen.c +++ b/src/gallium/drivers/iris/iris_screen.c @@ -872,9 +872,6 @@ iris_screen_create(int fd, const struct pipe_screen_config *config) slab_create_parent(&screen->transfer_pool, sizeof(struct iris_transfer), 64); - screen->subslice_total = intel_device_info_subslice_total(&screen->devinfo); - assert(screen->subslice_total >= 1); - iris_detect_kernel_features(screen); struct pipe_screen *pscreen = &screen->base; diff --git a/src/gallium/drivers/iris/iris_screen.h b/src/gallium/drivers/iris/iris_screen.h index 3a4e56afb40..b789f6b1b31 100644 --- a/src/gallium/drivers/iris/iris_screen.h +++ b/src/gallium/drivers/iris/iris_screen.h @@ -185,8 +185,6 @@ struct iris_screen { unsigned kernel_features; #define KERNEL_HAS_WAIT_FOR_SUBMIT (1<<0) - unsigned subslice_total; - uint64_t aperture_bytes; /** diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index a9b2499830b..94d03c5a4ec 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -6920,7 +6920,7 @@ iris_upload_compute_walker(struct iris_context *ice, if (stage_dirty & IRIS_STAGE_DIRTY_CS) { iris_emit_cmd(batch, GENX(CFE_STATE), cfe) { cfe.MaximumNumberofThreads = - devinfo->max_cs_threads * screen->subslice_total - 1; + devinfo->max_cs_threads * devinfo->subslice_total - 1; if (prog_data->total_scratch > 0) { cfe.ScratchSpaceBuffer = iris_get_scratch_surf(ice, prog_data->total_scratch)->offset >> 4; @@ -7003,7 +7003,7 @@ iris_upload_gpgpu_walker(struct iris_context *ice, } vfe.MaximumNumberofThreads = - devinfo->max_cs_threads * screen->subslice_total - 1; + devinfo->max_cs_threads * devinfo->subslice_total - 1; #if GFX_VER < 11 vfe.ResetGatewayTimer = Resettingrelativetimerandlatchingtheglobaltimestamp; diff --git a/src/intel/dev/intel_device_info.c b/src/intel/dev/intel_device_info.c index 01c85aed2b5..eecded1d6ac 100644 --- a/src/intel/dev/intel_device_info.c +++ b/src/intel/dev/intel_device_info.c @@ -1538,5 +1538,13 @@ intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo) intel_get_aperture_size(fd, &devinfo->aperture_bytes); devinfo->has_tiling_uapi = has_get_tiling(fd); + devinfo->subslice_total = 0; + for (uint32_t i = 0; i < devinfo->num_slices; i++) + devinfo->subslice_total += __builtin_popcount(devinfo->subslice_masks[i]); + + /* Gfx7 and older do not support EU/Subslice info */ + assert(devinfo->subslice_total >= 1 || devinfo->ver <= 7); + devinfo->subslice_total = MAX2(devinfo->subslice_total, 1); + return true; } diff --git a/src/intel/dev/intel_device_info.h b/src/intel/dev/intel_device_info.h index 30330f8b5b1..006ffefda5e 100644 --- a/src/intel/dev/intel_device_info.h +++ b/src/intel/dev/intel_device_info.h @@ -166,6 +166,12 @@ struct intel_device_info uint8_t subslice_masks[INTEL_DEVICE_MAX_SLICES * DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)]; + /** + * The number of enabled subslices (considering fusing). For exactly which + * subslices are enabled, see subslice_masks[]. + */ + unsigned subslice_total; + /** * An array of bit mask of EUs available, use eu_slice_stride & * eu_subslice_stride to access this array. @@ -332,17 +338,6 @@ intel_device_info_eu_available(const struct intel_device_info *devinfo, return (devinfo->eu_masks[subslice_offset + eu / 8] & (1U << eu % 8)) != 0; } -static inline uint32_t -intel_device_info_subslice_total(const struct intel_device_info *devinfo) -{ - uint32_t total = 0; - - for (uint32_t i = 0; i < devinfo->num_slices; i++) - total += __builtin_popcount(devinfo->subslice_masks[i]); - - return total; -} - static inline uint32_t intel_device_info_eu_total(const struct intel_device_info *devinfo) { diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index c98a3d84f90..f5e0234d98e 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -1464,7 +1464,7 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool, if (bo != NULL) return bo; - unsigned subslices = MAX2(device->physical->subslice_total, 1); + unsigned subslices = devinfo->subslice_total; /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says: * diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 6ff8e2b38df..3d3ad15151e 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -940,9 +940,6 @@ anv_physical_device_try_create(struct anv_instance *instance, device->has_userptr_probe = anv_gem_get_param(fd, I915_PARAM_HAS_USERPTR_PROBE); - /* GENs prior to 8 do not support EU/Subslice info */ - device->subslice_total = intel_device_info_subslice_total(&device->info); - device->compiler = brw_compiler_create(NULL, &device->info); if (device->compiler == NULL) { result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 6888b63eb92..5194a2f1887 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -960,8 +960,6 @@ struct anv_physical_device { bool always_flush_cache; - uint32_t subslice_total; - struct { uint32_t family_count; struct anv_queue_family families[ANV_MAX_QUEUE_FAMILIES]; diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 4c00eb3da2e..7d3e72f1711 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -5401,11 +5401,9 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer, * GPGPU and 3D are back-to-back and this seems to fix it. We don't * really know why. */ - const uint32_t subslices = - MAX2(cmd_buffer->device->physical->subslice_total, 1); anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_VFE_STATE), vfe) { vfe.MaximumNumberofThreads = - devinfo->max_cs_threads * subslices - 1; + devinfo->max_cs_threads * devinfo->subslice_total - 1; vfe.NumberofURBEntries = 2; vfe.URBEntryAllocationSize = 2; } diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index b0296affa92..cb5605e8883 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -2589,14 +2589,12 @@ emit_compute_state(struct anv_compute_pipeline *pipeline, const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0); - const uint32_t subslices = MAX2(device->physical->subslice_total, 1); - const UNUSED struct anv_shader_bin *cs_bin = pipeline->cs; const struct intel_device_info *devinfo = &device->info; anv_batch_emit(&pipeline->base.batch, GENX(CFE_STATE), cfe) { cfe.MaximumNumberofThreads = - devinfo->max_cs_threads * subslices - 1; + devinfo->max_cs_threads * devinfo->subslice_total - 1; cfe.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, cs_bin); } } @@ -2618,8 +2616,6 @@ emit_compute_state(struct anv_compute_pipeline *pipeline, ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads + cs_prog_data->push.cross_thread.regs, 2); - const uint32_t subslices = MAX2(device->physical->subslice_total, 1); - const struct anv_shader_bin *cs_bin = pipeline->cs; anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) { @@ -2629,7 +2625,7 @@ emit_compute_state(struct anv_compute_pipeline *pipeline, vfe.GPGPUMode = true; #endif vfe.MaximumNumberofThreads = - devinfo->max_cs_threads * subslices - 1; + devinfo->max_cs_threads * devinfo->subslice_total - 1; vfe.NumberofURBEntries = GFX_VER <= 7 ? 0 : 2; #if GFX_VER < 11 vfe.ResetGatewayTimer = true; diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index aae5748d765..730fd8f4725 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -497,9 +497,8 @@ brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) /* We seem to have issues with geometry flickering when 3D and compute * are combined in the same batch and this appears to fix it. */ - const uint32_t subslices = MAX2(brw->screen->subslice_total, 1); const uint32_t maxNumberofThreads = - devinfo->max_cs_threads * subslices - 1; + devinfo->max_cs_threads * devinfo->subslice_total - 1; BEGIN_BATCH(9); OUT_BATCH(MEDIA_VFE_STATE << 16 | (9 - 2)); diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 4cf5172e16d..50e6892d6b5 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -455,7 +455,7 @@ brw_alloc_stage_scratch(struct brw_context *brw, thread_count = devinfo->max_wm_threads; break; case MESA_SHADER_COMPUTE: { - unsigned subslices = MAX2(brw->screen->subslice_total, 1); + unsigned subslices = devinfo->subslice_total; /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says: * diff --git a/src/mesa/drivers/dri/i965/brw_screen.c b/src/mesa/drivers/dri/i965/brw_screen.c index 73bb1a03c22..23f31f37917 100644 --- a/src/mesa/drivers/dri/i965/brw_screen.c +++ b/src/mesa/drivers/dri/i965/brw_screen.c @@ -2611,9 +2611,6 @@ __DRIconfig **brw_init_screen(__DRIscreen *dri_screen) isl_device_init(&screen->isl_dev, &screen->devinfo, screen->hw_has_swizzling); - /* GENs prior to 8 do not support EU/Subslice info */ - screen->subslice_total = intel_device_info_subslice_total(devinfo); - /* Gfx7-7.5 kernel requirements / command parser saga: * * - pre-v3.16: diff --git a/src/mesa/drivers/dri/i965/brw_screen.h b/src/mesa/drivers/dri/i965/brw_screen.h index 9ee24d06a11..d2cefc2be6a 100644 --- a/src/mesa/drivers/dri/i965/brw_screen.h +++ b/src/mesa/drivers/dri/i965/brw_screen.h @@ -107,11 +107,6 @@ struct brw_screen */ int cmd_parser_version; - /** - * Number of subslices reported by the I915_PARAM_SUBSLICE_TOTAL parameter - */ - int subslice_total; - bool mesa_format_supports_texture[MESA_FORMAT_COUNT]; bool mesa_format_supports_render[MESA_FORMAT_COUNT]; enum isl_format mesa_to_isl_render_format[MESA_FORMAT_COUNT]; diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 52f25fb1848..511c194662f 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -4321,15 +4321,8 @@ genX(upload_cs_state)(struct brw_context *brw) vfe.PerThreadScratchSpace = per_thread_scratch_value; } - /* If brw->screen->subslice_total is greater than one, then - * devinfo->max_cs_threads stores number of threads per sub-slice; - * thus we need to multiply by that number by subslices to get - * the actual maximum number of threads; the -1 is because the HW - * has a bias of 1 (would not make sense to say the maximum number - * of threads is 0). - */ - const uint32_t subslices = MAX2(brw->screen->subslice_total, 1); - vfe.MaximumNumberofThreads = devinfo->max_cs_threads * subslices - 1; + vfe.MaximumNumberofThreads = + devinfo->max_cs_threads * devinfo->subslice_total - 1; vfe.NumberofURBEntries = GFX_VER >= 8 ? 2 : 0; #if GFX_VER < 11 vfe.ResetGatewayTimer =