anv: Add support for scratch on XeHP
Rework:
 * Jordan: Handle per_thread_scratch==0 in anv_scratch_pool_get_surf
 * Jordan: Update subslices in anv_scratch_pool_alloc
 * Jason: Clean up the patch a bit

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11582>
This commit is contained in:

committed by
Marge Bot

parent
ae18e1e707
commit
eeeea5cb87
@@ -1420,6 +1420,13 @@ anv_scratch_pool_finish(struct anv_device *device, struct anv_scratch_pool *pool
|
|||||||
anv_device_release_bo(device, pool->bos[i][s]);
|
anv_device_release_bo(device, pool->bos[i][s]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < 16; i++) {
|
||||||
|
if (pool->surf_states[i].map != NULL) {
|
||||||
|
anv_state_pool_free(&device->surface_state_pool,
|
||||||
|
pool->surf_states[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct anv_bo *
|
struct anv_bo *
|
||||||
@@ -1433,13 +1440,22 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
|
|||||||
assert(scratch_size_log2 < 16);
|
assert(scratch_size_log2 < 16);
|
||||||
|
|
||||||
assert(stage < ARRAY_SIZE(pool->bos));
|
assert(stage < ARRAY_SIZE(pool->bos));
|
||||||
|
|
||||||
|
const struct intel_device_info *devinfo = &device->info;
|
||||||
|
|
||||||
|
/* On GFX version 12.5, scratch access changed to a surface-based model.
|
||||||
|
* Instead of each shader type having its own layout based on IDs passed
|
||||||
|
* from the relevant fixed-function unit, all scratch access is based on
|
||||||
|
* thread IDs like it always has been for compute.
|
||||||
|
*/
|
||||||
|
if (devinfo->verx10 >= 125)
|
||||||
|
stage = MESA_SHADER_COMPUTE;
|
||||||
|
|
||||||
struct anv_bo *bo = p_atomic_read(&pool->bos[scratch_size_log2][stage]);
|
struct anv_bo *bo = p_atomic_read(&pool->bos[scratch_size_log2][stage]);
|
||||||
|
|
||||||
if (bo != NULL)
|
if (bo != NULL)
|
||||||
return bo;
|
return bo;
|
||||||
|
|
||||||
const struct intel_device_info *devinfo = &device->info;
|
|
||||||
|
|
||||||
unsigned subslices = MAX2(device->physical->subslice_total, 1);
|
unsigned subslices = MAX2(device->physical->subslice_total, 1);
|
||||||
|
|
||||||
/* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
|
/* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
|
||||||
@@ -1456,7 +1472,9 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
|
|||||||
* For, Gfx11+, scratch space allocation is based on the number of threads
|
* For, Gfx11+, scratch space allocation is based on the number of threads
|
||||||
* in the base configuration.
|
* in the base configuration.
|
||||||
*/
|
*/
|
||||||
if (devinfo->ver == 12)
|
if (devinfo->verx10 == 125)
|
||||||
|
subslices = 32;
|
||||||
|
else if (devinfo->ver == 12)
|
||||||
subslices = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2);
|
subslices = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2);
|
||||||
else if (devinfo->ver == 11)
|
else if (devinfo->ver == 11)
|
||||||
subslices = 8;
|
subslices = 8;
|
||||||
@@ -1552,6 +1570,50 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t
|
||||||
|
anv_scratch_pool_get_surf(struct anv_device *device,
|
||||||
|
struct anv_scratch_pool *pool,
|
||||||
|
unsigned per_thread_scratch)
|
||||||
|
{
|
||||||
|
if (per_thread_scratch == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
unsigned scratch_size_log2 = ffs(per_thread_scratch / 2048);
|
||||||
|
assert(scratch_size_log2 < 16);
|
||||||
|
|
||||||
|
uint32_t surf = p_atomic_read(&pool->surfs[scratch_size_log2]);
|
||||||
|
if (surf > 0)
|
||||||
|
return surf;
|
||||||
|
|
||||||
|
struct anv_bo *bo =
|
||||||
|
anv_scratch_pool_alloc(device, pool, MESA_SHADER_COMPUTE,
|
||||||
|
per_thread_scratch);
|
||||||
|
struct anv_address addr = { .bo = bo };
|
||||||
|
|
||||||
|
struct anv_state state =
|
||||||
|
anv_state_pool_alloc(&device->surface_state_pool,
|
||||||
|
device->isl_dev.ss.size, 64);
|
||||||
|
|
||||||
|
isl_buffer_fill_state(&device->isl_dev, state.map,
|
||||||
|
.address = anv_address_physical(addr),
|
||||||
|
.size_B = bo->size,
|
||||||
|
.mocs = anv_mocs(device, bo, 0),
|
||||||
|
.format = ISL_FORMAT_RAW,
|
||||||
|
.swizzle = ISL_SWIZZLE_IDENTITY,
|
||||||
|
.stride_B = per_thread_scratch,
|
||||||
|
.is_scratch = true);
|
||||||
|
|
||||||
|
uint32_t current = p_atomic_cmpxchg(&pool->surfs[scratch_size_log2],
|
||||||
|
0, state.offset);
|
||||||
|
if (current) {
|
||||||
|
anv_state_pool_free(&device->surface_state_pool, state);
|
||||||
|
return current;
|
||||||
|
} else {
|
||||||
|
pool->surf_states[scratch_size_log2] = state;
|
||||||
|
return state.offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
VkResult
|
VkResult
|
||||||
anv_bo_cache_init(struct anv_bo_cache *cache)
|
anv_bo_cache_init(struct anv_bo_cache *cache)
|
||||||
{
|
{
|
||||||
|
@@ -823,6 +823,8 @@ void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
|
|||||||
struct anv_scratch_pool {
|
struct anv_scratch_pool {
|
||||||
/* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
|
/* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
|
||||||
struct anv_bo *bos[16][MESA_SHADER_STAGES];
|
struct anv_bo *bos[16][MESA_SHADER_STAGES];
|
||||||
|
uint32_t surfs[16];
|
||||||
|
struct anv_state surf_states[16];
|
||||||
};
|
};
|
||||||
|
|
||||||
void anv_scratch_pool_init(struct anv_device *device,
|
void anv_scratch_pool_init(struct anv_device *device,
|
||||||
@@ -833,6 +835,9 @@ struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
|
|||||||
struct anv_scratch_pool *pool,
|
struct anv_scratch_pool *pool,
|
||||||
gl_shader_stage stage,
|
gl_shader_stage stage,
|
||||||
unsigned per_thread_scratch);
|
unsigned per_thread_scratch);
|
||||||
|
uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
|
||||||
|
struct anv_scratch_pool *pool,
|
||||||
|
unsigned per_thread_scratch);
|
||||||
|
|
||||||
/** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
|
/** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
|
||||||
struct anv_bo_cache {
|
struct anv_bo_cache {
|
||||||
|
@@ -1703,7 +1703,7 @@ get_sampler_count(const struct anv_shader_bin *bin)
|
|||||||
return MIN2(count_by_4, 4);
|
return MIN2(count_by_4, 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct anv_address
|
static UNUSED struct anv_address
|
||||||
get_scratch_address(struct anv_pipeline *pipeline,
|
get_scratch_address(struct anv_pipeline *pipeline,
|
||||||
gl_shader_stage stage,
|
gl_shader_stage stage,
|
||||||
const struct anv_shader_bin *bin)
|
const struct anv_shader_bin *bin)
|
||||||
@@ -1716,12 +1716,21 @@ get_scratch_address(struct anv_pipeline *pipeline,
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t
|
static UNUSED uint32_t
|
||||||
get_scratch_space(const struct anv_shader_bin *bin)
|
get_scratch_space(const struct anv_shader_bin *bin)
|
||||||
{
|
{
|
||||||
return ffs(bin->prog_data->total_scratch / 2048);
|
return ffs(bin->prog_data->total_scratch / 2048);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static UNUSED uint32_t
|
||||||
|
get_scratch_surf(struct anv_pipeline *pipeline,
|
||||||
|
const struct anv_shader_bin *bin)
|
||||||
|
{
|
||||||
|
return anv_scratch_pool_get_surf(pipeline->device,
|
||||||
|
&pipeline->device->scratch_pool,
|
||||||
|
bin->prog_data->total_scratch) >> 4;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
|
emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
|
||||||
{
|
{
|
||||||
@@ -1792,9 +1801,13 @@ emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
|
|||||||
vs_prog_data->base.cull_distance_mask;
|
vs_prog_data->base.cull_distance_mask;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if GFX_VERx10 >= 125
|
||||||
|
vs.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, vs_bin);
|
||||||
|
#else
|
||||||
vs.PerThreadScratchSpace = get_scratch_space(vs_bin);
|
vs.PerThreadScratchSpace = get_scratch_space(vs_bin);
|
||||||
vs.ScratchSpaceBasePointer =
|
vs.ScratchSpaceBasePointer =
|
||||||
get_scratch_address(&pipeline->base, MESA_SHADER_VERTEX, vs_bin);
|
get_scratch_address(&pipeline->base, MESA_SHADER_VERTEX, vs_bin);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1849,10 +1862,13 @@ emit_3dstate_hs_te_ds(struct anv_graphics_pipeline *pipeline,
|
|||||||
tcs_prog_data->base.base.dispatch_grf_start_reg >> 5;
|
tcs_prog_data->base.base.dispatch_grf_start_reg >> 5;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if GFX_VERx10 >= 125
|
||||||
|
hs.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, tcs_bin);
|
||||||
|
#else
|
||||||
hs.PerThreadScratchSpace = get_scratch_space(tcs_bin);
|
hs.PerThreadScratchSpace = get_scratch_space(tcs_bin);
|
||||||
hs.ScratchSpaceBasePointer =
|
hs.ScratchSpaceBasePointer =
|
||||||
get_scratch_address(&pipeline->base, MESA_SHADER_TESS_CTRL, tcs_bin);
|
get_scratch_address(&pipeline->base, MESA_SHADER_TESS_CTRL, tcs_bin);
|
||||||
|
#endif
|
||||||
|
|
||||||
#if GFX_VER == 12
|
#if GFX_VER == 12
|
||||||
/* Patch Count threshold specifies the maximum number of patches that
|
/* Patch Count threshold specifies the maximum number of patches that
|
||||||
@@ -1930,9 +1946,13 @@ emit_3dstate_hs_te_ds(struct anv_graphics_pipeline *pipeline,
|
|||||||
tes_prog_data->base.cull_distance_mask;
|
tes_prog_data->base.cull_distance_mask;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if GFX_VERx10 >= 125
|
||||||
|
ds.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, tes_bin);
|
||||||
|
#else
|
||||||
ds.PerThreadScratchSpace = get_scratch_space(tes_bin);
|
ds.PerThreadScratchSpace = get_scratch_space(tes_bin);
|
||||||
ds.ScratchSpaceBasePointer =
|
ds.ScratchSpaceBasePointer =
|
||||||
get_scratch_address(&pipeline->base, MESA_SHADER_TESS_EVAL, tes_bin);
|
get_scratch_address(&pipeline->base, MESA_SHADER_TESS_EVAL, tes_bin);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1998,9 +2018,13 @@ emit_3dstate_gs(struct anv_graphics_pipeline *pipeline)
|
|||||||
gs_prog_data->base.cull_distance_mask;
|
gs_prog_data->base.cull_distance_mask;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if GFX_VERx10 >= 125
|
||||||
|
gs.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, gs_bin);
|
||||||
|
#else
|
||||||
gs.PerThreadScratchSpace = get_scratch_space(gs_bin);
|
gs.PerThreadScratchSpace = get_scratch_space(gs_bin);
|
||||||
gs.ScratchSpaceBasePointer =
|
gs.ScratchSpaceBasePointer =
|
||||||
get_scratch_address(&pipeline->base, MESA_SHADER_GEOMETRY, gs_bin);
|
get_scratch_address(&pipeline->base, MESA_SHADER_GEOMETRY, gs_bin);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2266,9 +2290,13 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
|
|||||||
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||||
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
|
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
|
||||||
|
|
||||||
|
#if GFX_VERx10 >= 125
|
||||||
|
ps.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, fs_bin);
|
||||||
|
#else
|
||||||
ps.PerThreadScratchSpace = get_scratch_space(fs_bin);
|
ps.PerThreadScratchSpace = get_scratch_space(fs_bin);
|
||||||
ps.ScratchSpaceBasePointer =
|
ps.ScratchSpaceBasePointer =
|
||||||
get_scratch_address(&pipeline->base, MESA_SHADER_FRAGMENT, fs_bin);
|
get_scratch_address(&pipeline->base, MESA_SHADER_FRAGMENT, fs_bin);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2558,8 +2586,7 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
|
|||||||
anv_batch_emit(&pipeline->base.batch, GENX(CFE_STATE), cfe) {
|
anv_batch_emit(&pipeline->base.batch, GENX(CFE_STATE), cfe) {
|
||||||
cfe.MaximumNumberofThreads =
|
cfe.MaximumNumberofThreads =
|
||||||
devinfo->max_cs_threads * subslices - 1;
|
devinfo->max_cs_threads * subslices - 1;
|
||||||
/* TODO: Enable gfx12-hp scratch support*/
|
cfe.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, cs_bin);
|
||||||
assert(get_scratch_space(cs_bin) == 0);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user