i965: Fix shared local memory size for Gen9+.

Skylake changes the representation of shared local memory size:

  Size   | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
  -------------------------------------------------------------------
  Gen7-8 |    0 | none | none |    1 |    2 |     4 |     8 |    16 |
  -------------------------------------------------------------------
  Gen9+  |    0 |    1 |    2 |    3 |    4 |     5 |     6 |     7 |

The old formula would substantially underallocate the amount of space.
This fixes GPU hangs on Skylake when running with full thread counts.

v2: Fix the Vulkan driver too, use a helper function, and fix the table
    in the comments and commit message.

Cc: "12.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
This commit is contained in:
@@ -241,15 +241,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(prog_data->total_shared <= 64 * 1024);
|
const uint32_t slm_size = encode_slm_size(GEN_GEN, prog_data->total_shared);
|
||||||
uint32_t slm_size = 0;
|
|
||||||
if (prog_data->total_shared > 0) {
|
|
||||||
/* slm_size is in 4k increments, but must be a power of 2. */
|
|
||||||
slm_size = 4 * 1024;
|
|
||||||
while (slm_size < prog_data->total_shared)
|
|
||||||
slm_size <<= 1;
|
|
||||||
slm_size /= 4 * 1024;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct anv_state state =
|
struct anv_state state =
|
||||||
anv_state_pool_emit(&device->dynamic_state_pool,
|
anv_state_pool_emit(&device->dynamic_state_pool,
|
||||||
|
@@ -326,15 +326,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(prog_data->total_shared <= 64 * 1024);
|
const uint32_t slm_size = encode_slm_size(GEN_GEN, prog_data->total_shared);
|
||||||
uint32_t slm_size = 0;
|
|
||||||
if (prog_data->total_shared > 0) {
|
|
||||||
/* slm_size is in 4k increments, but must be a power of 2. */
|
|
||||||
slm_size = 4 * 1024;
|
|
||||||
while (slm_size < prog_data->total_shared)
|
|
||||||
slm_size <<= 1;
|
|
||||||
slm_size /= 4 * 1024;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct anv_state state =
|
struct anv_state state =
|
||||||
anv_state_pool_emit(&device->dynamic_state_pool,
|
anv_state_pool_emit(&device->dynamic_state_pool,
|
||||||
|
@@ -26,6 +26,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "brw_device_info.h"
|
#include "brw_device_info.h"
|
||||||
#include "main/mtypes.h"
|
#include "main/mtypes.h"
|
||||||
|
#include "main/macros.h"
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
@@ -831,6 +832,38 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
unsigned *final_assembly_size,
|
unsigned *final_assembly_size,
|
||||||
char **error_str);
|
char **error_str);
|
||||||
|
|
||||||
|
/**
 * Encode a shared local memory (SLM) size, given in bytes, into the
 * per-generation representation listed in the table inside this function.
 *
 * devinfo: only devinfo->gen is read, to choose between the Gen9+ and the
 *          pre-Gen9 representation.
 * bytes:   requested SLM size in bytes; asserted to be at most 64 kB.
 *
 * Returns 0 when bytes is 0.  Otherwise the size goes through
 * util_next_power_of_two() (presumably rounding up to a power of two; the
 * helper is not shown here), is raised to the generation's minimum, and is
 * then encoded:
 *   - Gen9+:    minimum 1 kB, result is ffs(size) - 10 (1 kB -> 1, 64 kB -> 7)
 *   - pre-Gen9: minimum 4 kB, result is size / 4096   (4 kB -> 1, 64 kB -> 16)
 */
static inline uint32_t
|
||||||
|
encode_slm_size(const struct brw_device_info *devinfo, uint32_t bytes)
|
||||||
|
{
|
||||||
|
uint32_t slm_size = 0;
|
||||||
|
|
||||||
|
/* Shared Local Memory is specified as powers of two, and encoded in
|
||||||
|
* INTERFACE_DESCRIPTOR_DATA with the following representations:
|
||||||
|
*
|
||||||
|
* Size | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
|
||||||
|
* -------------------------------------------------------------------
|
||||||
|
* Gen7-8 | 0 | none | none | 1 | 2 | 4 | 8 | 16 |
|
||||||
|
* -------------------------------------------------------------------
|
||||||
|
* Gen9+ | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
||||||
|
*/
|
||||||
|
assert(bytes <= 64 * 1024);
|
||||||
|
|
||||||
|
if (bytes > 0) {
|
||||||
|
/* Shared Local Memory Size is specified as powers of two. */
|
||||||
|
slm_size = util_next_power_of_two(bytes);
|
||||||
|
|
||||||
|
if (devinfo->gen >= 9) {
|
||||||
|
/* Use a minimum of 1kB; ffs() is 1-based (ffs(1024) == 11), so subtracting 10 turns an exponent of 10 (1024 B = 1 kB) into 1. */
|
||||||
|
slm_size = ffs(MAX2(slm_size, 1024)) - 10;
|
||||||
|
} else {
|
||||||
|
/* Use a minimum of 4kB; convert to the pre-Gen9 representation. */
|
||||||
|
slm_size = MAX2(slm_size, 4096) / 4096;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return slm_size;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
#endif
|
#endif
|
||||||
|
@@ -45,6 +45,7 @@ brw_upload_cs_state(struct brw_context *brw)
|
|||||||
struct brw_stage_state *stage_state = &brw->cs.base;
|
struct brw_stage_state *stage_state = &brw->cs.base;
|
||||||
struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
|
struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
|
||||||
struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
|
struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
|
||||||
|
const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
|
||||||
|
|
||||||
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
|
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
|
||||||
brw->vtbl.emit_buffer_surface_state(
|
brw->vtbl.emit_buffer_surface_state(
|
||||||
@@ -147,15 +148,7 @@ brw_upload_cs_state(struct brw_context *brw)
|
|||||||
SET_FIELD(cs_prog_data->threads, MEDIA_GPGPU_THREAD_COUNT);
|
SET_FIELD(cs_prog_data->threads, MEDIA_GPGPU_THREAD_COUNT);
|
||||||
assert(cs_prog_data->threads <= brw->max_cs_threads);
|
assert(cs_prog_data->threads <= brw->max_cs_threads);
|
||||||
|
|
||||||
assert(prog_data->total_shared <= 64 * 1024);
|
const uint32_t slm_size = encode_slm_size(devinfo, prog_data->total_shared);
|
||||||
uint32_t slm_size = 0;
|
|
||||||
if (prog_data->total_shared > 0) {
|
|
||||||
/* slm_size is in 4k increments, but must be a power of 2. */
|
|
||||||
slm_size = 4 * 1024;
|
|
||||||
while (slm_size < prog_data->total_shared)
|
|
||||||
slm_size <<= 1;
|
|
||||||
slm_size /= 4 * 1024;
|
|
||||||
}
|
|
||||||
|
|
||||||
desc[dw++] =
|
desc[dw++] =
|
||||||
SET_FIELD(cs_prog_data->uses_barrier, MEDIA_BARRIER_ENABLE) |
|
SET_FIELD(cs_prog_data->uses_barrier, MEDIA_BARRIER_ENABLE) |
|
||||||
|
Reference in New Issue
Block a user