intel/blorp: Support compute for slow clears
Reworks: * Use BLORP_BATCH_USE_COMPUTE flag rather than compute param to blorp_clear (s-b Jason) * Use nir_load_global_invocation_id (s-b Jason) * Use nir_push_if (s-b Jason) * Use nir_image_store (s-b Jason) * Require gfx12 for ccs in blorp_clear_supports_compute (s-b Jason) * Add nir_pop_if (s-b Ken) * Fix aux_usage check on gfx12 blorp_clear_supports_compute (s-b Ken) * Use blorp_set_cs_dims (s-b Jason) * Simplify rgb-as-red calculation (s-b Jason) * Use dim=2d with array=true for nir_image_store (s-b Jason, Francisco) * discard => bounds (s-b Ken) * Re-add ISL_AUX_USAGE_CCS_E in *_supports_compute (s-b Sagar) Signed-off-by: Jordan Justen <jordan.l.justen@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11564>
This commit is contained in:
@@ -173,6 +173,11 @@ blorp_fast_clear(struct blorp_batch *batch,
|
||||
uint32_t level, uint32_t start_layer, uint32_t num_layers,
|
||||
uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1);
|
||||
|
||||
bool
|
||||
blorp_clear_supports_compute(struct blorp_context *blorp,
|
||||
uint8_t color_write_disable, bool blend_enabled,
|
||||
enum isl_aux_usage aux_usage);
|
||||
|
||||
void
|
||||
blorp_clear(struct blorp_batch *batch,
|
||||
const struct blorp_surf *surf,
|
||||
|
@@ -29,6 +29,7 @@
|
||||
|
||||
#include "blorp_priv.h"
|
||||
#include "compiler/brw_eu_defines.h"
|
||||
#include "dev/intel_debug.h"
|
||||
|
||||
#include "blorp_nir_builder.h"
|
||||
|
||||
@@ -40,11 +41,12 @@ struct brw_blorp_const_color_prog_key
|
||||
struct brw_blorp_base_key base;
|
||||
bool use_simd16_replicated_data;
|
||||
bool clear_rgb_as_red;
|
||||
uint8_t local_y;
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
static bool
|
||||
blorp_params_get_clear_kernel(struct blorp_batch *batch,
|
||||
blorp_params_get_clear_kernel_fs(struct blorp_batch *batch,
|
||||
struct blorp_params *params,
|
||||
bool use_replicated_data,
|
||||
bool clear_rgb_as_red)
|
||||
@@ -53,8 +55,10 @@ blorp_params_get_clear_kernel(struct blorp_batch *batch,
|
||||
|
||||
const struct brw_blorp_const_color_prog_key blorp_key = {
|
||||
.base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_CLEAR),
|
||||
.base.shader_pipeline = BLORP_SHADER_PIPELINE_RENDER,
|
||||
.use_simd16_replicated_data = use_replicated_data,
|
||||
.clear_rgb_as_red = clear_rgb_as_red,
|
||||
.local_y = 0,
|
||||
};
|
||||
|
||||
if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key),
|
||||
@@ -103,6 +107,95 @@ blorp_params_get_clear_kernel(struct blorp_batch *batch,
|
||||
return result;
|
||||
}
|
||||
|
||||
static bool
|
||||
blorp_params_get_clear_kernel_cs(struct blorp_batch *batch,
|
||||
struct blorp_params *params,
|
||||
bool clear_rgb_as_red)
|
||||
{
|
||||
struct blorp_context *blorp = batch->blorp;
|
||||
|
||||
const struct brw_blorp_const_color_prog_key blorp_key = {
|
||||
.base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_CLEAR),
|
||||
.base.shader_pipeline = BLORP_SHADER_PIPELINE_COMPUTE,
|
||||
.use_simd16_replicated_data = false,
|
||||
.clear_rgb_as_red = clear_rgb_as_red,
|
||||
.local_y = blorp_get_cs_local_y(params),
|
||||
};
|
||||
|
||||
if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key),
|
||||
¶ms->cs_prog_kernel, ¶ms->cs_prog_data))
|
||||
return true;
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
||||
nir_builder b;
|
||||
blorp_nir_init_shader(&b, mem_ctx, MESA_SHADER_COMPUTE, "BLORP-gpgpu-clear");
|
||||
blorp_set_cs_dims(b.shader, blorp_key.local_y);
|
||||
|
||||
nir_ssa_def *dst_pos = nir_load_global_invocation_id(&b, 32);
|
||||
|
||||
nir_variable *v_color =
|
||||
BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type());
|
||||
nir_ssa_def *color = nir_load_var(&b, v_color);
|
||||
|
||||
nir_variable *v_bounds_rect =
|
||||
BLORP_CREATE_NIR_INPUT(b.shader, bounds_rect, glsl_vec4_type());
|
||||
nir_ssa_def *bounds_rect = nir_load_var(&b, v_bounds_rect);
|
||||
nir_ssa_def *in_bounds = blorp_check_in_bounds(&b, bounds_rect, dst_pos);
|
||||
|
||||
if (clear_rgb_as_red) {
|
||||
nir_ssa_def *comp = nir_umod(&b, nir_channel(&b, dst_pos, 0),
|
||||
nir_imm_int(&b, 3));
|
||||
color = nir_pad_vec4(&b, nir_vector_extract(&b, color, comp));
|
||||
}
|
||||
|
||||
nir_push_if(&b, in_bounds);
|
||||
|
||||
nir_image_store(&b, nir_imm_int(&b, 0),
|
||||
nir_pad_vector_imm_int(&b, dst_pos, 0, 4),
|
||||
nir_imm_int(&b, 0),
|
||||
nir_pad_vector_imm_int(&b, color, 0, 4),
|
||||
nir_imm_int(&b, 0),
|
||||
.image_dim = GLSL_SAMPLER_DIM_2D,
|
||||
.image_array = true,
|
||||
.access = ACCESS_NON_READABLE);
|
||||
|
||||
nir_pop_if(&b, NULL);
|
||||
|
||||
struct brw_cs_prog_key cs_key;
|
||||
brw_blorp_init_cs_prog_key(&cs_key);
|
||||
|
||||
struct brw_cs_prog_data prog_data;
|
||||
const unsigned *program =
|
||||
blorp_compile_cs(blorp, mem_ctx, b.shader, &cs_key, &prog_data);
|
||||
|
||||
bool result =
|
||||
blorp->upload_shader(batch, MESA_SHADER_COMPUTE,
|
||||
&blorp_key, sizeof(blorp_key),
|
||||
program, prog_data.base.program_size,
|
||||
&prog_data.base, sizeof(prog_data),
|
||||
¶ms->cs_prog_kernel, ¶ms->cs_prog_data);
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
return result;
|
||||
}
|
||||
|
||||
static bool
|
||||
blorp_params_get_clear_kernel(struct blorp_batch *batch,
|
||||
struct blorp_params *params,
|
||||
bool use_replicated_data,
|
||||
bool clear_rgb_as_red)
|
||||
{
|
||||
if (batch->flags & BLORP_BATCH_USE_COMPUTE) {
|
||||
assert(!use_replicated_data);
|
||||
return blorp_params_get_clear_kernel_cs(batch, params, clear_rgb_as_red);
|
||||
} else {
|
||||
return blorp_params_get_clear_kernel_fs(batch, params,
|
||||
use_replicated_data,
|
||||
clear_rgb_as_red);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma pack(push, 1)
|
||||
struct layer_offset_vs_key {
|
||||
struct brw_blorp_base_key base;
|
||||
@@ -336,6 +429,7 @@ blorp_fast_clear(struct blorp_batch *batch,
|
||||
struct blorp_params params;
|
||||
blorp_params_init(&params);
|
||||
params.num_layers = num_layers;
|
||||
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
|
||||
|
||||
params.x0 = x0;
|
||||
params.y0 = y0;
|
||||
@@ -370,6 +464,24 @@ blorp_fast_clear(struct blorp_batch *batch,
|
||||
batch->blorp->exec(batch, &params);
|
||||
}
|
||||
|
||||
bool
|
||||
blorp_clear_supports_compute(struct blorp_context *blorp,
|
||||
uint8_t color_write_disable, bool blend_enabled,
|
||||
enum isl_aux_usage aux_usage)
|
||||
{
|
||||
if (blorp->isl_dev->info->ver < 7)
|
||||
return false;
|
||||
if (color_write_disable != 0 || blend_enabled)
|
||||
return false;
|
||||
if (blorp->isl_dev->info->ver >= 12) {
|
||||
return aux_usage == ISL_AUX_USAGE_GFX12_CCS_E ||
|
||||
aux_usage == ISL_AUX_USAGE_CCS_E ||
|
||||
aux_usage == ISL_AUX_USAGE_NONE;
|
||||
} else {
|
||||
return aux_usage == ISL_AUX_USAGE_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
blorp_clear(struct blorp_batch *batch,
|
||||
const struct blorp_surf *surf,
|
||||
@@ -383,6 +495,11 @@ blorp_clear(struct blorp_batch *batch,
|
||||
blorp_params_init(&params);
|
||||
params.snapshot_type = INTEL_SNAPSHOT_SLOW_COLOR_CLEAR;
|
||||
|
||||
const bool compute = batch->flags & BLORP_BATCH_USE_COMPUTE;
|
||||
if (compute)
|
||||
assert(blorp_clear_supports_compute(batch->blorp, color_write_disable,
|
||||
false, surf->aux_usage));
|
||||
|
||||
/* Manually apply the clear destination swizzle. This way swizzled clears
|
||||
* will work for swizzles which we can't normally use for rendering and it
|
||||
* also ensures that they work on pre-Haswell hardware which can't swizzle
|
||||
@@ -431,6 +548,9 @@ blorp_clear(struct blorp_batch *batch,
|
||||
if (batch->blorp->isl_dev->info->ver < 6)
|
||||
use_simd16_replicated_data = false;
|
||||
|
||||
if (compute)
|
||||
use_simd16_replicated_data = false;
|
||||
|
||||
/* Constant color writes ignore everything in blend and color calculator
|
||||
* state. This is not documented.
|
||||
*/
|
||||
@@ -443,7 +563,7 @@ blorp_clear(struct blorp_batch *batch,
|
||||
clear_rgb_as_red))
|
||||
return;
|
||||
|
||||
if (!blorp_ensure_sf_program(batch, &params))
|
||||
if (!compute && !blorp_ensure_sf_program(batch, &params))
|
||||
return;
|
||||
|
||||
while (num_layers > 0) {
|
||||
@@ -456,6 +576,13 @@ blorp_clear(struct blorp_batch *batch,
|
||||
params.x1 = x1;
|
||||
params.y1 = y1;
|
||||
|
||||
if (compute) {
|
||||
params.wm_inputs.bounds_rect.x0 = x0;
|
||||
params.wm_inputs.bounds_rect.y0 = y0;
|
||||
params.wm_inputs.bounds_rect.x1 = x1;
|
||||
params.wm_inputs.bounds_rect.y1 = y1;
|
||||
}
|
||||
|
||||
if (params.dst.tile_x_sa || params.dst.tile_y_sa) {
|
||||
assert(params.dst.surf.samples == 1);
|
||||
assert(num_layers == 1);
|
||||
@@ -558,6 +685,8 @@ blorp_clear_stencil_as_rgba(struct blorp_batch *batch,
|
||||
uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
|
||||
uint8_t stencil_mask, uint8_t stencil_value)
|
||||
{
|
||||
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
|
||||
|
||||
/* We only support separate W-tiled stencil for now */
|
||||
if (surf->surf->format != ISL_FORMAT_R8_UINT ||
|
||||
surf->surf->tiling != ISL_TILING_W)
|
||||
@@ -662,6 +791,8 @@ blorp_clear_depth_stencil(struct blorp_batch *batch,
|
||||
bool clear_depth, float depth_value,
|
||||
uint8_t stencil_mask, uint8_t stencil_value)
|
||||
{
|
||||
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
|
||||
|
||||
if (!clear_depth && blorp_clear_stencil_as_rgba(batch, stencil, level,
|
||||
start_layer, num_layers,
|
||||
x0, y0, x1, y1,
|
||||
@@ -987,6 +1118,7 @@ blorp_clear_attachments(struct blorp_batch *batch,
|
||||
struct blorp_params params;
|
||||
blorp_params_init(&params);
|
||||
|
||||
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
|
||||
assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
|
||||
|
||||
params.x0 = x0;
|
||||
@@ -1045,6 +1177,7 @@ blorp_ccs_resolve(struct blorp_batch *batch,
|
||||
enum isl_format format,
|
||||
enum isl_aux_op resolve_op)
|
||||
{
|
||||
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
|
||||
struct blorp_params params;
|
||||
|
||||
blorp_params_init(&params);
|
||||
@@ -1261,6 +1394,8 @@ blorp_ccs_ambiguate(struct blorp_batch *batch,
|
||||
struct blorp_surf *surf,
|
||||
uint32_t level, uint32_t layer)
|
||||
{
|
||||
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
|
||||
|
||||
if (ISL_GFX_VER(batch->blorp->isl_dev) >= 10) {
|
||||
/* On gfx10 and above, we have a hardware resolve op for this */
|
||||
return blorp_ccs_resolve(batch, surf, level, layer, 1,
|
||||
|
Reference in New Issue
Block a user