intel/blorp: Support compute for slow clears
Reworks: * Use BLORP_BATCH_USE_COMPUTE flag rather than compute param to blorp_clear (s-b Jason) * Use nir_load_global_invocation_id (s-b Jason) * Use nir_push_if (s-b Jason) * Use nir_image_store (s-b Jason) * Require gfx12 for ccs in blorp_clear_supports_compute (s-b Jason) * Add nir_pop_if (s-b Ken) * Fix aux_usage check on gfx12 blorp_clear_supports_compute (s-b Ken) * Use blorp_set_cs_dims (s-b Jason) * Simplify rgb-as-red calculation (s-b Jason) * Use dim=2d with array=true for nir_image_store (s-b Jason, Francisco) * discard => bounds (s-b Ken) * Re-add ISL_AUX_USAGE_CCS_E in *_supports_compute (s-b Sagar) Signed-off-by: Jordan Justen <jordan.l.justen@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11564>
This commit is contained in:
@@ -173,6 +173,11 @@ blorp_fast_clear(struct blorp_batch *batch,
|
|||||||
uint32_t level, uint32_t start_layer, uint32_t num_layers,
|
uint32_t level, uint32_t start_layer, uint32_t num_layers,
|
||||||
uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1);
|
uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1);
|
||||||
|
|
||||||
|
/**
 * Reports whether a slow color clear with the given color-write-disable
 * mask, blend state and auxiliary-surface usage may be run with
 * BLORP_BATCH_USE_COMPUTE.  blorp_clear() asserts this (with blending
 * treated as disabled) whenever the batch has that flag set.
 */
bool
|
||||||
|
blorp_clear_supports_compute(struct blorp_context *blorp,
|
||||||
|
uint8_t color_write_disable, bool blend_enabled,
|
||||||
|
enum isl_aux_usage aux_usage);
|
||||||
|
|
||||||
void
|
void
|
||||||
blorp_clear(struct blorp_batch *batch,
|
blorp_clear(struct blorp_batch *batch,
|
||||||
const struct blorp_surf *surf,
|
const struct blorp_surf *surf,
|
||||||
|
@@ -29,6 +29,7 @@
|
|||||||
|
|
||||||
#include "blorp_priv.h"
|
#include "blorp_priv.h"
|
||||||
#include "compiler/brw_eu_defines.h"
|
#include "compiler/brw_eu_defines.h"
|
||||||
|
#include "dev/intel_debug.h"
|
||||||
|
|
||||||
#include "blorp_nir_builder.h"
|
#include "blorp_nir_builder.h"
|
||||||
|
|
||||||
@@ -40,11 +41,12 @@ struct brw_blorp_const_color_prog_key
|
|||||||
struct brw_blorp_base_key base;
|
struct brw_blorp_base_key base;
|
||||||
bool use_simd16_replicated_data;
|
bool use_simd16_replicated_data;
|
||||||
bool clear_rgb_as_red;
|
bool clear_rgb_as_red;
|
||||||
|
uint8_t local_y;
|
||||||
};
|
};
|
||||||
#pragma pack(pop)
|
#pragma pack(pop)
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
blorp_params_get_clear_kernel(struct blorp_batch *batch,
|
blorp_params_get_clear_kernel_fs(struct blorp_batch *batch,
|
||||||
struct blorp_params *params,
|
struct blorp_params *params,
|
||||||
bool use_replicated_data,
|
bool use_replicated_data,
|
||||||
bool clear_rgb_as_red)
|
bool clear_rgb_as_red)
|
||||||
@@ -53,8 +55,10 @@ blorp_params_get_clear_kernel(struct blorp_batch *batch,
|
|||||||
|
|
||||||
const struct brw_blorp_const_color_prog_key blorp_key = {
|
const struct brw_blorp_const_color_prog_key blorp_key = {
|
||||||
.base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_CLEAR),
|
.base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_CLEAR),
|
||||||
|
.base.shader_pipeline = BLORP_SHADER_PIPELINE_RENDER,
|
||||||
.use_simd16_replicated_data = use_replicated_data,
|
.use_simd16_replicated_data = use_replicated_data,
|
||||||
.clear_rgb_as_red = clear_rgb_as_red,
|
.clear_rgb_as_red = clear_rgb_as_red,
|
||||||
|
.local_y = 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key),
|
if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key),
|
||||||
@@ -103,6 +107,95 @@ blorp_params_get_clear_kernel(struct blorp_batch *batch,
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
blorp_params_get_clear_kernel_cs(struct blorp_batch *batch,
|
||||||
|
struct blorp_params *params,
|
||||||
|
bool clear_rgb_as_red)
|
||||||
|
{
|
||||||
|
struct blorp_context *blorp = batch->blorp;
|
||||||
|
|
||||||
|
const struct brw_blorp_const_color_prog_key blorp_key = {
|
||||||
|
.base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_CLEAR),
|
||||||
|
.base.shader_pipeline = BLORP_SHADER_PIPELINE_COMPUTE,
|
||||||
|
.use_simd16_replicated_data = false,
|
||||||
|
.clear_rgb_as_red = clear_rgb_as_red,
|
||||||
|
.local_y = blorp_get_cs_local_y(params),
|
||||||
|
};
|
||||||
|
|
||||||
|
if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key),
|
||||||
|
¶ms->cs_prog_kernel, ¶ms->cs_prog_data))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
void *mem_ctx = ralloc_context(NULL);
|
||||||
|
|
||||||
|
nir_builder b;
|
||||||
|
blorp_nir_init_shader(&b, mem_ctx, MESA_SHADER_COMPUTE, "BLORP-gpgpu-clear");
|
||||||
|
blorp_set_cs_dims(b.shader, blorp_key.local_y);
|
||||||
|
|
||||||
|
nir_ssa_def *dst_pos = nir_load_global_invocation_id(&b, 32);
|
||||||
|
|
||||||
|
nir_variable *v_color =
|
||||||
|
BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type());
|
||||||
|
nir_ssa_def *color = nir_load_var(&b, v_color);
|
||||||
|
|
||||||
|
nir_variable *v_bounds_rect =
|
||||||
|
BLORP_CREATE_NIR_INPUT(b.shader, bounds_rect, glsl_vec4_type());
|
||||||
|
nir_ssa_def *bounds_rect = nir_load_var(&b, v_bounds_rect);
|
||||||
|
nir_ssa_def *in_bounds = blorp_check_in_bounds(&b, bounds_rect, dst_pos);
|
||||||
|
|
||||||
|
if (clear_rgb_as_red) {
|
||||||
|
nir_ssa_def *comp = nir_umod(&b, nir_channel(&b, dst_pos, 0),
|
||||||
|
nir_imm_int(&b, 3));
|
||||||
|
color = nir_pad_vec4(&b, nir_vector_extract(&b, color, comp));
|
||||||
|
}
|
||||||
|
|
||||||
|
nir_push_if(&b, in_bounds);
|
||||||
|
|
||||||
|
nir_image_store(&b, nir_imm_int(&b, 0),
|
||||||
|
nir_pad_vector_imm_int(&b, dst_pos, 0, 4),
|
||||||
|
nir_imm_int(&b, 0),
|
||||||
|
nir_pad_vector_imm_int(&b, color, 0, 4),
|
||||||
|
nir_imm_int(&b, 0),
|
||||||
|
.image_dim = GLSL_SAMPLER_DIM_2D,
|
||||||
|
.image_array = true,
|
||||||
|
.access = ACCESS_NON_READABLE);
|
||||||
|
|
||||||
|
nir_pop_if(&b, NULL);
|
||||||
|
|
||||||
|
struct brw_cs_prog_key cs_key;
|
||||||
|
brw_blorp_init_cs_prog_key(&cs_key);
|
||||||
|
|
||||||
|
struct brw_cs_prog_data prog_data;
|
||||||
|
const unsigned *program =
|
||||||
|
blorp_compile_cs(blorp, mem_ctx, b.shader, &cs_key, &prog_data);
|
||||||
|
|
||||||
|
bool result =
|
||||||
|
blorp->upload_shader(batch, MESA_SHADER_COMPUTE,
|
||||||
|
&blorp_key, sizeof(blorp_key),
|
||||||
|
program, prog_data.base.program_size,
|
||||||
|
&prog_data.base, sizeof(prog_data),
|
||||||
|
¶ms->cs_prog_kernel, ¶ms->cs_prog_data);
|
||||||
|
|
||||||
|
ralloc_free(mem_ctx);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
blorp_params_get_clear_kernel(struct blorp_batch *batch,
|
||||||
|
struct blorp_params *params,
|
||||||
|
bool use_replicated_data,
|
||||||
|
bool clear_rgb_as_red)
|
||||||
|
{
|
||||||
|
if (batch->flags & BLORP_BATCH_USE_COMPUTE) {
|
||||||
|
assert(!use_replicated_data);
|
||||||
|
return blorp_params_get_clear_kernel_cs(batch, params, clear_rgb_as_red);
|
||||||
|
} else {
|
||||||
|
return blorp_params_get_clear_kernel_fs(batch, params,
|
||||||
|
use_replicated_data,
|
||||||
|
clear_rgb_as_red);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#pragma pack(push, 1)
|
#pragma pack(push, 1)
|
||||||
struct layer_offset_vs_key {
|
struct layer_offset_vs_key {
|
||||||
struct brw_blorp_base_key base;
|
struct brw_blorp_base_key base;
|
||||||
@@ -336,6 +429,7 @@ blorp_fast_clear(struct blorp_batch *batch,
|
|||||||
struct blorp_params params;
|
struct blorp_params params;
|
||||||
blorp_params_init(&params);
|
blorp_params_init(&params);
|
||||||
params.num_layers = num_layers;
|
params.num_layers = num_layers;
|
||||||
|
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
|
||||||
|
|
||||||
params.x0 = x0;
|
params.x0 = x0;
|
||||||
params.y0 = y0;
|
params.y0 = y0;
|
||||||
@@ -370,6 +464,24 @@ blorp_fast_clear(struct blorp_batch *batch,
|
|||||||
batch->blorp->exec(batch, &params);
|
batch->blorp->exec(batch, &params);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
blorp_clear_supports_compute(struct blorp_context *blorp,
|
||||||
|
uint8_t color_write_disable, bool blend_enabled,
|
||||||
|
enum isl_aux_usage aux_usage)
|
||||||
|
{
|
||||||
|
if (blorp->isl_dev->info->ver < 7)
|
||||||
|
return false;
|
||||||
|
if (color_write_disable != 0 || blend_enabled)
|
||||||
|
return false;
|
||||||
|
if (blorp->isl_dev->info->ver >= 12) {
|
||||||
|
return aux_usage == ISL_AUX_USAGE_GFX12_CCS_E ||
|
||||||
|
aux_usage == ISL_AUX_USAGE_CCS_E ||
|
||||||
|
aux_usage == ISL_AUX_USAGE_NONE;
|
||||||
|
} else {
|
||||||
|
return aux_usage == ISL_AUX_USAGE_NONE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
blorp_clear(struct blorp_batch *batch,
|
blorp_clear(struct blorp_batch *batch,
|
||||||
const struct blorp_surf *surf,
|
const struct blorp_surf *surf,
|
||||||
@@ -383,6 +495,11 @@ blorp_clear(struct blorp_batch *batch,
|
|||||||
blorp_params_init(&params);
|
blorp_params_init(&params);
|
||||||
params.snapshot_type = INTEL_SNAPSHOT_SLOW_COLOR_CLEAR;
|
params.snapshot_type = INTEL_SNAPSHOT_SLOW_COLOR_CLEAR;
|
||||||
|
|
||||||
|
const bool compute = batch->flags & BLORP_BATCH_USE_COMPUTE;
|
||||||
|
if (compute)
|
||||||
|
assert(blorp_clear_supports_compute(batch->blorp, color_write_disable,
|
||||||
|
false, surf->aux_usage));
|
||||||
|
|
||||||
/* Manually apply the clear destination swizzle. This way swizzled clears
|
/* Manually apply the clear destination swizzle. This way swizzled clears
|
||||||
* will work for swizzles which we can't normally use for rendering and it
|
* will work for swizzles which we can't normally use for rendering and it
|
||||||
* also ensures that they work on pre-Haswell hardware which can't swizzle
|
* also ensures that they work on pre-Haswell hardware which can't swizzle
|
||||||
@@ -431,6 +548,9 @@ blorp_clear(struct blorp_batch *batch,
|
|||||||
if (batch->blorp->isl_dev->info->ver < 6)
|
if (batch->blorp->isl_dev->info->ver < 6)
|
||||||
use_simd16_replicated_data = false;
|
use_simd16_replicated_data = false;
|
||||||
|
|
||||||
|
if (compute)
|
||||||
|
use_simd16_replicated_data = false;
|
||||||
|
|
||||||
/* Constant color writes ignore everything in blend and color calculator
|
/* Constant color writes ignore everything in blend and color calculator
|
||||||
* state. This is not documented.
|
* state. This is not documented.
|
||||||
*/
|
*/
|
||||||
@@ -443,7 +563,7 @@ blorp_clear(struct blorp_batch *batch,
|
|||||||
clear_rgb_as_red))
|
clear_rgb_as_red))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (!blorp_ensure_sf_program(batch, &params))
|
if (!compute && !blorp_ensure_sf_program(batch, &params))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
while (num_layers > 0) {
|
while (num_layers > 0) {
|
||||||
@@ -456,6 +576,13 @@ blorp_clear(struct blorp_batch *batch,
|
|||||||
params.x1 = x1;
|
params.x1 = x1;
|
||||||
params.y1 = y1;
|
params.y1 = y1;
|
||||||
|
|
||||||
|
if (compute) {
|
||||||
|
params.wm_inputs.bounds_rect.x0 = x0;
|
||||||
|
params.wm_inputs.bounds_rect.y0 = y0;
|
||||||
|
params.wm_inputs.bounds_rect.x1 = x1;
|
||||||
|
params.wm_inputs.bounds_rect.y1 = y1;
|
||||||
|
}
|
||||||
|
|
||||||
if (params.dst.tile_x_sa || params.dst.tile_y_sa) {
|
if (params.dst.tile_x_sa || params.dst.tile_y_sa) {
|
||||||
assert(params.dst.surf.samples == 1);
|
assert(params.dst.surf.samples == 1);
|
||||||
assert(num_layers == 1);
|
assert(num_layers == 1);
|
||||||
@@ -558,6 +685,8 @@ blorp_clear_stencil_as_rgba(struct blorp_batch *batch,
|
|||||||
uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
|
uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
|
||||||
uint8_t stencil_mask, uint8_t stencil_value)
|
uint8_t stencil_mask, uint8_t stencil_value)
|
||||||
{
|
{
|
||||||
|
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
|
||||||
|
|
||||||
/* We only support separate W-tiled stencil for now */
|
/* We only support separate W-tiled stencil for now */
|
||||||
if (surf->surf->format != ISL_FORMAT_R8_UINT ||
|
if (surf->surf->format != ISL_FORMAT_R8_UINT ||
|
||||||
surf->surf->tiling != ISL_TILING_W)
|
surf->surf->tiling != ISL_TILING_W)
|
||||||
@@ -662,6 +791,8 @@ blorp_clear_depth_stencil(struct blorp_batch *batch,
|
|||||||
bool clear_depth, float depth_value,
|
bool clear_depth, float depth_value,
|
||||||
uint8_t stencil_mask, uint8_t stencil_value)
|
uint8_t stencil_mask, uint8_t stencil_value)
|
||||||
{
|
{
|
||||||
|
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
|
||||||
|
|
||||||
if (!clear_depth && blorp_clear_stencil_as_rgba(batch, stencil, level,
|
if (!clear_depth && blorp_clear_stencil_as_rgba(batch, stencil, level,
|
||||||
start_layer, num_layers,
|
start_layer, num_layers,
|
||||||
x0, y0, x1, y1,
|
x0, y0, x1, y1,
|
||||||
@@ -987,6 +1118,7 @@ blorp_clear_attachments(struct blorp_batch *batch,
|
|||||||
struct blorp_params params;
|
struct blorp_params params;
|
||||||
blorp_params_init(&params);
|
blorp_params_init(&params);
|
||||||
|
|
||||||
|
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
|
||||||
assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
|
assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
|
||||||
|
|
||||||
params.x0 = x0;
|
params.x0 = x0;
|
||||||
@@ -1045,6 +1177,7 @@ blorp_ccs_resolve(struct blorp_batch *batch,
|
|||||||
enum isl_format format,
|
enum isl_format format,
|
||||||
enum isl_aux_op resolve_op)
|
enum isl_aux_op resolve_op)
|
||||||
{
|
{
|
||||||
|
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
|
||||||
struct blorp_params params;
|
struct blorp_params params;
|
||||||
|
|
||||||
blorp_params_init(&params);
|
blorp_params_init(&params);
|
||||||
@@ -1261,6 +1394,8 @@ blorp_ccs_ambiguate(struct blorp_batch *batch,
|
|||||||
struct blorp_surf *surf,
|
struct blorp_surf *surf,
|
||||||
uint32_t level, uint32_t layer)
|
uint32_t level, uint32_t layer)
|
||||||
{
|
{
|
||||||
|
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
|
||||||
|
|
||||||
if (ISL_GFX_VER(batch->blorp->isl_dev) >= 10) {
|
if (ISL_GFX_VER(batch->blorp->isl_dev) >= 10) {
|
||||||
/* On gfx10 and above, we have a hardware resolve op for this */
|
/* On gfx10 and above, we have a hardware resolve op for this */
|
||||||
return blorp_ccs_resolve(batch, surf, level, layer, 1,
|
return blorp_ccs_resolve(batch, surf, level, layer, 1,
|
||||||
|
Reference in New Issue
Block a user