intel/blorp: Support compute for slow clears

Reworks:
 * Use BLORP_BATCH_USE_COMPUTE flag rather than compute param to
   blorp_clear (s-b Jason)
 * Use nir_load_global_invocation_id (s-b Jason)
 * Use nir_push_if (s-b Jason)
 * Use nir_image_store (s-b Jason)
 * Require gfx12 for ccs in blorp_clear_supports_compute (s-b Jason)
 * Add nir_pop_if (s-b Ken)
 * Fix aux_usage check on gfx12 blorp_clear_supports_compute (s-b Ken)
 * Use blorp_set_cs_dims (s-b Jason)
 * Simplify rgb-as-red calculation (s-b Jason)
 * Use dim=2d with array=true for nir_image_store (s-b Jason, Francisco)
 * discard => bounds (s-b Ken)
 * Re-add ISL_AUX_USAGE_CCS_E in *_supports_compute (s-b Sagar)

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11564>
This commit is contained in:
Jordan Justen
2018-10-26 12:56:20 -07:00
committed by Marge Bot
parent 2123d59693
commit 22ecb4a10f
2 changed files with 145 additions and 5 deletions

View File

@@ -173,6 +173,11 @@ blorp_fast_clear(struct blorp_batch *batch,
uint32_t level, uint32_t start_layer, uint32_t num_layers,
uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1);
/**
 * Reports whether a slow color clear with the given state can be performed
 * with a compute shader instead of the render pipeline.
 *
 * Returns false when any bit of color_write_disable is set or when
 * blend_enabled is true; the remaining decision depends on the hardware
 * version of the blorp context and on aux_usage.
 */
bool
blorp_clear_supports_compute(struct blorp_context *blorp,
uint8_t color_write_disable, bool blend_enabled,
enum isl_aux_usage aux_usage);
void
blorp_clear(struct blorp_batch *batch,
const struct blorp_surf *surf,

View File

@@ -29,6 +29,7 @@
#include "blorp_priv.h"
#include "compiler/brw_eu_defines.h"
#include "dev/intel_debug.h"
#include "blorp_nir_builder.h"
@@ -40,11 +41,12 @@ struct brw_blorp_const_color_prog_key
struct brw_blorp_base_key base;
bool use_simd16_replicated_data;
bool clear_rgb_as_red;
uint8_t local_y;
};
#pragma pack(pop)
static bool
blorp_params_get_clear_kernel(struct blorp_batch *batch,
blorp_params_get_clear_kernel_fs(struct blorp_batch *batch,
struct blorp_params *params,
bool use_replicated_data,
bool clear_rgb_as_red)
@@ -53,8 +55,10 @@ blorp_params_get_clear_kernel(struct blorp_batch *batch,
const struct brw_blorp_const_color_prog_key blorp_key = {
.base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_CLEAR),
.base.shader_pipeline = BLORP_SHADER_PIPELINE_RENDER,
.use_simd16_replicated_data = use_replicated_data,
.clear_rgb_as_red = clear_rgb_as_red,
.local_y = 0,
};
if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key),
@@ -103,6 +107,95 @@ blorp_params_get_clear_kernel(struct blorp_batch *batch,
return result;
}
/* Looks up, or builds and uploads, the compute-shader variant of the
 * constant-color clear kernel and stores it in params->cs_prog_kernel /
 * params->cs_prog_data.
 *
 * Returns true when lookup_shader reports success; otherwise returns the
 * result of upload_shader for the newly compiled kernel.
 */
static bool
blorp_params_get_clear_kernel_cs(struct blorp_batch *batch,
struct blorp_params *params,
bool clear_rgb_as_red)
{
struct blorp_context *blorp = batch->blorp;
/* Shader cache key: same shader type as the render-pipeline clear, but
 * tagged as compute and specialized on the local_y value derived from
 * params.
 */
const struct brw_blorp_const_color_prog_key blorp_key = {
.base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_CLEAR),
.base.shader_pipeline = BLORP_SHADER_PIPELINE_COMPUTE,
.use_simd16_replicated_data = false,
.clear_rgb_as_red = clear_rgb_as_red,
.local_y = blorp_get_cs_local_y(params),
};
/* Nothing to build if lookup_shader already succeeds for this key. */
if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key),
&params->cs_prog_kernel, &params->cs_prog_data))
return true;
/* Temporary allocation context for the shader build; freed before return. */
void *mem_ctx = ralloc_context(NULL);
nir_builder b;
blorp_nir_init_shader(&b, mem_ctx, MESA_SHADER_COMPUTE, "BLORP-gpgpu-clear");
blorp_set_cs_dims(b.shader, blorp_key.local_y);
/* The global invocation id is used as the destination position. */
nir_ssa_def *dst_pos = nir_load_global_invocation_id(&b, 32);
nir_variable *v_color =
BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type());
nir_ssa_def *color = nir_load_var(&b, v_color);
nir_variable *v_bounds_rect =
BLORP_CREATE_NIR_INPUT(b.shader, bounds_rect, glsl_vec4_type());
nir_ssa_def *bounds_rect = nir_load_var(&b, v_bounds_rect);
nir_ssa_def *in_bounds = blorp_check_in_bounds(&b, bounds_rect, dst_pos);
if (clear_rgb_as_red) {
/* Select the clear-color component indexed by (x position mod 3) and
 * widen it back to a vec4.
 */
nir_ssa_def *comp = nir_umod(&b, nir_channel(&b, dst_pos, 0),
nir_imm_int(&b, 3));
color = nir_pad_vec4(&b, nir_vector_extract(&b, color, comp));
}
/* Only invocations that pass the bounds check perform the store. */
nir_push_if(&b, in_bounds);
nir_image_store(&b, nir_imm_int(&b, 0),
nir_pad_vector_imm_int(&b, dst_pos, 0, 4),
nir_imm_int(&b, 0),
nir_pad_vector_imm_int(&b, color, 0, 4),
nir_imm_int(&b, 0),
.image_dim = GLSL_SAMPLER_DIM_2D,
.image_array = true,
.access = ACCESS_NON_READABLE);
nir_pop_if(&b, NULL);
struct brw_cs_prog_key cs_key;
brw_blorp_init_cs_prog_key(&cs_key);
struct brw_cs_prog_data prog_data;
/* NOTE(review): the pointer returned by blorp_compile_cs is passed to
 * upload_shader without a check here -- confirm it cannot be NULL.
 */
const unsigned *program =
blorp_compile_cs(blorp, mem_ctx, b.shader, &cs_key, &prog_data);
bool result =
blorp->upload_shader(batch, MESA_SHADER_COMPUTE,
&blorp_key, sizeof(blorp_key),
program, prog_data.base.program_size,
&prog_data.base, sizeof(prog_data),
&params->cs_prog_kernel, &params->cs_prog_data);
ralloc_free(mem_ctx);
return result;
}
/* Selects the clear kernel for this batch: the compute-shader variant when
 * the batch carries BLORP_BATCH_USE_COMPUTE, the fragment-shader variant
 * otherwise. Replicated-data clears are only valid on the fragment path.
 *
 * Returns whatever the chosen kernel getter returns.
 */
static bool
blorp_params_get_clear_kernel(struct blorp_batch *batch,
                              struct blorp_params *params,
                              bool use_replicated_data,
                              bool clear_rgb_as_red)
{
   const bool use_compute = (batch->flags & BLORP_BATCH_USE_COMPUTE) != 0;

   if (!use_compute) {
      return blorp_params_get_clear_kernel_fs(batch, params,
                                              use_replicated_data,
                                              clear_rgb_as_red);
   }

   /* The compute kernel has no replicated-data mode. */
   assert(!use_replicated_data);
   return blorp_params_get_clear_kernel_cs(batch, params, clear_rgb_as_red);
}
#pragma pack(push, 1)
struct layer_offset_vs_key {
struct brw_blorp_base_key base;
@@ -336,6 +429,7 @@ blorp_fast_clear(struct blorp_batch *batch,
struct blorp_params params;
blorp_params_init(&params);
params.num_layers = num_layers;
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
params.x0 = x0;
params.y0 = y0;
@@ -370,6 +464,24 @@ blorp_fast_clear(struct blorp_batch *batch,
batch->blorp->exec(batch, &params);
}
/* Reports whether a slow color clear can be done with a compute shader.
 *
 * Compute clears are rejected below hardware version 7, when any color
 * channel write is disabled, or when blending is enabled. Without auxiliary
 * compression the clear is always allowed; the two CCS_E aux usages are
 * allowed only on version 12 and later. Every other aux usage is rejected.
 */
bool
blorp_clear_supports_compute(struct blorp_context *blorp,
                             uint8_t color_write_disable, bool blend_enabled,
                             enum isl_aux_usage aux_usage)
{
   const bool uncompressed = aux_usage == ISL_AUX_USAGE_NONE;
   const bool ccs_e = aux_usage == ISL_AUX_USAGE_GFX12_CCS_E ||
                      aux_usage == ISL_AUX_USAGE_CCS_E;

   if (blorp->isl_dev->info->ver < 7 ||
       color_write_disable != 0 ||
       blend_enabled) {
      return false;
   }

   if (uncompressed) {
      return true;
   }

   return ccs_e && blorp->isl_dev->info->ver >= 12;
}
void
blorp_clear(struct blorp_batch *batch,
const struct blorp_surf *surf,
@@ -383,6 +495,11 @@ blorp_clear(struct blorp_batch *batch,
blorp_params_init(&params);
params.snapshot_type = INTEL_SNAPSHOT_SLOW_COLOR_CLEAR;
const bool compute = batch->flags & BLORP_BATCH_USE_COMPUTE;
if (compute)
assert(blorp_clear_supports_compute(batch->blorp, color_write_disable,
false, surf->aux_usage));
/* Manually apply the clear destination swizzle. This way swizzled clears
* will work for swizzles which we can't normally use for rendering and it
* also ensures that they work on pre-Haswell hardware which can't swizzle
@@ -431,6 +548,9 @@ blorp_clear(struct blorp_batch *batch,
if (batch->blorp->isl_dev->info->ver < 6)
use_simd16_replicated_data = false;
if (compute)
use_simd16_replicated_data = false;
/* Constant color writes ignore everything in blend and color calculator
* state. This is not documented.
*/
@@ -443,7 +563,7 @@ blorp_clear(struct blorp_batch *batch,
clear_rgb_as_red))
return;
if (!blorp_ensure_sf_program(batch, &params))
if (!compute && !blorp_ensure_sf_program(batch, &params))
return;
while (num_layers > 0) {
@@ -456,6 +576,13 @@ blorp_clear(struct blorp_batch *batch,
params.x1 = x1;
params.y1 = y1;
if (compute) {
params.wm_inputs.bounds_rect.x0 = x0;
params.wm_inputs.bounds_rect.y0 = y0;
params.wm_inputs.bounds_rect.x1 = x1;
params.wm_inputs.bounds_rect.y1 = y1;
}
if (params.dst.tile_x_sa || params.dst.tile_y_sa) {
assert(params.dst.surf.samples == 1);
assert(num_layers == 1);
@@ -558,6 +685,8 @@ blorp_clear_stencil_as_rgba(struct blorp_batch *batch,
uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
uint8_t stencil_mask, uint8_t stencil_value)
{
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
/* We only support separate W-tiled stencil for now */
if (surf->surf->format != ISL_FORMAT_R8_UINT ||
surf->surf->tiling != ISL_TILING_W)
@@ -662,6 +791,8 @@ blorp_clear_depth_stencil(struct blorp_batch *batch,
bool clear_depth, float depth_value,
uint8_t stencil_mask, uint8_t stencil_value)
{
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
if (!clear_depth && blorp_clear_stencil_as_rgba(batch, stencil, level,
start_layer, num_layers,
x0, y0, x1, y1,
@@ -987,6 +1118,7 @@ blorp_clear_attachments(struct blorp_batch *batch,
struct blorp_params params;
blorp_params_init(&params);
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
params.x0 = x0;
@@ -1045,6 +1177,7 @@ blorp_ccs_resolve(struct blorp_batch *batch,
enum isl_format format,
enum isl_aux_op resolve_op)
{
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
struct blorp_params params;
blorp_params_init(&params);
@@ -1261,6 +1394,8 @@ blorp_ccs_ambiguate(struct blorp_batch *batch,
struct blorp_surf *surf,
uint32_t level, uint32_t layer)
{
assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
if (ISL_GFX_VER(batch->blorp->isl_dev) >= 10) {
/* On gfx10 and above, we have a hardware resolve op for this */
return blorp_ccs_resolve(batch, surf, level, layer, 1,