i965/fs: Extend back-end interface for limiting the shader dispatch width.

This replaces the current fs_visitor::no16() interface with
fs_visitor::limit_dispatch_width(), which takes an additional
parameter allowing the caller to specify the maximum dispatch width a
shader can be compiled with.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
This commit is contained in:
Francisco Jerez
2016-05-18 14:39:52 -07:00
parent 2d288cb9ea
commit 37fd13ee2d
3 changed files with 22 additions and 23 deletions

View File

@@ -668,24 +668,26 @@ fs_visitor::fail(const char *format, ...)
} }
/** /**
* Mark this program as impossible to compile in SIMD16 mode. * Mark this program as impossible to compile with dispatch width greater
* than n.
* *
* During the SIMD8 compile (which happens first), we can detect and flag * During the SIMD8 compile (which happens first), we can detect and flag
* things that are unsupported in SIMD16 mode, so the compiler can skip * things that are unsupported in SIMD16+ mode, so the compiler can skip the
* the SIMD16 compile altogether. * SIMD16+ compile altogether.
* *
* During a SIMD16 compile (if one happens anyway), this just calls fail(). * During a compile of dispatch width greater than n (if one happens anyway),
* this just calls fail().
*/ */
void void
fs_visitor::no16(const char *msg) fs_visitor::limit_dispatch_width(unsigned n, const char *msg)
{ {
if (dispatch_width == 16) { if (dispatch_width > n) {
fail("%s", msg); fail("%s", msg);
} else { } else {
simd16_unsupported = true; max_dispatch_width = n;
compiler->shader_perf_log(log_data, compiler->shader_perf_log(log_data,
"SIMD16 shader failed to compile: %s", msg); "Shader dispatch width limited to SIMD%d: %s",
n, msg);
} }
} }
@@ -6328,7 +6330,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
simd8_grf_used = v8.grf_used; simd8_grf_used = v8.grf_used;
} }
if (!v8.simd16_unsupported && if (v8.max_dispatch_width >= 16 &&
likely(!(INTEL_DEBUG & DEBUG_NO16) || use_rep_send)) { likely(!(INTEL_DEBUG & DEBUG_NO16) || use_rep_send)) {
/* Try a SIMD16 compile */ /* Try a SIMD16 compile */
fs_visitor v16(compiler, log_data, mem_ctx, key, fs_visitor v16(compiler, log_data, mem_ctx, key,
@@ -6501,8 +6503,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
NULL, /* Never used in core profile */ NULL, /* Never used in core profile */
shader, 16, shader_time_index); shader, 16, shader_time_index);
if (likely(!(INTEL_DEBUG & DEBUG_NO16)) && if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
!fail_msg && !v8.simd16_unsupported && !fail_msg && v8.max_dispatch_width >= 16 &&
local_workgroup_size <= 16 * max_cs_threads) { simd_required <= 16) {
/* Try a SIMD16 compile */ /* Try a SIMD16 compile */
if (simd_required <= 8) if (simd_required <= 8)
v16.import_uniforms(&v8); v16.import_uniforms(&v8);

View File

@@ -170,7 +170,7 @@ public:
fs_inst *inst); fs_inst *inst);
void vfail(const char *msg, va_list args); void vfail(const char *msg, va_list args);
void fail(const char *msg, ...); void fail(const char *msg, ...);
void no16(const char *msg); void limit_dispatch_width(unsigned n, const char *msg);
void lower_uniform_pull_constant_loads(); void lower_uniform_pull_constant_loads();
bool lower_load_payload(); bool lower_load_payload();
bool lower_pack(); bool lower_pack();
@@ -356,8 +356,6 @@ public:
bool failed; bool failed;
char *fail_msg; char *fail_msg;
bool simd16_unsupported;
char *no16_msg;
/** Register numbers for thread payload fields. */ /** Register numbers for thread payload fields. */
struct thread_payload { struct thread_payload {
@@ -391,8 +389,9 @@ public:
unsigned grf_used; unsigned grf_used;
bool spilled_any_registers; bool spilled_any_registers;
const unsigned dispatch_width; /**< 8 or 16 */ const unsigned dispatch_width; /**< 8, 16 or 32 */
unsigned min_dispatch_width; unsigned min_dispatch_width;
unsigned max_dispatch_width;
int shader_time_index; int shader_time_index;
@@ -505,7 +504,7 @@ private:
const void * const key; const void * const key;
struct brw_stage_prog_data * const prog_data; struct brw_stage_prog_data * const prog_data;
unsigned dispatch_width; /**< 8 or 16 */ unsigned dispatch_width; /**< 8, 16 or 32 */
exec_list discard_halt_patches; exec_list discard_halt_patches;
unsigned promoted_constants; unsigned promoted_constants;

View File

@@ -424,17 +424,16 @@ fs_visitor::emit_fb_writes()
* sounds because the SIMD8 single-source message lacks channel selects * sounds because the SIMD8 single-source message lacks channel selects
* for the second and third subspans. * for the second and third subspans.
*/ */
no16("Missing support for simd16 depth writes on gen6\n"); limit_dispatch_width(8, "Depth writes unsupported in SIMD16+ mode.\n");
} }
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
/* From the 'Render Target Write message' section of the docs: /* From the 'Render Target Write message' section of the docs:
* "Output Stencil is not supported with SIMD16 Render Target Write * "Output Stencil is not supported with SIMD16 Render Target Write
* Messages." * Messages."
*
* FINISHME: split 16 into 2 8s
*/ */
no16("FINISHME: support 2 simd8 writes for gl_FragStencilRefARB\n"); limit_dispatch_width(8, "gl_FragStencilRefARB unsupported "
"in SIMD16+ mode.\n");
} }
if (do_dual_src) { if (do_dual_src) {
@@ -885,11 +884,10 @@ fs_visitor::init()
min_dispatch_width = 8; min_dispatch_width = 8;
} }
this->max_dispatch_width = 32;
this->prog_data = this->stage_prog_data; this->prog_data = this->stage_prog_data;
this->failed = false; this->failed = false;
this->simd16_unsupported = false;
this->no16_msg = NULL;
this->nir_locals = NULL; this->nir_locals = NULL;
this->nir_ssa_values = NULL; this->nir_ssa_values = NULL;