i965/fs: Add an allow_spilling flag to brw_compile_fs
This allows us to disable spilling for blorp shaders, since blorp state setup doesn't handle spilling. Without this, blorp fails hard if you run with INTEL_DEBUG=spill.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Tested-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:
@@ -675,7 +675,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
|
|||||||
unsigned code_size;
|
unsigned code_size;
|
||||||
const unsigned *shader_code =
|
const unsigned *shader_code =
|
||||||
brw_compile_fs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
|
brw_compile_fs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
|
||||||
NULL, -1, -1, pipeline->use_repclear, &code_size, NULL);
|
NULL, -1, -1, true, pipeline->use_repclear,
|
||||||
|
&code_size, NULL);
|
||||||
if (shader_code == NULL) {
|
if (shader_code == NULL) {
|
||||||
ralloc_free(mem_ctx);
|
ralloc_free(mem_ctx);
|
||||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
|
@@ -223,7 +223,7 @@ brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir,
|
|||||||
|
|
||||||
const unsigned *program =
|
const unsigned *program =
|
||||||
brw_compile_fs(compiler, brw, mem_ctx, wm_key, &wm_prog_data, nir,
|
brw_compile_fs(compiler, brw, mem_ctx, wm_key, &wm_prog_data, nir,
|
||||||
NULL, -1, -1, use_repclear, program_size, NULL);
|
NULL, -1, -1, false, use_repclear, program_size, NULL);
|
||||||
|
|
||||||
/* Copy the relavent bits of wm_prog_data over into the blorp prog data */
|
/* Copy the relavent bits of wm_prog_data over into the blorp prog data */
|
||||||
prog_data->dispatch_8 = wm_prog_data.dispatch_8;
|
prog_data->dispatch_8 = wm_prog_data.dispatch_8;
|
||||||
|
@@ -790,6 +790,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
struct gl_program *prog,
|
struct gl_program *prog,
|
||||||
int shader_time_index8,
|
int shader_time_index8,
|
||||||
int shader_time_index16,
|
int shader_time_index16,
|
||||||
|
bool allow_spilling,
|
||||||
bool use_rep_send,
|
bool use_rep_send,
|
||||||
unsigned *final_assembly_size,
|
unsigned *final_assembly_size,
|
||||||
char **error_str);
|
char **error_str);
|
||||||
|
@@ -5486,7 +5486,7 @@ fs_visitor::fixup_3src_null_dest()
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::allocate_registers()
|
fs_visitor::allocate_registers(bool allow_spilling)
|
||||||
{
|
{
|
||||||
bool allocated_without_spills;
|
bool allocated_without_spills;
|
||||||
|
|
||||||
@@ -5496,6 +5496,8 @@ fs_visitor::allocate_registers()
|
|||||||
SCHEDULE_PRE_LIFO,
|
SCHEDULE_PRE_LIFO,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
bool spill_all = allow_spilling && (INTEL_DEBUG & DEBUG_SPILL_FS);
|
||||||
|
|
||||||
/* Try each scheduling heuristic to see if it can successfully register
|
/* Try each scheduling heuristic to see if it can successfully register
|
||||||
* allocate without spilling. They should be ordered by decreasing
|
* allocate without spilling. They should be ordered by decreasing
|
||||||
* performance but increasing likelihood of allocating.
|
* performance but increasing likelihood of allocating.
|
||||||
@@ -5507,7 +5509,7 @@ fs_visitor::allocate_registers()
|
|||||||
assign_regs_trivial();
|
assign_regs_trivial();
|
||||||
allocated_without_spills = true;
|
allocated_without_spills = true;
|
||||||
} else {
|
} else {
|
||||||
allocated_without_spills = assign_regs(false);
|
allocated_without_spills = assign_regs(false, spill_all);
|
||||||
}
|
}
|
||||||
if (allocated_without_spills)
|
if (allocated_without_spills)
|
||||||
break;
|
break;
|
||||||
@@ -5532,12 +5534,14 @@ fs_visitor::allocate_registers()
|
|||||||
/* Since we're out of heuristics, just go spill registers until we
|
/* Since we're out of heuristics, just go spill registers until we
|
||||||
* get an allocation.
|
* get an allocation.
|
||||||
*/
|
*/
|
||||||
while (!assign_regs(true)) {
|
while (!assign_regs(true, spill_all)) {
|
||||||
if (failed)
|
if (failed)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert(last_scratch == 0 || allow_spilling);
|
||||||
|
|
||||||
/* This must come after all optimization and register allocation, since
|
/* This must come after all optimization and register allocation, since
|
||||||
* it inserts dead code that happens to have side effects, and it does
|
* it inserts dead code that happens to have side effects, and it does
|
||||||
* so based on the actual physical registers in use.
|
* so based on the actual physical registers in use.
|
||||||
@@ -5583,7 +5587,7 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes)
|
|||||||
assign_vs_urb_setup();
|
assign_vs_urb_setup();
|
||||||
|
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
allocate_registers();
|
allocate_registers(true);
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
}
|
}
|
||||||
@@ -5665,7 +5669,7 @@ fs_visitor::run_tcs_single_patch()
|
|||||||
assign_tcs_single_patch_urb_setup();
|
assign_tcs_single_patch_urb_setup();
|
||||||
|
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
allocate_registers();
|
allocate_registers(true);
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
}
|
}
|
||||||
@@ -5699,7 +5703,7 @@ fs_visitor::run_tes()
|
|||||||
assign_tes_urb_setup();
|
assign_tes_urb_setup();
|
||||||
|
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
allocate_registers();
|
allocate_registers(true);
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
}
|
}
|
||||||
@@ -5748,13 +5752,13 @@ fs_visitor::run_gs()
|
|||||||
assign_gs_urb_setup();
|
assign_gs_urb_setup();
|
||||||
|
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
allocate_registers();
|
allocate_registers(true);
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
fs_visitor::run_fs(bool do_rep_send)
|
fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
||||||
{
|
{
|
||||||
brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data;
|
brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data;
|
||||||
brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key;
|
brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key;
|
||||||
@@ -5818,7 +5822,7 @@ fs_visitor::run_fs(bool do_rep_send)
|
|||||||
assign_urb_setup();
|
assign_urb_setup();
|
||||||
|
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
allocate_registers();
|
allocate_registers(allow_spilling);
|
||||||
|
|
||||||
if (failed)
|
if (failed)
|
||||||
return false;
|
return false;
|
||||||
@@ -5861,7 +5865,7 @@ fs_visitor::run_cs()
|
|||||||
assign_curb_setup();
|
assign_curb_setup();
|
||||||
|
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
allocate_registers();
|
allocate_registers(true);
|
||||||
|
|
||||||
if (failed)
|
if (failed)
|
||||||
return false;
|
return false;
|
||||||
@@ -5986,6 +5990,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
const nir_shader *src_shader,
|
const nir_shader *src_shader,
|
||||||
struct gl_program *prog,
|
struct gl_program *prog,
|
||||||
int shader_time_index8, int shader_time_index16,
|
int shader_time_index8, int shader_time_index16,
|
||||||
|
bool allow_spilling,
|
||||||
bool use_rep_send,
|
bool use_rep_send,
|
||||||
unsigned *final_assembly_size,
|
unsigned *final_assembly_size,
|
||||||
char **error_str)
|
char **error_str)
|
||||||
@@ -6029,7 +6034,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
fs_visitor v8(compiler, log_data, mem_ctx, key,
|
fs_visitor v8(compiler, log_data, mem_ctx, key,
|
||||||
&prog_data->base, prog, shader, 8,
|
&prog_data->base, prog, shader, 8,
|
||||||
shader_time_index8);
|
shader_time_index8);
|
||||||
if (!v8.run_fs(false /* do_rep_send */)) {
|
if (!v8.run_fs(allow_spilling, false /* do_rep_send */)) {
|
||||||
if (error_str)
|
if (error_str)
|
||||||
*error_str = ralloc_strdup(mem_ctx, v8.fail_msg);
|
*error_str = ralloc_strdup(mem_ctx, v8.fail_msg);
|
||||||
|
|
||||||
@@ -6047,7 +6052,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
&prog_data->base, prog, shader, 16,
|
&prog_data->base, prog, shader, 16,
|
||||||
shader_time_index16);
|
shader_time_index16);
|
||||||
v16.import_uniforms(&v8);
|
v16.import_uniforms(&v8);
|
||||||
if (!v16.run_fs(use_rep_send)) {
|
if (!v16.run_fs(allow_spilling, use_rep_send)) {
|
||||||
compiler->shader_perf_log(log_data,
|
compiler->shader_perf_log(log_data,
|
||||||
"SIMD16 shader failed to compile: %s",
|
"SIMD16 shader failed to compile: %s",
|
||||||
v16.fail_msg);
|
v16.fail_msg);
|
||||||
|
@@ -105,14 +105,14 @@ public:
|
|||||||
uint32_t const_offset);
|
uint32_t const_offset);
|
||||||
void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
|
void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
|
||||||
|
|
||||||
bool run_fs(bool do_rep_send);
|
bool run_fs(bool allow_spilling, bool do_rep_send);
|
||||||
bool run_vs(gl_clip_plane *clip_planes);
|
bool run_vs(gl_clip_plane *clip_planes);
|
||||||
bool run_tcs_single_patch();
|
bool run_tcs_single_patch();
|
||||||
bool run_tes();
|
bool run_tes();
|
||||||
bool run_gs();
|
bool run_gs();
|
||||||
bool run_cs();
|
bool run_cs();
|
||||||
void optimize();
|
void optimize();
|
||||||
void allocate_registers();
|
void allocate_registers(bool allow_spilling);
|
||||||
void setup_fs_payload_gen4();
|
void setup_fs_payload_gen4();
|
||||||
void setup_fs_payload_gen6();
|
void setup_fs_payload_gen6();
|
||||||
void setup_vs_payload();
|
void setup_vs_payload();
|
||||||
@@ -127,7 +127,7 @@ public:
|
|||||||
void assign_tcs_single_patch_urb_setup();
|
void assign_tcs_single_patch_urb_setup();
|
||||||
void assign_tes_urb_setup();
|
void assign_tes_urb_setup();
|
||||||
void assign_gs_urb_setup();
|
void assign_gs_urb_setup();
|
||||||
bool assign_regs(bool allow_spilling);
|
bool assign_regs(bool allow_spilling, bool spill_all);
|
||||||
void assign_regs_trivial();
|
void assign_regs_trivial();
|
||||||
void calculate_payload_ranges(int payload_node_count,
|
void calculate_payload_ranges(int payload_node_count,
|
||||||
int *payload_last_use_ip);
|
int *payload_last_use_ip);
|
||||||
|
@@ -542,7 +542,7 @@ setup_mrf_hack_interference(fs_visitor *v, struct ra_graph *g,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
fs_visitor::assign_regs(bool allow_spilling)
|
fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
|
||||||
{
|
{
|
||||||
/* Most of this allocation was written for a reg_width of 1
|
/* Most of this allocation was written for a reg_width of 1
|
||||||
* (dispatch_width == 8). In extending to SIMD16, the code was
|
* (dispatch_width == 8). In extending to SIMD16, the code was
|
||||||
@@ -668,7 +668,7 @@ fs_visitor::assign_regs(bool allow_spilling)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Debug of register spilling: Go spill everything. */
|
/* Debug of register spilling: Go spill everything. */
|
||||||
if (unlikely(INTEL_DEBUG & DEBUG_SPILL_FS)) {
|
if (unlikely(spill_all)) {
|
||||||
int reg = choose_spill_reg(g);
|
int reg = choose_spill_reg(g);
|
||||||
|
|
||||||
if (reg != -1) {
|
if (reg != -1) {
|
||||||
|
@@ -137,7 +137,8 @@ brw_codegen_wm_prog(struct brw_context *brw,
|
|||||||
program = brw_compile_fs(brw->intelScreen->compiler, brw, mem_ctx,
|
program = brw_compile_fs(brw->intelScreen->compiler, brw, mem_ctx,
|
||||||
key, &prog_data, fp->program.Base.nir,
|
key, &prog_data, fp->program.Base.nir,
|
||||||
&fp->program.Base, st_index8, st_index16,
|
&fp->program.Base, st_index8, st_index16,
|
||||||
brw->use_rep_send, &program_size, &error_str);
|
true, brw->use_rep_send,
|
||||||
|
&program_size, &error_str);
|
||||||
if (program == NULL) {
|
if (program == NULL) {
|
||||||
if (prog) {
|
if (prog) {
|
||||||
prog->LinkStatus = false;
|
prog->LinkStatus = false;
|
||||||
|
Reference in New Issue
Block a user