intel/fs: Rework KSP data to be SIMD width-based
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
@@ -683,11 +683,11 @@ struct brw_wm_prog_data {
|
||||
|
||||
GLuint num_varying_inputs;
|
||||
|
||||
uint8_t reg_blocks_0;
|
||||
uint8_t reg_blocks_2;
|
||||
uint8_t reg_blocks_8;
|
||||
uint8_t reg_blocks_16;
|
||||
|
||||
uint8_t dispatch_grf_start_reg_2;
|
||||
uint32_t prog_offset_2;
|
||||
uint8_t dispatch_grf_start_reg_16;
|
||||
uint32_t prog_offset_16;
|
||||
|
||||
struct {
|
||||
/** @{
|
||||
@@ -784,51 +784,48 @@ brw_fs_simd_width_for_ksp(unsigned ksp_idx, bool simd8_enabled,
|
||||
|
||||
static inline uint32_t
|
||||
_brw_wm_prog_data_prog_offset(const struct brw_wm_prog_data *prog_data,
|
||||
unsigned ksp_idx)
|
||||
unsigned simd_width)
|
||||
{
|
||||
switch (ksp_idx) {
|
||||
case 0: return 0;
|
||||
case 1: return 0;
|
||||
case 2: return prog_data->prog_offset_2;
|
||||
default:
|
||||
unreachable("Invalid KSP index");
|
||||
switch (simd_width) {
|
||||
case 8: return 0;
|
||||
case 16: return prog_data->prog_offset_16;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#define brw_wm_prog_data_prog_offset(prog_data, wm_state, ksp_idx) \
|
||||
_brw_wm_prog_data_prog_offset(prog_data, ksp_idx)
|
||||
_brw_wm_prog_data_prog_offset(prog_data, \
|
||||
brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx))
|
||||
|
||||
static inline uint8_t
|
||||
_brw_wm_prog_data_dispatch_grf_start_reg(const struct brw_wm_prog_data *prog_data,
|
||||
unsigned ksp_idx)
|
||||
unsigned simd_width)
|
||||
{
|
||||
switch (ksp_idx) {
|
||||
case 0: return prog_data->base.dispatch_grf_start_reg;
|
||||
case 1: return 0;
|
||||
case 2: return prog_data->dispatch_grf_start_reg_2;
|
||||
default:
|
||||
unreachable("Invalid KSP index");
|
||||
switch (simd_width) {
|
||||
case 8: return prog_data->base.dispatch_grf_start_reg;
|
||||
case 16: return prog_data->dispatch_grf_start_reg_16;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#define brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm_state, ksp_idx) \
|
||||
_brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ksp_idx)
|
||||
_brw_wm_prog_data_dispatch_grf_start_reg(prog_data, \
|
||||
brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx))
|
||||
|
||||
static inline uint8_t
|
||||
_brw_wm_prog_data_reg_blocks(const struct brw_wm_prog_data *prog_data,
|
||||
unsigned ksp_idx)
|
||||
unsigned simd_width)
|
||||
{
|
||||
switch (ksp_idx) {
|
||||
case 0: return prog_data->reg_blocks_0;
|
||||
case 1: return 0;
|
||||
case 2: return prog_data->reg_blocks_2;
|
||||
default:
|
||||
unreachable("Invalid KSP index");
|
||||
switch (simd_width) {
|
||||
case 8: return prog_data->reg_blocks_8;
|
||||
case 16: return prog_data->reg_blocks_16;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#define brw_wm_prog_data_reg_blocks(prog_data, wm_state, ksp_idx) \
|
||||
_brw_wm_prog_data_reg_blocks(prog_data, ksp_idx)
|
||||
_brw_wm_prog_data_reg_blocks(prog_data, \
|
||||
brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx))
|
||||
|
||||
struct brw_push_const_block {
|
||||
unsigned dwords; /* Dword count, not reg aligned */
|
||||
|
@@ -7099,8 +7099,6 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
|
||||
brw_compute_barycentric_interp_modes(compiler->devinfo, shader);
|
||||
|
||||
cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL;
|
||||
uint8_t simd8_grf_start = 0, simd16_grf_start = 0;
|
||||
unsigned simd8_grf_used = 0, simd16_grf_used = 0;
|
||||
|
||||
fs_visitor v8(compiler, log_data, mem_ctx, key,
|
||||
&prog_data->base, prog, shader, 8,
|
||||
@@ -7112,8 +7110,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
|
||||
return NULL;
|
||||
} else if (likely(!(INTEL_DEBUG & DEBUG_NO8))) {
|
||||
simd8_cfg = v8.cfg;
|
||||
simd8_grf_start = v8.payload.num_regs;
|
||||
simd8_grf_used = v8.grf_used;
|
||||
prog_data->base.dispatch_grf_start_reg = v8.payload.num_regs;
|
||||
prog_data->reg_blocks_8 = brw_register_blocks(v8.grf_used);
|
||||
}
|
||||
|
||||
if (v8.max_dispatch_width >= 16 &&
|
||||
@@ -7129,8 +7127,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
|
||||
v16.fail_msg);
|
||||
} else {
|
||||
simd16_cfg = v16.cfg;
|
||||
simd16_grf_start = v16.payload.num_regs;
|
||||
simd16_grf_used = v16.grf_used;
|
||||
prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs;
|
||||
prog_data->reg_blocks_16 = brw_register_blocks(v16.grf_used);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7146,6 +7144,16 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
|
||||
if (compiler->devinfo->gen < 5 && simd16_cfg)
|
||||
simd8_cfg = NULL;
|
||||
|
||||
if (compiler->devinfo->gen <= 5 && !simd8_cfg) {
|
||||
/* Ironlake and earlier only have one Dispatch GRF start field. Make
|
||||
* the data available in the base prog data struct for convenience.
|
||||
*/
|
||||
if (simd16_cfg) {
|
||||
prog_data->base.dispatch_grf_start_reg =
|
||||
prog_data->dispatch_grf_start_reg_16;
|
||||
}
|
||||
}
|
||||
|
||||
if (prog_data->persample_dispatch) {
|
||||
/* Starting with SandyBridge (where we first get MSAA), the different
|
||||
* pixel dispatch combinations are grouped into classifications A
|
||||
@@ -7184,20 +7192,11 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
|
||||
if (simd8_cfg) {
|
||||
prog_data->dispatch_8 = true;
|
||||
g.generate_code(simd8_cfg, 8);
|
||||
prog_data->base.dispatch_grf_start_reg = simd8_grf_start;
|
||||
prog_data->reg_blocks_0 = brw_register_blocks(simd8_grf_used);
|
||||
}
|
||||
|
||||
if (simd16_cfg) {
|
||||
prog_data->dispatch_16 = true;
|
||||
prog_data->prog_offset_2 = g.generate_code(simd16_cfg, 16);
|
||||
prog_data->dispatch_grf_start_reg_2 = simd16_grf_start;
|
||||
prog_data->reg_blocks_2 = brw_register_blocks(simd16_grf_used);
|
||||
}
|
||||
} else if (simd16_cfg) {
|
||||
if (simd16_cfg) {
|
||||
prog_data->dispatch_16 = true;
|
||||
g.generate_code(simd16_cfg, 16);
|
||||
prog_data->base.dispatch_grf_start_reg = simd16_grf_start;
|
||||
prog_data->reg_blocks_0 = brw_register_blocks(simd16_grf_used);
|
||||
prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16);
|
||||
}
|
||||
|
||||
return g.get_assembly();
|
||||
|
@@ -126,7 +126,7 @@ fs_visitor::emit_dummy_fs()
|
||||
stage_prog_data->nr_pull_params = 0;
|
||||
stage_prog_data->curb_read_length = 0;
|
||||
stage_prog_data->dispatch_grf_start_reg = 2;
|
||||
wm_prog_data->dispatch_grf_start_reg_2 = 2;
|
||||
wm_prog_data->dispatch_grf_start_reg_16 = 2;
|
||||
grf_used = 1; /* Gen4-5 don't allow zero GRF blocks */
|
||||
|
||||
calculate_cfg();
|
||||
|
Reference in New Issue
Block a user