diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index c801216fc19..f10df1dcbeb 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6733,6 +6733,72 @@ fs_visitor::lower_simd_width() return progress; } +/** + * Transform barycentric vectors into the interleaved form expected by the PLN + * instruction and returned by the Gen7+ PI shared function. + * + * For channels 0-15 in SIMD16 mode they are expected to be laid out as + * follows in the register file: + * + * rN+0: X[0-7] + * rN+1: Y[0-7] + * rN+2: X[8-15] + * rN+3: Y[8-15] + * + * There is no need to handle SIMD32 here -- This is expected to be run after + * SIMD lowering, since SIMD lowering relies on vectors having the standard + * component layout. + */ +bool +fs_visitor::lower_barycentrics() +{ + const bool has_interleaved_layout = devinfo->has_pln || devinfo->gen >= 7; + bool progress = false; + + if (stage != MESA_SHADER_FRAGMENT || !has_interleaved_layout) + return false; + + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { + if (inst->exec_size < 16) + continue; + + const fs_builder ibld(this, block, inst); + const fs_builder ubld = ibld.exec_all().group(8, 0); + + switch (inst->opcode) { + case FS_OPCODE_INTERPOLATE_AT_SAMPLE: + case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: + case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: { + assert(inst->exec_size == 16); + const fs_reg tmp = ibld.vgrf(inst->dst.type, 2); + + for (unsigned i = 0; i < 2; i++) { + for (unsigned g = 0; g < inst->exec_size / 8; g++) { + fs_inst *mov = ibld.at(block, inst->next).group(8, g) + .MOV(horiz_offset(offset(inst->dst, ibld, i), + 8 * g), + offset(tmp, ubld, 2 * g + i)); + mov->predicate = inst->predicate; + mov->predicate_inverse = inst->predicate_inverse; + mov->flag_subreg = inst->flag_subreg; + } + } + + inst->dst = tmp; + progress = true; + break; + } + default: + break; + } + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + void fs_visitor::dump_instructions() { @@ -7252,6 +7318,7 @@ fs_visitor::optimize() } OPT(lower_simd_width); + OPT(lower_barycentrics); /* After SIMD lowering just in case we had to unroll the EOT send. */ OPT(opt_sampler_eot); diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index d1895c1e960..d84f99db036 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -167,6 +167,7 @@ public: bool lower_integer_multiplication(); bool lower_minmax(); bool lower_simd_width(); + bool lower_barycentrics(); bool lower_scoreboard(); bool opt_combine_constants(); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 886751d0662..ffaf90764f5 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3574,13 +3574,12 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, const glsl_interp_mode interpolation = (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr); - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2); if (nir_src_is_const(instr->src[0])) { unsigned msg_data = nir_src_as_uint(instr->src[0]) << 4; emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, - tmp, + dest, fs_reg(), /* src */ brw_imm_ud(msg_data), interpolation); @@ -3595,7 +3594,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, .SHL(msg_data, sample_id, brw_imm_ud(4u)); emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, - tmp, + dest, fs_reg(), /* src */ component(msg_data, 0), interpolation); @@ -3623,7 +3622,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, fs_inst *inst = emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, - tmp, + dest, fs_reg(), /* src */ component(msg_data, 0), interpolation); @@ -3635,7 +3634,6 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, bld.emit(BRW_OPCODE_WHILE)); } } - shuffle_from_pln_layout(bld, dest, tmp); break; } @@ -3645,7 +3643,6 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2); if (const_offset) { assert(nir_src_bit_size(instr->src[0]) == 32); unsigned off_x = MIN2((int)(const_offset[0].f32 * 16), 7) & 0xf; @@ -3653,7 +3650,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, - tmp, + dest, fs_reg(), /* src */ brw_imm_ud(off_x | (off_y << 4)), interpolation); @@ -3690,12 +3687,11 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET; emit_pixel_interpolater_send(bld, opcode, - tmp, + dest, src, brw_imm_ud(0u), interpolation); } - shuffle_from_pln_layout(bld, dest, tmp); break; }