diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 805c60a61cb..8fb3c3b4d75 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -637,11 +637,6 @@ private: void generate_halt(fs_inst *inst); - void generate_pack_half_2x16_split(fs_inst *inst, - struct brw_reg dst, - struct brw_reg x, - struct brw_reg y); - void generate_mov_indirect(fs_inst *inst, struct brw_reg dst, struct brw_reg reg, diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 7a26bff58e1..a99f98b1643 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1642,55 +1642,6 @@ fs_generator::generate_set_sample_id(fs_inst *inst, } } -void -fs_generator::generate_pack_half_2x16_split(fs_inst *, - struct brw_reg dst, - struct brw_reg x, - struct brw_reg y) -{ - assert(devinfo->ver >= 7); - assert(dst.type == BRW_REGISTER_TYPE_UD); - assert(x.type == BRW_REGISTER_TYPE_F); - assert(y.type == BRW_REGISTER_TYPE_F); - - /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16: - * - * Because this instruction does not have a 16-bit floating-point type, - * the destination data type must be Word (W). - * - * The destination must be DWord-aligned and specify a horizontal stride - * (HorzStride) of 2. The 16-bit result is stored in the lower word of - * each destination channel and the upper word is not modified. - */ - const enum brw_reg_type t = devinfo->ver > 7 - ? BRW_REGISTER_TYPE_HF : BRW_REGISTER_TYPE_W; - struct brw_reg dst_w = spread(retype(dst, t), 2); - - if (y.file == IMM) { - const uint32_t hhhh0000 = _mesa_float_to_half(y.f) << 16; - - brw_MOV(p, dst, brw_imm_ud(hhhh0000)); - brw_set_default_swsb(p, tgl_swsb_regdist(1)); - } else { - /* Give each 32-bit channel of dst the form below, where "." means - * unchanged. 
- * 0x....hhhh - */ - brw_F32TO16(p, dst_w, y); - - /* Now the form: - * 0xhhhh0000 - */ - brw_set_default_swsb(p, tgl_swsb_regdist(1)); - brw_SHL(p, dst, dst, brw_imm_ud(16u)); - } - - /* And, finally the form of packHalf2x16's output: - * 0xhhhhllll - */ - brw_F32TO16(p, dst_w, x); -} - void fs_generator::enable_debug(const char *shader_name) { @@ -2350,10 +2301,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, generate_set_sample_id(inst, dst, src[0], src[1]); break; - case FS_OPCODE_PACK_HALF_2x16_SPLIT: - generate_pack_half_2x16_split(inst, dst, src[0], src[1]); - break; - case SHADER_OPCODE_HALT_TARGET: /* This is the place where the final HALT needs to be inserted if * we've emitted any discards. If not, this will emit no code. diff --git a/src/intel/compiler/brw_fs_lower_pack.cpp b/src/intel/compiler/brw_fs_lower_pack.cpp index 0b0f9417513..3a60989ecda 100644 --- a/src/intel/compiler/brw_fs_lower_pack.cpp +++ b/src/intel/compiler/brw_fs_lower_pack.cpp @@ -21,6 +21,7 @@ * IN THE SOFTWARE. 
*/ +#include "util/half_float.h" #include "brw_fs.h" #include "brw_cfg.h" #include "brw_fs_builder.h" @@ -33,7 +34,8 @@ fs_visitor::lower_pack() bool progress = false; foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { - if (inst->opcode != FS_OPCODE_PACK) + if (inst->opcode != FS_OPCODE_PACK && + inst->opcode != FS_OPCODE_PACK_HALF_2x16_SPLIT) continue; assert(inst->dst.file == VGRF); @@ -48,8 +50,36 @@ fs_visitor::lower_pack() */ if (!inst->is_partial_write()) ibld.emit_undef_for_dst(inst); - for (unsigned i = 0; i < inst->sources; i++) - ibld.MOV(subscript(dst, inst->src[i].type, i), inst->src[i]); + + switch (inst->opcode) { + case FS_OPCODE_PACK: + for (unsigned i = 0; i < inst->sources; i++) + ibld.MOV(subscript(dst, inst->src[i].type, i), inst->src[i]); + break; + case FS_OPCODE_PACK_HALF_2x16_SPLIT: + assert(dst.type == BRW_REGISTER_TYPE_UD); + + for (unsigned i = 0; i < inst->sources; i++) { + if (inst->src[i].file == IMM) { + const uint32_t half = _mesa_float_to_half(inst->src[i].f); + ibld.MOV(subscript(dst, BRW_REGISTER_TYPE_UW, i), + brw_imm_uw(half)); + } else if (i == 1 && devinfo->ver < 9) { + /* Pre-Skylake requires DWord-aligned destinations; use a temp. */ + fs_reg tmp = ibld.vgrf(BRW_REGISTER_TYPE_UD); + ibld.F32TO16(subscript(tmp, BRW_REGISTER_TYPE_HF, 0), + inst->src[i]); + ibld.MOV(subscript(dst, BRW_REGISTER_TYPE_UW, 1), + subscript(tmp, BRW_REGISTER_TYPE_UW, 0)); + } else { + ibld.F32TO16(subscript(dst, BRW_REGISTER_TYPE_HF, i), + inst->src[i]); + } + } + break; + default: + unreachable("skipped above"); + } inst->remove(block); progress = true;