i965/fs: Add support for bit instructions.
Don't bother scalarizing ir_binop_bfm, since its results are identical for all channels.

v2: Subtract result of FBH from 31 (unless FBH returned an error) to convert MSB counts to LSB counts.
v3: Use op0->clone() in ir_triop_bfi to prevent (var_ref channel_expressions) from appearing multiple times in the IR.

Reviewed-by: Chris Forbes <chrisf@ijw.co.nz> [v2]
This commit is contained in:
@@ -173,6 +173,13 @@ ALU2(SHL)
|
|||||||
ALU2(SHR)
|
ALU2(SHR)
|
||||||
ALU2(ASR)
|
ALU2(ASR)
|
||||||
ALU3(LRP)
|
ALU3(LRP)
|
||||||
|
ALU1(BFREV)
|
||||||
|
ALU3(BFE)
|
||||||
|
ALU2(BFI1)
|
||||||
|
ALU3(BFI2)
|
||||||
|
ALU1(FBH)
|
||||||
|
ALU1(FBL)
|
||||||
|
ALU1(CBIT)
|
||||||
|
|
||||||
/** Gen4 predicated IF. */
|
/** Gen4 predicated IF. */
|
||||||
fs_inst *
|
fs_inst *
|
||||||
|
@@ -276,6 +276,13 @@ public:
|
|||||||
uint32_t condition);
|
uint32_t condition);
|
||||||
fs_inst *LRP(fs_reg dst, fs_reg a, fs_reg y, fs_reg x);
|
fs_inst *LRP(fs_reg dst, fs_reg a, fs_reg y, fs_reg x);
|
||||||
fs_inst *DEP_RESOLVE_MOV(int grf);
|
fs_inst *DEP_RESOLVE_MOV(int grf);
|
||||||
|
fs_inst *BFREV(fs_reg dst, fs_reg value);
|
||||||
|
fs_inst *BFE(fs_reg dst, fs_reg bits, fs_reg offset, fs_reg value);
|
||||||
|
fs_inst *BFI1(fs_reg dst, fs_reg bits, fs_reg offset);
|
||||||
|
fs_inst *BFI2(fs_reg dst, fs_reg bfi1_dst, fs_reg insert, fs_reg base);
|
||||||
|
fs_inst *FBH(fs_reg dst, fs_reg value);
|
||||||
|
fs_inst *FBL(fs_reg dst, fs_reg value);
|
||||||
|
fs_inst *CBIT(fs_reg dst, fs_reg value);
|
||||||
|
|
||||||
int type_size(const struct glsl_type *type);
|
int type_size(const struct glsl_type *type);
|
||||||
fs_inst *get_instruction_generating_reg(fs_inst *start,
|
fs_inst *get_instruction_generating_reg(fs_inst *start,
|
||||||
|
@@ -216,6 +216,10 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
|
|||||||
case ir_unop_cos_reduced:
|
case ir_unop_cos_reduced:
|
||||||
case ir_unop_dFdx:
|
case ir_unop_dFdx:
|
||||||
case ir_unop_dFdy:
|
case ir_unop_dFdy:
|
||||||
|
case ir_unop_bitfield_reverse:
|
||||||
|
case ir_unop_bit_count:
|
||||||
|
case ir_unop_find_msb:
|
||||||
|
case ir_unop_find_lsb:
|
||||||
for (i = 0; i < vector_elements; i++) {
|
for (i = 0; i < vector_elements; i++) {
|
||||||
ir_rvalue *op0 = get_element(op_var[0], i);
|
ir_rvalue *op0 = get_element(op_var[0], i);
|
||||||
|
|
||||||
@@ -338,11 +342,26 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
|
|||||||
assert(!"noise should have been broken down to function call");
|
assert(!"noise should have been broken down to function call");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case ir_binop_bfm: {
|
||||||
|
/* Does not need to be scalarized, since its result will be identical
|
||||||
|
* for all channels.
|
||||||
|
*/
|
||||||
|
ir_rvalue *op0 = get_element(op_var[0], 0);
|
||||||
|
ir_rvalue *op1 = get_element(op_var[1], 0);
|
||||||
|
|
||||||
|
assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
|
||||||
|
element_type,
|
||||||
|
op0,
|
||||||
|
op1));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case ir_binop_ubo_load:
|
case ir_binop_ubo_load:
|
||||||
assert(!"not yet supported");
|
assert(!"not yet supported");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ir_triop_lrp:
|
case ir_triop_lrp:
|
||||||
|
case ir_triop_bitfield_extract:
|
||||||
for (i = 0; i < vector_elements; i++) {
|
for (i = 0; i < vector_elements; i++) {
|
||||||
ir_rvalue *op0 = get_element(op_var[0], i);
|
ir_rvalue *op0 = get_element(op_var[0], i);
|
||||||
ir_rvalue *op1 = get_element(op_var[1], i);
|
ir_rvalue *op1 = get_element(op_var[1], i);
|
||||||
@@ -356,6 +375,23 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case ir_triop_bfi: {
|
||||||
|
/* Only a single BFM is needed for multiple BFIs. */
|
||||||
|
ir_rvalue *op0 = get_element(op_var[0], 0);
|
||||||
|
|
||||||
|
for (i = 0; i < vector_elements; i++) {
|
||||||
|
ir_rvalue *op1 = get_element(op_var[1], i);
|
||||||
|
ir_rvalue *op2 = get_element(op_var[2], i);
|
||||||
|
|
||||||
|
assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
|
||||||
|
element_type,
|
||||||
|
op0->clone(mem_ctx, NULL),
|
||||||
|
op1,
|
||||||
|
op2));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case ir_unop_pack_snorm_2x16:
|
case ir_unop_pack_snorm_2x16:
|
||||||
case ir_unop_pack_snorm_4x8:
|
case ir_unop_pack_snorm_4x8:
|
||||||
case ir_unop_pack_unorm_2x16:
|
case ir_unop_pack_unorm_2x16:
|
||||||
@@ -366,6 +402,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
|
|||||||
case ir_unop_unpack_unorm_2x16:
|
case ir_unop_unpack_unorm_2x16:
|
||||||
case ir_unop_unpack_unorm_4x8:
|
case ir_unop_unpack_unorm_4x8:
|
||||||
case ir_unop_unpack_half_2x16:
|
case ir_unop_unpack_half_2x16:
|
||||||
|
case ir_quadop_bitfield_insert:
|
||||||
case ir_quadop_vector:
|
case ir_quadop_vector:
|
||||||
assert(!"should have been lowered");
|
assert(!"should have been lowered");
|
||||||
break;
|
break;
|
||||||
|
@@ -1209,6 +1209,54 @@ fs_generator::generate_code(exec_list *instructions)
|
|||||||
case BRW_OPCODE_SEL:
|
case BRW_OPCODE_SEL:
|
||||||
brw_SEL(p, dst, src[0], src[1]);
|
brw_SEL(p, dst, src[0], src[1]);
|
||||||
break;
|
break;
|
||||||
|
case BRW_OPCODE_BFREV:
|
||||||
|
/* BFREV only supports UD type for src and dst. */
|
||||||
|
brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
|
||||||
|
retype(src[0], BRW_REGISTER_TYPE_UD));
|
||||||
|
break;
|
||||||
|
case BRW_OPCODE_FBH:
|
||||||
|
/* FBH only supports UD type for dst. */
|
||||||
|
brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
|
||||||
|
break;
|
||||||
|
case BRW_OPCODE_FBL:
|
||||||
|
/* FBL only supports UD type for dst. */
|
||||||
|
brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
|
||||||
|
break;
|
||||||
|
case BRW_OPCODE_CBIT:
|
||||||
|
/* CBIT only supports UD type for dst. */
|
||||||
|
brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case BRW_OPCODE_BFE:
|
||||||
|
brw_set_access_mode(p, BRW_ALIGN_16);
|
||||||
|
if (dispatch_width == 16) {
|
||||||
|
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||||
|
brw_BFE(p, dst, src[0], src[1], src[2]);
|
||||||
|
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
||||||
|
brw_BFE(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
|
||||||
|
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
||||||
|
} else {
|
||||||
|
brw_BFE(p, dst, src[0], src[1], src[2]);
|
||||||
|
}
|
||||||
|
brw_set_access_mode(p, BRW_ALIGN_1);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case BRW_OPCODE_BFI1:
|
||||||
|
brw_BFI1(p, dst, src[0], src[1]);
|
||||||
|
break;
|
||||||
|
case BRW_OPCODE_BFI2:
|
||||||
|
brw_set_access_mode(p, BRW_ALIGN_16);
|
||||||
|
if (dispatch_width == 16) {
|
||||||
|
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||||
|
brw_BFI2(p, dst, src[0], src[1], src[2]);
|
||||||
|
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
||||||
|
brw_BFI2(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
|
||||||
|
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
||||||
|
} else {
|
||||||
|
brw_BFI2(p, dst, src[0], src[1], src[2]);
|
||||||
|
}
|
||||||
|
brw_set_access_mode(p, BRW_ALIGN_1);
|
||||||
|
break;
|
||||||
|
|
||||||
case BRW_OPCODE_IF:
|
case BRW_OPCODE_IF:
|
||||||
if (inst->src[0].file != BAD_FILE) {
|
if (inst->src[0].file != BAD_FILE) {
|
||||||
|
@@ -587,6 +587,49 @@ fs_visitor::visit(ir_expression *ir)
|
|||||||
emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
|
emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case ir_unop_bitfield_reverse:
|
||||||
|
emit(BFREV(this->result, op[0]));
|
||||||
|
break;
|
||||||
|
case ir_unop_bit_count:
|
||||||
|
emit(CBIT(this->result, op[0]));
|
||||||
|
break;
|
||||||
|
case ir_unop_find_msb:
|
||||||
|
temp = fs_reg(this, glsl_type::uint_type);
|
||||||
|
emit(FBH(temp, op[0]));
|
||||||
|
|
||||||
|
/* FBH counts from the MSB side, while GLSL's findMSB() wants the count
|
||||||
|
* from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
|
||||||
|
* subtract the result from 31 to convert the MSB count into an LSB count.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
|
||||||
|
emit(MOV(this->result, temp));
|
||||||
|
emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ));
|
||||||
|
|
||||||
|
temp.negate = true;
|
||||||
|
inst = emit(ADD(this->result, temp, fs_reg(31)));
|
||||||
|
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||||
|
break;
|
||||||
|
case ir_unop_find_lsb:
|
||||||
|
emit(FBL(this->result, op[0]));
|
||||||
|
break;
|
||||||
|
case ir_triop_bitfield_extract:
|
||||||
|
/* Note that the instruction's argument order is reversed from GLSL
|
||||||
|
* and the IR.
|
||||||
|
*/
|
||||||
|
emit(BFE(this->result, op[2], op[1], op[0]));
|
||||||
|
break;
|
||||||
|
case ir_binop_bfm:
|
||||||
|
emit(BFI1(this->result, op[0], op[1]));
|
||||||
|
break;
|
||||||
|
case ir_triop_bfi:
|
||||||
|
emit(BFI2(this->result, op[0], op[1], op[2]));
|
||||||
|
break;
|
||||||
|
case ir_quadop_bitfield_insert:
|
||||||
|
assert(!"not reached: should be handled by "
|
||||||
|
"lower_instructions::bitfield_insert_to_bfm_bfi");
|
||||||
|
break;
|
||||||
|
|
||||||
case ir_unop_bit_not:
|
case ir_unop_bit_not:
|
||||||
emit(NOT(this->result, op[0]));
|
emit(NOT(this->result, op[0]));
|
||||||
break;
|
break;
|
||||||
|
Reference in New Issue
Block a user