glsl: Add ir_unop_sin_reduced and ir_unop_cos_reduced
They operate just like ir_unop_sin and ir_unop_cos except that they expect their inputs to be limited to the range [-pi, pi]. Several GPUs require this limited range for their sine and cosine instructions, so having these as operations (along with a to-be-written lowering pass) helps these architectures. These new operations also match the semantics of the GL_ARB_fragment_program SCS instruction. Having these as operations helps in generating GLSL IR directly from assembly fragment programs.
This commit is contained in:
@@ -239,6 +239,8 @@ static const char *const operator_strs[] = {
|
|||||||
"round_even",
|
"round_even",
|
||||||
"sin",
|
"sin",
|
||||||
"cos",
|
"cos",
|
||||||
|
"sin_reduced",
|
||||||
|
"cos_reduced",
|
||||||
"dFdx",
|
"dFdx",
|
||||||
"dFdy",
|
"dFdy",
|
||||||
"noise",
|
"noise",
|
||||||
|
@@ -729,6 +729,8 @@ enum ir_expression_operation {
|
|||||||
/*@{*/
|
/*@{*/
|
||||||
ir_unop_sin,
|
ir_unop_sin,
|
||||||
ir_unop_cos,
|
ir_unop_cos,
|
||||||
|
ir_unop_sin_reduced, /**< Reduced range sin. [-pi, pi] */
|
||||||
|
ir_unop_cos_reduced, /**< Reduced range cos. [-pi, pi] */
|
||||||
/*@}*/
|
/*@}*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -216,6 +216,7 @@ ir_expression::constant_expression_value()
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case ir_unop_sin:
|
case ir_unop_sin:
|
||||||
|
case ir_unop_sin_reduced:
|
||||||
assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
|
assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
|
||||||
for (unsigned c = 0; c < op[0]->type->components(); c++) {
|
for (unsigned c = 0; c < op[0]->type->components(); c++) {
|
||||||
data.f[c] = sinf(op[0]->value.f[c]);
|
data.f[c] = sinf(op[0]->value.f[c]);
|
||||||
@@ -223,6 +224,7 @@ ir_expression::constant_expression_value()
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case ir_unop_cos:
|
case ir_unop_cos:
|
||||||
|
case ir_unop_cos_reduced:
|
||||||
assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
|
assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
|
||||||
for (unsigned c = 0; c < op[0]->type->components(); c++) {
|
for (unsigned c = 0; c < op[0]->type->components(); c++) {
|
||||||
data.f[c] = cosf(op[0]->value.f[c]);
|
data.f[c] = cosf(op[0]->value.f[c]);
|
||||||
|
@@ -273,6 +273,8 @@ ir_validate::visit_leave(ir_expression *ir)
|
|||||||
case ir_unop_fract:
|
case ir_unop_fract:
|
||||||
case ir_unop_sin:
|
case ir_unop_sin:
|
||||||
case ir_unop_cos:
|
case ir_unop_cos:
|
||||||
|
case ir_unop_sin_reduced:
|
||||||
|
case ir_unop_cos_reduced:
|
||||||
case ir_unop_dFdx:
|
case ir_unop_dFdx:
|
||||||
case ir_unop_dFdy:
|
case ir_unop_dFdy:
|
||||||
assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
|
assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
|
||||||
|
@@ -778,9 +778,11 @@ fs_visitor::visit(ir_expression *ir)
|
|||||||
assert(!"not reached: should be handled by ir_explog_to_explog2");
|
assert(!"not reached: should be handled by ir_explog_to_explog2");
|
||||||
break;
|
break;
|
||||||
case ir_unop_sin:
|
case ir_unop_sin:
|
||||||
|
case ir_unop_sin_reduced:
|
||||||
emit_math(FS_OPCODE_SIN, this->result, op[0]);
|
emit_math(FS_OPCODE_SIN, this->result, op[0]);
|
||||||
break;
|
break;
|
||||||
case ir_unop_cos:
|
case ir_unop_cos:
|
||||||
|
case ir_unop_cos_reduced:
|
||||||
emit_math(FS_OPCODE_COS, this->result, op[0]);
|
emit_math(FS_OPCODE_COS, this->result, op[0]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@@ -282,6 +282,10 @@ public:
|
|||||||
ir_to_mesa_src_reg src0,
|
ir_to_mesa_src_reg src0,
|
||||||
ir_to_mesa_src_reg src1);
|
ir_to_mesa_src_reg src1);
|
||||||
|
|
||||||
|
void emit_scs(ir_instruction *ir, enum prog_opcode op,
|
||||||
|
ir_to_mesa_dst_reg dst,
|
||||||
|
const ir_to_mesa_src_reg &src);
|
||||||
|
|
||||||
GLboolean try_emit_mad(ir_expression *ir,
|
GLboolean try_emit_mad(ir_expression *ir,
|
||||||
int mul_operand);
|
int mul_operand);
|
||||||
|
|
||||||
@@ -475,6 +479,10 @@ ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
|
|||||||
GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
|
GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
|
||||||
GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
|
GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
|
||||||
for (j = i + 1; j < 4; j++) {
|
for (j = i + 1; j < 4; j++) {
|
||||||
|
/* If there is another enabled component in the destination that is
|
||||||
|
* derived from the same inputs, generate its value on this pass as
|
||||||
|
* well.
|
||||||
|
*/
|
||||||
if (!(done_mask & (1 << j)) &&
|
if (!(done_mask & (1 << j)) &&
|
||||||
GET_SWZ(src0.swizzle, j) == src0_swiz &&
|
GET_SWZ(src0.swizzle, j) == src0_swiz &&
|
||||||
GET_SWZ(src1.swizzle, j) == src1_swiz) {
|
GET_SWZ(src1.swizzle, j) == src1_swiz) {
|
||||||
@@ -508,6 +516,102 @@ ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
|
|||||||
ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, undef);
|
ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, undef);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Emit an OPCODE_SCS instruction
|
||||||
|
*
|
||||||
|
* The \c SCS opcode functions a bit differently than the other Mesa (or
|
||||||
|
* ARB_fragment_program) opcodes. Instead of splatting its result across all
|
||||||
|
* four components of the destination, it writes one value to the \c x
|
||||||
|
* component and another value to the \c y component.
|
||||||
|
*
|
||||||
|
* \param ir IR instruction being processed
|
||||||
|
* \param op Either \c OPCODE_SIN or \c OPCODE_COS depending on which
|
||||||
|
* value is desired.
|
||||||
|
* \param dst Destination register
|
||||||
|
* \param src Source register
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
|
||||||
|
ir_to_mesa_dst_reg dst,
|
||||||
|
const ir_to_mesa_src_reg &src)
|
||||||
|
{
|
||||||
|
/* Vertex programs cannot use the SCS opcode.
|
||||||
|
*/
|
||||||
|
if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
|
||||||
|
ir_to_mesa_emit_scalar_op1(ir, op, dst, src);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const unsigned component = (op == OPCODE_SIN) ? 0 : 1;
|
||||||
|
const unsigned scs_mask = (1U << component);
|
||||||
|
int done_mask = ~dst.writemask;
|
||||||
|
ir_to_mesa_src_reg tmp;
|
||||||
|
|
||||||
|
assert(op == OPCODE_SIN || op == OPCODE_COS);
|
||||||
|
|
||||||
|
/* If there are compnents in the destination that differ from the component
|
||||||
|
* that will be written by the SCS instrution, we'll need a temporary.
|
||||||
|
*/
|
||||||
|
if (scs_mask != unsigned(dst.writemask)) {
|
||||||
|
tmp = get_temp(glsl_type::vec4_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < 4; i++) {
|
||||||
|
unsigned this_mask = (1U << i);
|
||||||
|
ir_to_mesa_src_reg src0 = src;
|
||||||
|
|
||||||
|
if ((done_mask & this_mask) != 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* The source swizzle specified which component of the source generates
|
||||||
|
* sine / cosine for the current component in the destination. The SCS
|
||||||
|
* instruction requires that this value be swizzle to the X component.
|
||||||
|
* Replace the current swizzle with a swizzle that puts the source in
|
||||||
|
* the X component.
|
||||||
|
*/
|
||||||
|
unsigned src0_swiz = GET_SWZ(src.swizzle, i);
|
||||||
|
|
||||||
|
src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
|
||||||
|
src0_swiz, src0_swiz);
|
||||||
|
for (unsigned j = i + 1; j < 4; j++) {
|
||||||
|
/* If there is another enabled component in the destination that is
|
||||||
|
* derived from the same inputs, generate its value on this pass as
|
||||||
|
* well.
|
||||||
|
*/
|
||||||
|
if (!(done_mask & (1 << j)) &&
|
||||||
|
GET_SWZ(src0.swizzle, j) == src0_swiz) {
|
||||||
|
this_mask |= (1 << j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this_mask != scs_mask) {
|
||||||
|
ir_to_mesa_instruction *inst;
|
||||||
|
ir_to_mesa_dst_reg tmp_dst = ir_to_mesa_dst_reg_from_src(tmp);
|
||||||
|
|
||||||
|
/* Emit the SCS instruction.
|
||||||
|
*/
|
||||||
|
inst = ir_to_mesa_emit_op1(ir, OPCODE_SCS, tmp_dst, src0);
|
||||||
|
inst->dst_reg.writemask = scs_mask;
|
||||||
|
|
||||||
|
/* Move the result of the SCS instruction to the desired location in
|
||||||
|
* the destination.
|
||||||
|
*/
|
||||||
|
tmp.swizzle = MAKE_SWIZZLE4(component, component,
|
||||||
|
component, component);
|
||||||
|
inst = ir_to_mesa_emit_op1(ir, OPCODE_SCS, dst, tmp);
|
||||||
|
inst->dst_reg.writemask = this_mask;
|
||||||
|
} else {
|
||||||
|
/* Emit the SCS instruction to write directly to the destination.
|
||||||
|
*/
|
||||||
|
ir_to_mesa_instruction *inst =
|
||||||
|
ir_to_mesa_emit_op1(ir, OPCODE_SCS, dst, src0);
|
||||||
|
inst->dst_reg.writemask = scs_mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
done_mask |= this_mask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
struct ir_to_mesa_src_reg
|
struct ir_to_mesa_src_reg
|
||||||
ir_to_mesa_visitor::src_reg_for_float(float val)
|
ir_to_mesa_visitor::src_reg_for_float(float val)
|
||||||
{
|
{
|
||||||
@@ -942,6 +1046,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
|
|||||||
case ir_unop_cos:
|
case ir_unop_cos:
|
||||||
ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
|
ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
|
||||||
break;
|
break;
|
||||||
|
case ir_unop_sin_reduced:
|
||||||
|
emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
|
||||||
|
break;
|
||||||
|
case ir_unop_cos_reduced:
|
||||||
|
emit_scs(ir, OPCODE_COS, result_dst, op[0]);
|
||||||
|
break;
|
||||||
|
|
||||||
case ir_unop_dFdx:
|
case ir_unop_dFdx:
|
||||||
ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]);
|
ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]);
|
||||||
|
Reference in New Issue
Block a user