nir: Add no-wrapping bits to nir_alu_instr
These bits indicate that the operation does not cause overflow or underflow. This is motivated by the SPIR-V decorations NoSignedWrap and NoUnsignedWrap. Change the storage of `exact` to be a single bit, so the flags pack together. v2: Handle no_wrap in nir_instr_set. (Karol) v3: Use two separate flags: NIR SSA values and certain instructions are typeless, so a single no_wrap flag would be insufficient to know which kind of wrapping was referred to. (Connor) v4: Don't use nir_instr_set to propagate the flags; unlike `exact`, consider the instructions different if the flags have different values. Fix hashing/comparing. (Jason) Reviewed-by: Karol Herbst <kherbst@redhat.com> [v1] Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
This commit is contained in:
@@ -961,7 +961,14 @@ typedef struct nir_alu_instr {
|
|||||||
* it must ensure that the resulting value is bit-for-bit identical to the
|
* it must ensure that the resulting value is bit-for-bit identical to the
|
||||||
* original.
|
* original.
|
||||||
*/
|
*/
|
||||||
bool exact;
|
bool exact:1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Indicates that this instruction does not cause wrapping to occur, in the
|
||||||
|
* form of overflow or underflow.
|
||||||
|
*/
|
||||||
|
bool no_signed_wrap:1;
|
||||||
|
bool no_unsigned_wrap:1;
|
||||||
|
|
||||||
nir_alu_dest dest;
|
nir_alu_dest dest;
|
||||||
nir_alu_src src[];
|
nir_alu_src src[];
|
||||||
|
@@ -290,6 +290,8 @@ clone_alu(clone_state *state, const nir_alu_instr *alu)
|
|||||||
{
|
{
|
||||||
nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op);
|
nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op);
|
||||||
nalu->exact = alu->exact;
|
nalu->exact = alu->exact;
|
||||||
|
nalu->no_signed_wrap = alu->no_signed_wrap;
|
||||||
|
nalu->no_unsigned_wrap = alu->no_unsigned_wrap;
|
||||||
|
|
||||||
__clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest);
|
__clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest);
|
||||||
nalu->dest.saturate = alu->dest.saturate;
|
nalu->dest.saturate = alu->dest.saturate;
|
||||||
|
@@ -52,9 +52,14 @@ static uint32_t
|
|||||||
hash_alu(uint32_t hash, const nir_alu_instr *instr)
|
hash_alu(uint32_t hash, const nir_alu_instr *instr)
|
||||||
{
|
{
|
||||||
hash = HASH(hash, instr->op);
|
hash = HASH(hash, instr->op);
|
||||||
|
|
||||||
|
/* We explicitly don't hash instr->exact. */
|
||||||
|
uint8_t flags = instr->no_signed_wrap |
|
||||||
|
instr->no_unsigned_wrap << 1;
|
||||||
|
hash = HASH(hash, flags);
|
||||||
|
|
||||||
hash = HASH(hash, instr->dest.dest.ssa.num_components);
|
hash = HASH(hash, instr->dest.dest.ssa.num_components);
|
||||||
hash = HASH(hash, instr->dest.dest.ssa.bit_size);
|
hash = HASH(hash, instr->dest.dest.ssa.bit_size);
|
||||||
/* We explicitly don't hash instr->dest.dest.exact */
|
|
||||||
|
|
||||||
if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_2SRC_COMMUTATIVE) {
|
if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_2SRC_COMMUTATIVE) {
|
||||||
assert(nir_op_infos[instr->op].num_inputs >= 2);
|
assert(nir_op_infos[instr->op].num_inputs >= 2);
|
||||||
@@ -523,6 +528,14 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
|
|||||||
if (alu1->op != alu2->op)
|
if (alu1->op != alu2->op)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
/* We explicitly don't compare instr->exact. */
|
||||||
|
|
||||||
|
if (alu1->no_signed_wrap != alu2->no_signed_wrap)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (alu1->no_unsigned_wrap != alu2->no_unsigned_wrap)
|
||||||
|
return false;
|
||||||
|
|
||||||
/* TODO: We can probably actually do something more intelligent such
|
/* TODO: We can probably actually do something more intelligent such
|
||||||
* as allowing different numbers and taking a maximum or something
|
* as allowing different numbers and taking a maximum or something
|
||||||
* here */
|
* here */
|
||||||
@@ -532,8 +545,6 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
|
|||||||
if (alu1->dest.dest.ssa.bit_size != alu2->dest.dest.ssa.bit_size)
|
if (alu1->dest.dest.ssa.bit_size != alu2->dest.dest.ssa.bit_size)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* We explicitly don't hash instr->dest.dest.exact */
|
|
||||||
|
|
||||||
if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_2SRC_COMMUTATIVE) {
|
if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_2SRC_COMMUTATIVE) {
|
||||||
if ((!nir_alu_srcs_equal(alu1, alu2, 0, 0) ||
|
if ((!nir_alu_srcs_equal(alu1, alu2, 0, 0) ||
|
||||||
!nir_alu_srcs_equal(alu1, alu2, 1, 1)) &&
|
!nir_alu_srcs_equal(alu1, alu2, 1, 1)) &&
|
||||||
|
@@ -241,6 +241,10 @@ print_alu_instr(nir_alu_instr *instr, print_state *state)
|
|||||||
fprintf(fp, "!");
|
fprintf(fp, "!");
|
||||||
if (instr->dest.saturate)
|
if (instr->dest.saturate)
|
||||||
fprintf(fp, ".sat");
|
fprintf(fp, ".sat");
|
||||||
|
if (instr->no_signed_wrap)
|
||||||
|
fprintf(fp, ".nsw");
|
||||||
|
if (instr->no_unsigned_wrap)
|
||||||
|
fprintf(fp, ".nuw");
|
||||||
fprintf(fp, " ");
|
fprintf(fp, " ");
|
||||||
|
|
||||||
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
|
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
|
||||||
|
@@ -379,8 +379,10 @@ write_alu(write_ctx *ctx, const nir_alu_instr *alu)
|
|||||||
{
|
{
|
||||||
blob_write_uint32(ctx->blob, alu->op);
|
blob_write_uint32(ctx->blob, alu->op);
|
||||||
uint32_t flags = alu->exact;
|
uint32_t flags = alu->exact;
|
||||||
flags |= alu->dest.saturate << 1;
|
flags |= alu->no_signed_wrap << 1;
|
||||||
flags |= alu->dest.write_mask << 2;
|
flags |= alu->no_unsigned_wrap << 2;
|
||||||
|
flags |= alu->dest.saturate << 3;
|
||||||
|
flags |= alu->dest.write_mask << 4;
|
||||||
blob_write_uint32(ctx->blob, flags);
|
blob_write_uint32(ctx->blob, flags);
|
||||||
|
|
||||||
write_dest(ctx, &alu->dest.dest);
|
write_dest(ctx, &alu->dest.dest);
|
||||||
@@ -403,8 +405,10 @@ read_alu(read_ctx *ctx)
|
|||||||
|
|
||||||
uint32_t flags = blob_read_uint32(ctx->blob);
|
uint32_t flags = blob_read_uint32(ctx->blob);
|
||||||
alu->exact = flags & 1;
|
alu->exact = flags & 1;
|
||||||
alu->dest.saturate = flags & 2;
|
alu->no_signed_wrap = flags & 2;
|
||||||
alu->dest.write_mask = flags >> 2;
|
alu->no_unsigned_wrap = flags & 4;
|
||||||
|
alu->dest.saturate = flags & 8;
|
||||||
|
alu->dest.write_mask = flags >> 4;
|
||||||
|
|
||||||
read_dest(ctx, &alu->dest.dest, &alu->instr);
|
read_dest(ctx, &alu->dest.dest, &alu->instr);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user