nir: update opcode definitions for different bit sizes

Some opcodes need explicit bitsizes, and sometimes we need to use the
double version when constant folding.

v2: fix output type for u2f (Iago)

v3: do not change vecN opcodes to be float. The next commit will add
    infrastructure to enable 64-bit integer constant folding so this isn't
    really necessary. Also, that created problems with source modifiers in
    some cases (Iago)

v4 (Jason):
  - do not change bcsel to work in terms of floats
  - leave ldexp generic

Squashed changes to handle different bit sizes when constant
folding since otherwise we would break the build.

v2:
- Use the bit-size information from the opcode information if defined (Iago)
- Use helpers to get type size and base type of nir_alu_type enum (Sam)
- Do not fall back to sized types to guess bit-size information. (Jason)

Squashed changes in i965 and gallium/nir drivers to support sized types.
These functions should only see sized types, but we can't make that change
until we make sure that nir uses the sized versions in all the relevant places.
A later commit will address this.

Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
This commit is contained in:
Connor Abbott
2015-08-14 10:45:06 -07:00
committed by Samuel Iglesias Gonsálvez
parent 6700d7e423
commit 9076c4e289
7 changed files with 282 additions and 157 deletions

View File

@@ -101,6 +101,7 @@ union nir_constant_data {
int i[16]; int i[16];
float f[16]; float f[16];
bool b[16]; bool b[16];
double d[16];
}; };
typedef struct nir_constant { typedef struct nir_constant {
@@ -1209,8 +1210,11 @@ nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type)
typedef struct { typedef struct {
union { union {
float f[4]; float f[4];
double d[4];
int32_t i[4]; int32_t i[4];
uint32_t u[4]; uint32_t u[4];
int64_t l[4];
uint64_t ul[4];
}; };
} nir_const_value; } nir_const_value;

View File

@@ -28,4 +28,4 @@
#include "nir.h" #include "nir.h"
nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components, nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components,
nir_const_value *src); unsigned bit_size, nir_const_value *src);

View File

@@ -1,4 +1,43 @@
#! /usr/bin/python2 #! /usr/bin/python2
def type_has_size(type_):
    """Tell whether the type name ``type_`` ends in a digit.

    Names such as "float32" end in a digit and give True; names such as
    "float" (and the empty string) give False.
    """
    last_char = type_[-1:]
    return last_char.isdigit()
def type_sizes(type_):
    """Return the list of bit sizes implied by the type name ``type_``.

    A name ending in "8", "16", "32" or "64" maps to a one-element list
    holding that size; any other name maps to [32, 64].
    """
    # Suffixes are tried in ascending order: 8, 16, 32, 64.
    for size in (8, 16, 32, 64):
        if type_.endswith(str(size)):
            return [size]
    return [32, 64]
def type_add_size(type_, size):
    """Return ``type_`` with ``size`` appended, unless it already has a size.

    When type_has_size(type_) is true the name is returned unchanged;
    otherwise str(size) is concatenated onto it (e.g. "float" and 64 give
    "float64").
    """
    if not type_has_size(type_):
        return type_ + str(size)
    return type_
def get_const_field(type_):
    """Return the nir_const_value union field name for a sized type name.

    The mapping is: int32 -> "i", uint32 -> "u", int64 -> "l",
    uint64 -> "ul", bool32 -> "b", float32 -> "f", float64 -> "d".

    Raises Exception(str(type_)) for any other value, including unsized
    names such as "float".
    """
    # Ordered (type name, field name) pairs, compared with == so that any
    # argument type is handled the same way as a chain of equality tests.
    field_by_type = (
        ("int32", "i"),
        ("uint32", "u"),
        ("int64", "l"),
        ("uint64", "ul"),
        ("bool32", "b"),
        ("float32", "f"),
        ("float64", "d"),
    )
    for known_type, field in field_by_type:
        if type_ == known_type:
            return field
    # No statement may follow this raise: it would be unreachable.
    raise Exception(str(type_))
template = """\ template = """\
/* /*
* Copyright (C) 2014 Intel Corporation * Copyright (C) 2014 Intel Corporation
@@ -205,21 +244,42 @@ unpack_half_1x16(uint16_t u)
} }
/* Some typed vector structures to make things like src0.y work */ /* Some typed vector structures to make things like src0.y work */
% for type in ["float", "int", "uint", "bool"]: typedef float float32_t;
struct ${type}_vec { typedef double float64_t;
${type} x; typedef bool bool32_t;
${type} y; % for type in ["float", "int", "uint"]:
${type} z; % for width in [32, 64]:
${type} w; struct ${type}${width}_vec {
${type}${width}_t x;
${type}${width}_t y;
${type}${width}_t z;
${type}${width}_t w;
}; };
% endfor % endfor
% endfor
struct bool32_vec {
bool x;
bool y;
bool z;
bool w;
};
% for name, op in sorted(opcodes.iteritems()): % for name, op in sorted(opcodes.iteritems()):
static nir_const_value static nir_const_value
evaluate_${name}(unsigned num_components, nir_const_value *_src) evaluate_${name}(unsigned num_components, unsigned bit_size,
nir_const_value *_src)
{ {
nir_const_value _dst_val = { { {0, 0, 0, 0} } }; nir_const_value _dst_val = { { {0, 0, 0, 0} } };
switch (bit_size) {
% for bit_size in [32, 64]:
case ${bit_size}: {
<%
output_type = type_add_size(op.output_type, bit_size)
input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
%>
## For each non-per-component input, create a variable srcN that ## For each non-per-component input, create a variable srcN that
## contains x, y, z, and w elements which are filled in with the ## contains x, y, z, and w elements which are filled in with the
## appropriately-typed values. ## appropriately-typed values.
@@ -231,12 +291,12 @@ evaluate_${name}(unsigned num_components, nir_const_value *_src)
<% continue %> <% continue %>
%endif %endif
struct ${op.input_types[j]}_vec src${j} = { struct ${input_types[j]}_vec src${j} = {
% for k in range(op.input_sizes[j]): % for k in range(op.input_sizes[j]):
% if op.input_types[j] == "bool": % if input_types[j] == "bool32":
_src[${j}].u[${k}] != 0, _src[${j}].u[${k}] != 0,
% else: % else:
_src[${j}].${op.input_types[j][:1]}[${k}], _src[${j}].${get_const_field(input_types[j])}[${k}],
% endif % endif
% endfor % endfor
}; };
@@ -255,10 +315,11 @@ evaluate_${name}(unsigned num_components, nir_const_value *_src)
% elif "src" + str(j) not in op.const_expr: % elif "src" + str(j) not in op.const_expr:
## Avoid unused variable warnings ## Avoid unused variable warnings
<% continue %> <% continue %>
% elif op.input_types[j] == "bool": % elif input_types[j] == "bool32":
bool src${j} = _src[${j}].u[_i] != 0; bool src${j} = _src[${j}].u[_i] != 0;
% else: % else:
${op.input_types[j]} src${j} = _src[${j}].${op.input_types[j][:1]}[_i]; ${input_types[j]}_t src${j} =
_src[${j}].${get_const_field(input_types[j])}[_i];
% endif % endif
% endfor % endfor
@@ -266,19 +327,19 @@ evaluate_${name}(unsigned num_components, nir_const_value *_src)
## result of the const_expr to it. If const_expr already contains ## result of the const_expr to it. If const_expr already contains
## writes to dst, just include const_expr directly. ## writes to dst, just include const_expr directly.
% if "dst" in op.const_expr: % if "dst" in op.const_expr:
${op.output_type} dst; ${output_type}_t dst;
${op.const_expr} ${op.const_expr}
% else: % else:
${op.output_type} dst = ${op.const_expr}; ${output_type}_t dst = ${op.const_expr};
% endif % endif
## Store the current component of the actual destination to the ## Store the current component of the actual destination to the
## value of dst. ## value of dst.
% if op.output_type == "bool": % if output_type == "bool32":
## Sanitize the C value to a proper NIR bool ## Sanitize the C value to a proper NIR bool
_dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE; _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
% else: % else:
_dst_val.${op.output_type[:1]}[_i] = dst; _dst_val.${get_const_field(output_type)}[_i] = dst;
% endif % endif
} }
% else: % else:
@@ -286,7 +347,7 @@ evaluate_${name}(unsigned num_components, nir_const_value *_src)
## appropriately-typed elements x, y, z, and w and assign the result ## appropriately-typed elements x, y, z, and w and assign the result
## of the const_expr to all components of dst, or include the ## of the const_expr to all components of dst, or include the
## const_expr directly if it writes to dst already. ## const_expr directly if it writes to dst already.
struct ${op.output_type}_vec dst; struct ${output_type}_vec dst;
% if "dst" in op.const_expr: % if "dst" in op.const_expr:
${op.const_expr} ${op.const_expr}
@@ -301,27 +362,35 @@ evaluate_${name}(unsigned num_components, nir_const_value *_src)
## For each component in the destination, copy the value of dst to ## For each component in the destination, copy the value of dst to
## the actual destination. ## the actual destination.
% for k in range(op.output_size): % for k in range(op.output_size):
% if op.output_type == "bool": % if output_type == "bool32":
## Sanitize the C value to a proper NIR bool ## Sanitize the C value to a proper NIR bool
_dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE; _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
% else: % else:
_dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]}; _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
% endif % endif
% endfor % endfor
% endif % endif
break;
}
% endfor
default:
unreachable("unknown bit width");
}
return _dst_val; return _dst_val;
} }
% endfor % endfor
nir_const_value nir_const_value
nir_eval_const_opcode(nir_op op, unsigned num_components, nir_eval_const_opcode(nir_op op, unsigned num_components,
nir_const_value *src) unsigned bit_width, nir_const_value *src)
{ {
switch (op) { switch (op) {
% for name in sorted(opcodes.iterkeys()): % for name in sorted(opcodes.iterkeys()):
case nir_op_${name}: { case nir_op_${name}: {
return evaluate_${name}(num_components, src); return evaluate_${name}(num_components, bit_width, src);
break; break;
} }
% endfor % endfor
@@ -333,4 +402,7 @@ nir_eval_const_opcode(nir_op op, unsigned num_components,
from nir_opcodes import opcodes from nir_opcodes import opcodes
from mako.template import Template from mako.template import Template
print Template(template).render(opcodes=opcodes) print Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
type_has_size=type_has_size,
type_add_size=type_add_size,
get_const_field=get_const_field)

View File

@@ -90,8 +90,12 @@ class Opcode(object):
# helper variables for strings # helper variables for strings
tfloat = "float" tfloat = "float"
tint = "int" tint = "int"
tbool = "bool" tbool = "bool32"
tuint = "uint" tuint = "uint"
tfloat32 = "float32"
tint32 = "int32"
tuint32 = "uint32"
tfloat64 = "float64"
commutative = "commutative " commutative = "commutative "
associative = "associative " associative = "associative "
@@ -155,56 +159,56 @@ unop("frsq", tfloat, "1.0f / sqrtf(src0)")
unop("fsqrt", tfloat, "sqrtf(src0)") unop("fsqrt", tfloat, "sqrtf(src0)")
unop("fexp2", tfloat, "exp2f(src0)") unop("fexp2", tfloat, "exp2f(src0)")
unop("flog2", tfloat, "log2f(src0)") unop("flog2", tfloat, "log2f(src0)")
unop_convert("f2i", tint, tfloat, "src0") # Float-to-integer conversion. unop_convert("f2i", tint32, tfloat32, "src0") # Float-to-integer conversion.
unop_convert("f2u", tuint, tfloat, "src0") # Float-to-unsigned conversion unop_convert("f2u", tuint32, tfloat32, "src0") # Float-to-unsigned conversion
unop_convert("i2f", tfloat, tint, "src0") # Integer-to-float conversion. unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion.
# Float-to-boolean conversion # Float-to-boolean conversion
unop_convert("f2b", tbool, tfloat, "src0 != 0.0f") unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f")
# Boolean-to-float conversion # Boolean-to-float conversion
unop_convert("b2f", tfloat, tbool, "src0 ? 1.0f : 0.0f") unop_convert("b2f", tfloat32, tbool, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion # Int-to-boolean conversion
unop_convert("i2b", tbool, tint, "src0 != 0") unop_convert("i2b", tbool, tint32, "src0 != 0")
unop_convert("b2i", tint, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
unop_convert("u2f", tfloat, tuint, "src0") # Unsigned-to-float conversion. unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion.
# Unary floating-point rounding operations. # Unary floating-point rounding operations.
unop("ftrunc", tfloat, "truncf(src0)") unop("ftrunc", tfloat, "bit_size == 64 ? trunc(src0) : truncf(src0)")
unop("fceil", tfloat, "ceilf(src0)") unop("fceil", tfloat, "bit_size == 64 ? ceil(src0) : ceilf(src0)")
unop("ffloor", tfloat, "floorf(src0)") unop("ffloor", tfloat, "bit_size == 64 ? floor(src0) : floorf(src0)")
unop("ffract", tfloat, "src0 - floorf(src0)") unop("ffract", tfloat, "src0 - (bit_size == 64 ? floor(src0) : floorf(src0))")
unop("fround_even", tfloat, "_mesa_roundevenf(src0)") unop("fround_even", tfloat, "bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0)")
# Trigonometric operations. # Trigonometric operations.
unop("fsin", tfloat, "sinf(src0)") unop("fsin", tfloat, "bit_size == 64 ? sin(src0) : sinf(src0)")
unop("fcos", tfloat, "cosf(src0)") unop("fcos", tfloat, "bit_size == 64 ? cos(src0) : cosf(src0)")
# Partial derivatives. # Partial derivatives.
unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0. unop("fddx", tfloat, "0.0") # the derivative of a constant is 0.
unop("fddy", tfloat, "0.0f") unop("fddy", tfloat, "0.0")
unop("fddx_fine", tfloat, "0.0f") unop("fddx_fine", tfloat, "0.0")
unop("fddy_fine", tfloat, "0.0f") unop("fddy_fine", tfloat, "0.0")
unop("fddx_coarse", tfloat, "0.0f") unop("fddx_coarse", tfloat, "0.0")
unop("fddy_coarse", tfloat, "0.0f") unop("fddy_coarse", tfloat, "0.0")
# Floating point pack and unpack operations. # Floating point pack and unpack operations.
def pack_2x16(fmt): def pack_2x16(fmt):
unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """ unop_horiz("pack_" + fmt + "_2x16", 1, tuint32, 2, tfloat32, """
dst.x = (uint32_t) pack_fmt_1x16(src0.x); dst.x = (uint32_t) pack_fmt_1x16(src0.x);
dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16; dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
""".replace("fmt", fmt)) """.replace("fmt", fmt))
def pack_4x8(fmt): def pack_4x8(fmt):
unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """ unop_horiz("pack_" + fmt + "_4x8", 1, tuint32, 4, tfloat32, """
dst.x = (uint32_t) pack_fmt_1x8(src0.x); dst.x = (uint32_t) pack_fmt_1x8(src0.x);
dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8; dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16; dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
@@ -212,13 +216,13 @@ dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
""".replace("fmt", fmt)) """.replace("fmt", fmt))
def unpack_2x16(fmt): def unpack_2x16(fmt):
unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """ unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat32, 1, tuint32, """
dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff)); dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
dst.y = unpack_fmt_1x16((uint16_t)(src0.x << 16)); dst.y = unpack_fmt_1x16((uint16_t)(src0.x << 16));
""".replace("fmt", fmt)) """.replace("fmt", fmt))
def unpack_4x8(fmt): def unpack_4x8(fmt):
unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """ unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat32, 1, tuint32, """
dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff)); dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff)); dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff)); dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
@@ -237,11 +241,11 @@ unpack_2x16("unorm")
unpack_4x8("unorm") unpack_4x8("unorm")
unpack_2x16("half") unpack_2x16("half")
unop_horiz("pack_uvec2_to_uint", 1, tuint, 2, tuint, """ unop_horiz("pack_uvec2_to_uint", 1, tuint32, 2, tuint32, """
dst.x = (src0.x & 0xffff) | (src0.y >> 16); dst.x = (src0.x & 0xffff) | (src0.y >> 16);
""") """)
unop_horiz("pack_uvec4_to_uint", 1, tuint, 4, tuint, """ unop_horiz("pack_uvec4_to_uint", 1, tuint32, 4, tuint32, """
dst.x = (src0.x << 0) | dst.x = (src0.x << 0) |
(src0.y << 8) | (src0.y << 8) |
(src0.z << 16) | (src0.z << 16) |
@@ -251,22 +255,22 @@ dst.x = (src0.x << 0) |
# Lowered floating point unpacking operations. # Lowered floating point unpacking operations.
unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint, unop_horiz("unpack_half_2x16_split_x", 1, tfloat32, 1, tuint32,
"unpack_half_1x16((uint16_t)(src0.x & 0xffff))") "unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint, unop_horiz("unpack_half_2x16_split_y", 1, tfloat32, 1, tuint32,
"unpack_half_1x16((uint16_t)(src0.x >> 16))") "unpack_half_1x16((uint16_t)(src0.x >> 16))")
# Bit operations, part of ARB_gpu_shader5. # Bit operations, part of ARB_gpu_shader5.
unop("bitfield_reverse", tuint, """ unop("bitfield_reverse", tuint32, """
/* we're not winning any awards for speed here, but that's ok */ /* we're not winning any awards for speed here, but that's ok */
dst = 0; dst = 0;
for (unsigned bit = 0; bit < 32; bit++) for (unsigned bit = 0; bit < 32; bit++)
dst |= ((src0 >> bit) & 1) << (31 - bit); dst |= ((src0 >> bit) & 1) << (31 - bit);
""") """)
unop("bit_count", tuint, """ unop("bit_count", tuint32, """
dst = 0; dst = 0;
for (unsigned bit = 0; bit < 32; bit++) { for (unsigned bit = 0; bit < 32; bit++) {
if ((src0 >> bit) & 1) if ((src0 >> bit) & 1)
@@ -274,7 +278,7 @@ for (unsigned bit = 0; bit < 32; bit++) {
} }
""") """)
unop_convert("ufind_msb", tint, tuint, """ unop_convert("ufind_msb", tint32, tuint32, """
dst = -1; dst = -1;
for (int bit = 31; bit > 0; bit--) { for (int bit = 31; bit > 0; bit--) {
if ((src0 >> bit) & 1) { if ((src0 >> bit) & 1) {
@@ -284,7 +288,7 @@ for (int bit = 31; bit > 0; bit--) {
} }
""") """)
unop("ifind_msb", tint, """ unop("ifind_msb", tint32, """
dst = -1; dst = -1;
for (int bit = 31; bit >= 0; bit--) { for (int bit = 31; bit >= 0; bit--) {
/* If src0 < 0, we're looking for the first 0 bit. /* If src0 < 0, we're looking for the first 0 bit.
@@ -298,7 +302,7 @@ for (int bit = 31; bit >= 0; bit--) {
} }
""") """)
unop("find_lsb", tint, """ unop("find_lsb", tint32, """
dst = -1; dst = -1;
for (unsigned bit = 0; bit < 32; bit++) { for (unsigned bit = 0; bit < 32; bit++) {
if ((src0 >> bit) & 1) { if ((src0 >> bit) & 1) {
@@ -358,10 +362,10 @@ binop("fmul", tfloat, commutative + associative, "src0 * src1")
# low 32-bits of signed/unsigned integer multiply # low 32-bits of signed/unsigned integer multiply
binop("imul", tint, commutative + associative, "src0 * src1") binop("imul", tint, commutative + associative, "src0 * src1")
# high 32-bits of signed integer multiply # high 32-bits of signed integer multiply
binop("imul_high", tint, commutative, binop("imul_high", tint32, commutative,
"(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)") "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
# high 32-bits of unsigned integer multiply # high 32-bits of unsigned integer multiply
binop("umul_high", tuint, commutative, binop("umul_high", tuint32, commutative,
"(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)") "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
binop("fdiv", tfloat, "", "src0 / src1") binop("fdiv", tfloat, "", "src0 / src1")
@@ -412,18 +416,18 @@ binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
# non-integer-aware GLSL-style comparisons that return 0.0 or 1.0 # non-integer-aware GLSL-style comparisons that return 0.0 or 1.0
binop_reduce("fall_equal", 1, tfloat, tfloat, "{src0} == {src1}", binop_reduce("fall_equal", 1, tfloat32, tfloat32, "{src0} == {src1}",
"{src0} && {src1}", "{src} ? 1.0f : 0.0f") "{src0} && {src1}", "{src} ? 1.0f : 0.0f")
binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}", binop_reduce("fany_nequal", 1, tfloat32, tfloat32, "{src0} != {src1}",
"{src0} || {src1}", "{src} ? 1.0f : 0.0f") "{src0} || {src1}", "{src} ? 1.0f : 0.0f")
# These comparisons for integer-less hardware return 1.0 and 0.0 for true # These comparisons for integer-less hardware return 1.0 and 0.0 for true
# and false respectively # and false respectively
binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than binop("slt", tfloat32, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal binop("sge", tfloat32, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal binop("seq", tfloat32, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal binop("sne", tfloat32, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
binop("ishl", tint, "", "src0 << src1") binop("ishl", tint, "", "src0 << src1")
@@ -446,11 +450,11 @@ binop("ixor", tuint, commutative + associative, "src0 ^ src1")
# These use (src != 0.0) for testing the truth of the input, and output 1.0 # These use (src != 0.0) for testing the truth of the input, and output 1.0
# for true and 0.0 for false # for true and 0.0 for false
binop("fand", tfloat, commutative, binop("fand", tfloat32, commutative,
"((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f") "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
binop("for", tfloat, commutative, binop("for", tfloat32, commutative,
"((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f") "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
binop("fxor", tfloat, commutative, binop("fxor", tfloat32, commutative,
"(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f") "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")
binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}", binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
@@ -472,7 +476,7 @@ binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0") binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0")
# Saturated vector add for 4 8bit ints. # Saturated vector add for 4 8bit ints.
binop("usadd_4x8", tint, commutative + associative, """ binop("usadd_4x8", tint32, commutative + associative, """
dst = 0; dst = 0;
for (int i = 0; i < 32; i += 8) { for (int i = 0; i < 32; i += 8) {
dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i; dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
@@ -480,7 +484,7 @@ for (int i = 0; i < 32; i += 8) {
""") """)
# Saturated vector subtract for 4 8bit ints. # Saturated vector subtract for 4 8bit ints.
binop("ussub_4x8", tint, "", """ binop("ussub_4x8", tint32, "", """
dst = 0; dst = 0;
for (int i = 0; i < 32; i += 8) { for (int i = 0; i < 32; i += 8) {
int src0_chan = (src0 >> i) & 0xff; int src0_chan = (src0 >> i) & 0xff;
@@ -491,7 +495,7 @@ for (int i = 0; i < 32; i += 8) {
""") """)
# vector min for 4 8bit ints. # vector min for 4 8bit ints.
binop("umin_4x8", tint, commutative + associative, """ binop("umin_4x8", tint32, commutative + associative, """
dst = 0; dst = 0;
for (int i = 0; i < 32; i += 8) { for (int i = 0; i < 32; i += 8) {
dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
@@ -499,7 +503,7 @@ for (int i = 0; i < 32; i += 8) {
""") """)
# vector max for 4 8bit ints. # vector max for 4 8bit ints.
binop("umax_4x8", tint, commutative + associative, """ binop("umax_4x8", tint32, commutative + associative, """
dst = 0; dst = 0;
for (int i = 0; i < 32; i += 8) { for (int i = 0; i < 32; i += 8) {
dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
@@ -507,7 +511,7 @@ for (int i = 0; i < 32; i += 8) {
""") """)
# unorm multiply: (a * b) / 255. # unorm multiply: (a * b) / 255.
binop("umul_unorm_4x8", tint, commutative + associative, """ binop("umul_unorm_4x8", tint32, commutative + associative, """
dst = 0; dst = 0;
for (int i = 0; i < 32; i += 8) { for (int i = 0; i < 32; i += 8) {
int src0_chan = (src0 >> i) & 0xff; int src0_chan = (src0 >> i) & 0xff;
@@ -516,15 +520,15 @@ for (int i = 0; i < 32; i += 8) {
} }
""") """)
binop("fpow", tfloat, "", "powf(src0, src1)") binop("fpow", tfloat, "", "bit_size == 64 ? powf(src0, src1) : pow(src0, src1)")
binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat, binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32,
"pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)") "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
# bfm implements the behavior of the first operation of the SM5 "bfi" assembly # bfm implements the behavior of the first operation of the SM5 "bfi" assembly
# and that of the "bfi1" i965 instruction. That is, it has undefined behavior # and that of the "bfi1" i965 instruction. That is, it has undefined behavior
# if either of its arguments are 32. # if either of its arguments are 32.
binop_convert("bfm", tuint, tint, "", """ binop_convert("bfm", tuint32, tint32, "", """
int bits = src0, offset = src1; int bits = src0, offset = src1;
if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32) if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32)
dst = 0; /* undefined */ dst = 0; /* undefined */
@@ -533,7 +537,7 @@ else
""") """)
opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """ opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """
dst = ldexpf(src0, src1); dst = (bit_size == 64) ? ldexp(src0, src1) : ldexpf(src0, src1);
/* flush denormals to zero. */ /* flush denormals to zero. */
if (!isnormal(dst)) if (!isnormal(dst))
dst = copysignf(0.0f, src0); dst = copysignf(0.0f, src0);
@@ -573,12 +577,12 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0). # bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2") triop("fcsel", tfloat32, "(src0 != 0.0f) ? src1 : src2")
opcode("bcsel", 0, tuint, [0, 0, 0], opcode("bcsel", 0, tuint, [0, 0, 0],
[tbool, tuint, tuint], "", "src0 ? src1 : src2") [tbool, tuint, tuint], "", "src0 ? src1 : src2")
# SM5 bfi assembly # SM5 bfi assembly
triop("bfi", tuint, """ triop("bfi", tuint32, """
unsigned mask = src0, insert = src1, base = src2; unsigned mask = src0, insert = src1, base = src2;
if (mask == 0) { if (mask == 0) {
dst = base; dst = base;
@@ -593,8 +597,8 @@ if (mask == 0) {
""") """)
# SM5 ubfe/ibfe assembly # SM5 ubfe/ibfe assembly
opcode("ubfe", 0, tuint, opcode("ubfe", 0, tuint32,
[0, 0, 0], [tuint, tint, tint], "", """ [0, 0, 0], [tuint32, tint32, tint32], "", """
unsigned base = src0; unsigned base = src0;
int offset = src1, bits = src2; int offset = src1, bits = src2;
if (bits == 0) { if (bits == 0) {
@@ -607,8 +611,8 @@ if (bits == 0) {
dst = base >> offset; dst = base >> offset;
} }
""") """)
opcode("ibfe", 0, tint, opcode("ibfe", 0, tint32,
[0, 0, 0], [tint, tint, tint], "", """ [0, 0, 0], [tint32, tint32, tint32], "", """
int base = src0; int base = src0;
int offset = src1, bits = src2; int offset = src1, bits = src2;
if (bits == 0) { if (bits == 0) {
@@ -623,8 +627,8 @@ if (bits == 0) {
""") """)
# GLSL bitfieldExtract() # GLSL bitfieldExtract()
opcode("ubitfield_extract", 0, tuint, opcode("ubitfield_extract", 0, tuint32,
[0, 0, 0], [tuint, tint, tint], "", """ [0, 0, 0], [tuint32, tint32, tint32], "", """
unsigned base = src0; unsigned base = src0;
int offset = src1, bits = src2; int offset = src1, bits = src2;
if (bits == 0) { if (bits == 0) {
@@ -635,8 +639,8 @@ if (bits == 0) {
dst = (base >> offset) & ((1ull << bits) - 1); dst = (base >> offset) & ((1ull << bits) - 1);
} }
""") """)
opcode("ibitfield_extract", 0, tint, opcode("ibitfield_extract", 0, tint32,
[0, 0, 0], [tint, tint, tint], "", """ [0, 0, 0], [tint32, tint32, tint32], "", """
int base = src0; int base = src0;
int offset = src1, bits = src2; int offset = src1, bits = src2;
if (bits == 0) { if (bits == 0) {
@@ -663,8 +667,8 @@ def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
[tuint, tuint, tuint, tuint], [tuint, tuint, tuint, tuint],
"", const_expr) "", const_expr)
opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0], opcode("bitfield_insert", 0, tuint32, [0, 0, 0, 0],
[tuint, tuint, tint, tint], "", """ [tuint32, tuint32, tint32, tint32], "", """
unsigned base = src0, insert = src1; unsigned base = src0, insert = src1;
int offset = src2, bits = src3; int offset = src2, bits = src3;
if (bits == 0) { if (bits == 0) {

View File

@@ -46,10 +46,28 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
if (!instr->dest.dest.is_ssa) if (!instr->dest.dest.is_ssa)
return false; return false;
/* If any outputs/inputs have unsized types, then we need to guess the
* bit-size. In this case, the validator ensures that all bit-sizes match
* so we can just take the bit-size from the first output/input with an
* unsized type. If all the outputs/inputs are sized then we don't need to
* guess the bit-size at all, because the code we generate for constant
* opcodes in this case already knows the sizes of the types involved and
* does not need the provided bit-size for anything (although a valid
* bit-size must still be passed in).
*/
unsigned bit_size = 0;
if (!nir_alu_type_get_type_size(nir_op_infos[instr->op].output_type))
bit_size = instr->dest.dest.ssa.bit_size;
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
if (!instr->src[i].src.is_ssa) if (!instr->src[i].src.is_ssa)
return false; return false;
if (bit_size == 0 &&
!nir_alu_type_get_type_size(nir_op_infos[instr->op].input_sizes[i])) {
bit_size = instr->src[i].src.ssa->bit_size;
}
nir_instr *src_instr = instr->src[i].src.ssa->parent_instr; nir_instr *src_instr = instr->src[i].src.ssa->parent_instr;
if (src_instr->type != nir_instr_type_load_const) if (src_instr->type != nir_instr_type_load_const)
@@ -58,6 +76,9 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(instr, i); for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(instr, i);
j++) { j++) {
if (load_const->def.bit_size == 64)
src[i].ul[j] = load_const->value.ul[instr->src[i].swizzle[j]];
else
src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]]; src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]];
} }
@@ -65,17 +86,21 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
assert(!instr->src[i].abs && !instr->src[i].negate); assert(!instr->src[i].abs && !instr->src[i].negate);
} }
if (bit_size == 0)
bit_size = 32;
/* We shouldn't have any saturate modifiers in the optimization loop. */ /* We shouldn't have any saturate modifiers in the optimization loop. */
assert(!instr->dest.saturate); assert(!instr->dest.saturate);
nir_const_value dest = nir_const_value dest =
nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components, nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components,
src); bit_size, src);
nir_load_const_instr *new_instr = nir_load_const_instr *new_instr =
nir_load_const_instr_create(mem_ctx, nir_load_const_instr_create(mem_ctx,
instr->dest.dest.ssa.num_components); instr->dest.dest.ssa.num_components);
new_instr->def.bit_size = instr->dest.dest.ssa.bit_size;
new_instr->value = dest; new_instr->value = dest;
nir_instr_insert_before(&instr->instr, &new_instr->instr); nir_instr_insert_before(&instr->instr, &new_instr->instr);

View File

@@ -885,7 +885,9 @@ ntq_emit_comparison(struct vc4_compile *c, struct qreg *dest,
struct qreg src0 = ntq_get_alu_src(c, compare_instr, 0); struct qreg src0 = ntq_get_alu_src(c, compare_instr, 0);
struct qreg src1 = ntq_get_alu_src(c, compare_instr, 1); struct qreg src1 = ntq_get_alu_src(c, compare_instr, 1);
if (nir_op_infos[compare_instr->op].input_types[0] == nir_type_float) unsigned unsized_type =
nir_alu_type_get_base_type(nir_op_infos[compare_instr->op].input_types[0]);
if (unsized_type == nir_type_float)
qir_SF(c, qir_FSUB(c, src0, src1)); qir_SF(c, qir_FSUB(c, src0, src1));
else else
qir_SF(c, qir_SUB(c, src0, src1)); qir_SF(c, qir_SUB(c, src0, src1));

View File

@@ -623,12 +623,24 @@ brw_type_for_nir_type(nir_alu_type type)
{ {
switch (type) { switch (type) {
case nir_type_uint: case nir_type_uint:
case nir_type_uint32:
return BRW_REGISTER_TYPE_UD; return BRW_REGISTER_TYPE_UD;
case nir_type_bool: case nir_type_bool:
case nir_type_int: case nir_type_int:
case nir_type_bool32:
case nir_type_int32:
return BRW_REGISTER_TYPE_D; return BRW_REGISTER_TYPE_D;
case nir_type_float: case nir_type_float:
case nir_type_float32:
return BRW_REGISTER_TYPE_F; return BRW_REGISTER_TYPE_F;
case nir_type_float64:
return BRW_REGISTER_TYPE_DF;
case nir_type_int64:
case nir_type_uint64:
/* TODO we should only see these in moves, so for now it's ok, but when
* we add actual 64-bit integer support we should fix this.
*/
return BRW_REGISTER_TYPE_DF;
default: default:
unreachable("unknown type"); unreachable("unknown type");
} }
@@ -644,12 +656,18 @@ brw_glsl_base_type_for_nir_type(nir_alu_type type)
{ {
switch (type) { switch (type) {
case nir_type_float: case nir_type_float:
case nir_type_float32:
return GLSL_TYPE_FLOAT; return GLSL_TYPE_FLOAT;
case nir_type_float64:
return GLSL_TYPE_DOUBLE;
case nir_type_int: case nir_type_int:
case nir_type_int32:
return GLSL_TYPE_INT; return GLSL_TYPE_INT;
case nir_type_uint: case nir_type_uint:
case nir_type_uint32:
return GLSL_TYPE_UINT; return GLSL_TYPE_UINT;
default: default: