intel/compiler/fs: Add support for 16-bit sampler msg payload
For SIMD8 half float payload, each component takes a full register, so we can use the existing LOAD_PAYLOAD infrastructure for the required padding by alternating a plain 8-wide half float vector and a null vector. Also this patch removes an unwanted assertion from opt_copy_propagation_local for LOAD_PAYLOAD. Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com> Reviewed-by: Francisco Jerez <currojerez@riseup.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11766>
This commit is contained in:

committed by
Sagar Ghuge

parent
936412af27
commit
0374b56faa
@@ -5221,16 +5221,23 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||||||
const fs_reg &surface_handle,
|
const fs_reg &surface_handle,
|
||||||
const fs_reg &sampler_handle,
|
const fs_reg &sampler_handle,
|
||||||
const fs_reg &tg4_offset,
|
const fs_reg &tg4_offset,
|
||||||
|
unsigned payload_type_bit_size,
|
||||||
unsigned coord_components,
|
unsigned coord_components,
|
||||||
unsigned grad_components)
|
unsigned grad_components)
|
||||||
{
|
{
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
const enum brw_reg_type payload_type =
|
||||||
|
brw_reg_type_from_bit_size(payload_type_bit_size, BRW_REGISTER_TYPE_F);
|
||||||
|
const enum brw_reg_type payload_unsigned_type =
|
||||||
|
brw_reg_type_from_bit_size(payload_type_bit_size, BRW_REGISTER_TYPE_UD);
|
||||||
|
const enum brw_reg_type payload_signed_type =
|
||||||
|
brw_reg_type_from_bit_size(payload_type_bit_size, BRW_REGISTER_TYPE_D);
|
||||||
const brw_stage_prog_data *prog_data = bld.shader->stage_prog_data;
|
const brw_stage_prog_data *prog_data = bld.shader->stage_prog_data;
|
||||||
unsigned reg_width = bld.dispatch_width() / 8;
|
unsigned reg_width = bld.dispatch_width() / 8;
|
||||||
unsigned header_size = 0, length = 0;
|
unsigned header_size = 0, length = 0;
|
||||||
fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE];
|
fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE];
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
|
||||||
sources[i] = bld.vgrf(BRW_REGISTER_TYPE_F);
|
sources[i] = bld.vgrf(payload_type);
|
||||||
|
|
||||||
/* We must have exactly one of surface/sampler and surface/sampler_handle */
|
/* We must have exactly one of surface/sampler and surface/sampler_handle */
|
||||||
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
|
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
|
||||||
@@ -5369,23 +5376,23 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||||||
coordinate_done = true;
|
coordinate_done = true;
|
||||||
break;
|
break;
|
||||||
case SHADER_OPCODE_TXS:
|
case SHADER_OPCODE_TXS:
|
||||||
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod);
|
bld.MOV(retype(sources[length], payload_unsigned_type), lod);
|
||||||
length++;
|
length++;
|
||||||
break;
|
break;
|
||||||
case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
|
case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
|
||||||
/* We need an LOD; just use 0 */
|
/* We need an LOD; just use 0 */
|
||||||
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
|
bld.MOV(retype(sources[length], payload_unsigned_type), brw_imm_ud(0));
|
||||||
length++;
|
length++;
|
||||||
break;
|
break;
|
||||||
case SHADER_OPCODE_TXF:
|
case SHADER_OPCODE_TXF:
|
||||||
/* Unfortunately, the parameters for LD are intermixed: u, lod, v, r.
|
/* Unfortunately, the parameters for LD are intermixed: u, lod, v, r.
|
||||||
* On Gfx9 they are u, v, lod, r
|
* On Gfx9 they are u, v, lod, r
|
||||||
*/
|
*/
|
||||||
bld.MOV(retype(sources[length++], BRW_REGISTER_TYPE_D), coordinate);
|
bld.MOV(retype(sources[length++], payload_signed_type), coordinate);
|
||||||
|
|
||||||
if (devinfo->ver >= 9) {
|
if (devinfo->ver >= 9) {
|
||||||
if (coord_components >= 2) {
|
if (coord_components >= 2) {
|
||||||
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D),
|
bld.MOV(retype(sources[length], payload_signed_type),
|
||||||
offset(coordinate, bld, 1));
|
offset(coordinate, bld, 1));
|
||||||
} else {
|
} else {
|
||||||
sources[length] = brw_imm_d(0);
|
sources[length] = brw_imm_d(0);
|
||||||
@@ -5396,12 +5403,12 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||||||
if (devinfo->ver >= 9 && lod.is_zero()) {
|
if (devinfo->ver >= 9 && lod.is_zero()) {
|
||||||
op = SHADER_OPCODE_TXF_LZ;
|
op = SHADER_OPCODE_TXF_LZ;
|
||||||
} else {
|
} else {
|
||||||
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod);
|
bld.MOV(retype(sources[length], payload_signed_type), lod);
|
||||||
length++;
|
length++;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned i = devinfo->ver >= 9 ? 2 : 1; i < coord_components; i++)
|
for (unsigned i = devinfo->ver >= 9 ? 2 : 1; i < coord_components; i++)
|
||||||
bld.MOV(retype(sources[length++], BRW_REGISTER_TYPE_D),
|
bld.MOV(retype(sources[length++], payload_signed_type),
|
||||||
offset(coordinate, bld, i));
|
offset(coordinate, bld, i));
|
||||||
|
|
||||||
coordinate_done = true;
|
coordinate_done = true;
|
||||||
@@ -5414,20 +5421,19 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||||||
if (op == SHADER_OPCODE_TXF_UMS ||
|
if (op == SHADER_OPCODE_TXF_UMS ||
|
||||||
op == SHADER_OPCODE_TXF_CMS ||
|
op == SHADER_OPCODE_TXF_CMS ||
|
||||||
op == SHADER_OPCODE_TXF_CMS_W) {
|
op == SHADER_OPCODE_TXF_CMS_W) {
|
||||||
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index);
|
bld.MOV(retype(sources[length++], payload_unsigned_type), sample_index);
|
||||||
length++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (op == SHADER_OPCODE_TXF_CMS || op == SHADER_OPCODE_TXF_CMS_W) {
|
if (op == SHADER_OPCODE_TXF_CMS || op == SHADER_OPCODE_TXF_CMS_W) {
|
||||||
/* Data from the multisample control surface. */
|
/* Data from the multisample control surface. */
|
||||||
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs);
|
bld.MOV(retype(sources[length], payload_unsigned_type), mcs);
|
||||||
length++;
|
length++;
|
||||||
|
|
||||||
/* On Gfx9+ we'll use ld2dms_w instead which has two registers for
|
/* On Gfx9+ we'll use ld2dms_w instead which has two registers for
|
||||||
* the MCS data.
|
* the MCS data.
|
||||||
*/
|
*/
|
||||||
if (op == SHADER_OPCODE_TXF_CMS_W) {
|
if (op == SHADER_OPCODE_TXF_CMS_W) {
|
||||||
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD),
|
bld.MOV(retype(sources[length], payload_unsigned_type),
|
||||||
mcs.file == IMM ?
|
mcs.file == IMM ?
|
||||||
mcs :
|
mcs :
|
||||||
offset(mcs, bld, 1));
|
offset(mcs, bld, 1));
|
||||||
@@ -5439,7 +5445,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||||||
* texture coordinates.
|
* texture coordinates.
|
||||||
*/
|
*/
|
||||||
for (unsigned i = 0; i < coord_components; i++)
|
for (unsigned i = 0; i < coord_components; i++)
|
||||||
bld.MOV(retype(sources[length++], BRW_REGISTER_TYPE_D),
|
bld.MOV(retype(sources[length++], payload_signed_type),
|
||||||
offset(coordinate, bld, i));
|
offset(coordinate, bld, i));
|
||||||
|
|
||||||
coordinate_done = true;
|
coordinate_done = true;
|
||||||
@@ -5450,7 +5456,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||||||
bld.MOV(sources[length++], offset(coordinate, bld, i));
|
bld.MOV(sources[length++], offset(coordinate, bld, i));
|
||||||
|
|
||||||
for (unsigned i = 0; i < 2; i++) /* offu, offv */
|
for (unsigned i = 0; i < 2; i++) /* offu, offv */
|
||||||
bld.MOV(retype(sources[length++], BRW_REGISTER_TYPE_D),
|
bld.MOV(retype(sources[length++], payload_signed_type),
|
||||||
offset(tg4_offset, bld, i));
|
offset(tg4_offset, bld, i));
|
||||||
|
|
||||||
if (coord_components == 3) /* r if present */
|
if (coord_components == 3) /* r if present */
|
||||||
@@ -5465,7 +5471,8 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||||||
/* Set up the coordinate (except for cases where it was done above) */
|
/* Set up the coordinate (except for cases where it was done above) */
|
||||||
if (!coordinate_done) {
|
if (!coordinate_done) {
|
||||||
for (unsigned i = 0; i < coord_components; i++)
|
for (unsigned i = 0; i < coord_components; i++)
|
||||||
bld.MOV(sources[length++], offset(coordinate, bld, i));
|
bld.MOV(retype(sources[length++], payload_type),
|
||||||
|
offset(coordinate, bld, i));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (min_lod.file != BAD_FILE) {
|
if (min_lod.file != BAD_FILE) {
|
||||||
@@ -5477,15 +5484,27 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||||||
bld.MOV(sources[length++], min_lod);
|
bld.MOV(sources[length++], min_lod);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned mlen;
|
const fs_reg src_payload =
|
||||||
if (reg_width == 2)
|
fs_reg(VGRF, bld.shader->alloc.allocate(length * reg_width),
|
||||||
mlen = length * reg_width - header_size;
|
BRW_REGISTER_TYPE_F);
|
||||||
else
|
/* In case of 16-bit payload each component takes one full register in
|
||||||
mlen = length * reg_width;
|
* both SIMD8H and SIMD16H modes. In both cases one reg can hold 16
|
||||||
|
* elements. In SIMD8H case hardware simply expects the components to be
|
||||||
const fs_reg src_payload = fs_reg(VGRF, bld.shader->alloc.allocate(mlen),
|
* padded (i.e., aligned on reg boundary).
|
||||||
BRW_REGISTER_TYPE_F);
|
*/
|
||||||
bld.LOAD_PAYLOAD(src_payload, sources, length, header_size);
|
fs_inst *load_payload_inst =
|
||||||
|
emit_load_payload_with_padding(bld, src_payload, sources, length,
|
||||||
|
header_size, REG_SIZE);
|
||||||
|
unsigned mlen = load_payload_inst->size_written / REG_SIZE;
|
||||||
|
unsigned simd_mode = 0;
|
||||||
|
if (payload_type_bit_size == 16) {
|
||||||
|
assert(devinfo->ver >= 11);
|
||||||
|
simd_mode = inst->exec_size <= 8 ? GFX10_SAMPLER_SIMD_MODE_SIMD8H :
|
||||||
|
GFX10_SAMPLER_SIMD_MODE_SIMD16H;
|
||||||
|
} else {
|
||||||
|
simd_mode = inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 :
|
||||||
|
BRW_SAMPLER_SIMD_MODE_SIMD16;
|
||||||
|
}
|
||||||
|
|
||||||
/* Generate the SEND. */
|
/* Generate the SEND. */
|
||||||
inst->opcode = SHADER_OPCODE_SEND;
|
inst->opcode = SHADER_OPCODE_SEND;
|
||||||
@@ -5494,9 +5513,6 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||||||
|
|
||||||
const unsigned msg_type =
|
const unsigned msg_type =
|
||||||
sampler_msg_type(devinfo, op, inst->shadow_compare);
|
sampler_msg_type(devinfo, op, inst->shadow_compare);
|
||||||
const unsigned simd_mode =
|
|
||||||
inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 :
|
|
||||||
BRW_SAMPLER_SIMD_MODE_SIMD16;
|
|
||||||
|
|
||||||
uint32_t base_binding_table_index;
|
uint32_t base_binding_table_index;
|
||||||
switch (op) {
|
switch (op) {
|
||||||
@@ -5599,6 +5615,34 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||||||
assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE);
|
assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned
|
||||||
|
get_sampler_msg_payload_type_bit_size(const fs_reg *src)
|
||||||
|
{
|
||||||
|
unsigned src_type_size = 0;
|
||||||
|
|
||||||
|
/* All sources need to have the same size, therefore seek the first valid
|
||||||
|
* and take the size from there.
|
||||||
|
*/
|
||||||
|
for (unsigned i = 0; i < TEX_LOGICAL_NUM_SRCS; i++) {
|
||||||
|
if (src[i].file != BAD_FILE) {
|
||||||
|
src_type_size = brw_reg_type_to_size(src[i].type);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(src_type_size == 2 || src_type_size == 4);
|
||||||
|
|
||||||
|
#ifndef NDEBUG
|
||||||
|
/* Make sure all sources agree. */
|
||||||
|
for (unsigned i = 0; i < TEX_LOGICAL_NUM_SRCS; i++) {
|
||||||
|
assert(src[i].file == BAD_FILE ||
|
||||||
|
brw_reg_type_to_size(src[i].type) == src_type_size);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return src_type_size * 8;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
|
lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
|
||||||
{
|
{
|
||||||
@@ -5621,12 +5665,19 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
|
|||||||
const unsigned grad_components = inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
|
const unsigned grad_components = inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
|
||||||
|
|
||||||
if (devinfo->ver >= 7) {
|
if (devinfo->ver >= 7) {
|
||||||
|
const unsigned msg_payload_type_bit_size =
|
||||||
|
get_sampler_msg_payload_type_bit_size(inst->src);
|
||||||
|
|
||||||
|
/* 16-bit payloads are available only on gfx11+ */
|
||||||
|
assert(msg_payload_type_bit_size != 16 || devinfo->ver >= 11);
|
||||||
|
|
||||||
lower_sampler_logical_send_gfx7(bld, inst, op, coordinate,
|
lower_sampler_logical_send_gfx7(bld, inst, op, coordinate,
|
||||||
shadow_c, lod, lod2, min_lod,
|
shadow_c, lod, lod2, min_lod,
|
||||||
sample_index,
|
sample_index,
|
||||||
mcs, surface, sampler,
|
mcs, surface, sampler,
|
||||||
surface_handle, sampler_handle,
|
surface_handle, sampler_handle,
|
||||||
tg4_offset,
|
tg4_offset,
|
||||||
|
msg_payload_type_bit_size,
|
||||||
coord_components, grad_components);
|
coord_components, grad_components);
|
||||||
} else if (devinfo->ver >= 5) {
|
} else if (devinfo->ver >= 5) {
|
||||||
lower_sampler_logical_send_gfx5(bld, inst, op, coordinate,
|
lower_sampler_logical_send_gfx5(bld, inst, op, coordinate,
|
||||||
|
@@ -1026,7 +1026,6 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block,
|
|||||||
int offset = 0;
|
int offset = 0;
|
||||||
for (int i = 0; i < inst->sources; i++) {
|
for (int i = 0; i < inst->sources; i++) {
|
||||||
int effective_width = i < inst->header_size ? 8 : inst->exec_size;
|
int effective_width = i < inst->header_size ? 8 : inst->exec_size;
|
||||||
assert(effective_width * type_sz(inst->src[i].type) % REG_SIZE == 0);
|
|
||||||
const unsigned size_written = effective_width *
|
const unsigned size_written = effective_width *
|
||||||
type_sz(inst->src[i].type);
|
type_sz(inst->src[i].type);
|
||||||
if (inst->src[i].file == VGRF ||
|
if (inst->src[i].file == VGRF ||
|
||||||
|
Reference in New Issue
Block a user