intel/brw: Delete legacy texture opcodes
We first generate the logical opcodes and, these days, fully lower them to SHADER_OPCODE_SEND. In the past, we lowered to a non-logical variant and handled that in the generator. More recently, the non-logical opcodes were only being used as an awkward intermediate opcode change during the lowering, which isn't really necessary at all. This patch eliminates them by using the original logical opcodes.

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27908>
This commit is contained in:

committed by
Marge Bot

parent
19248f48eb
commit
ad37622a8f
@@ -279,47 +279,26 @@ enum opcode {
|
||||
/**
|
||||
* Texture sampling opcodes.
|
||||
*
|
||||
* LOGICAL opcodes are eventually translated to the matching non-LOGICAL
|
||||
* opcode but instead of taking a single payload blob they expect their
|
||||
* arguments separately as individual sources. The position/ordering of the
|
||||
* arguments are defined by the enum tex_logical_srcs.
|
||||
* LOGICAL opcodes are eventually translated to SHADER_OPCODE_SEND but
|
||||
* take parameters as individual sources. See enum tex_logical_srcs.
|
||||
*/
|
||||
SHADER_OPCODE_TEX,
|
||||
SHADER_OPCODE_TEX_LOGICAL,
|
||||
SHADER_OPCODE_TXD,
|
||||
SHADER_OPCODE_TXD_LOGICAL,
|
||||
SHADER_OPCODE_TXF,
|
||||
SHADER_OPCODE_TXF_LOGICAL,
|
||||
SHADER_OPCODE_TXF_LZ,
|
||||
SHADER_OPCODE_TXL,
|
||||
SHADER_OPCODE_TXL_LOGICAL,
|
||||
SHADER_OPCODE_TXL_LZ,
|
||||
SHADER_OPCODE_TXS,
|
||||
SHADER_OPCODE_TXS_LOGICAL,
|
||||
FS_OPCODE_TXB,
|
||||
FS_OPCODE_TXB_LOGICAL,
|
||||
SHADER_OPCODE_TXF_CMS_W,
|
||||
SHADER_OPCODE_TXF_CMS_W_LOGICAL,
|
||||
SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL,
|
||||
SHADER_OPCODE_TXF_MCS,
|
||||
SHADER_OPCODE_TXF_MCS_LOGICAL,
|
||||
SHADER_OPCODE_LOD,
|
||||
SHADER_OPCODE_LOD_LOGICAL,
|
||||
SHADER_OPCODE_TG4,
|
||||
SHADER_OPCODE_TG4_LOGICAL,
|
||||
SHADER_OPCODE_TG4_IMPLICIT_LOD,
|
||||
SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL,
|
||||
SHADER_OPCODE_TG4_EXPLICIT_LOD,
|
||||
SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL,
|
||||
SHADER_OPCODE_TG4_BIAS,
|
||||
SHADER_OPCODE_TG4_BIAS_LOGICAL,
|
||||
SHADER_OPCODE_TG4_OFFSET,
|
||||
SHADER_OPCODE_TG4_OFFSET_LOGICAL,
|
||||
SHADER_OPCODE_TG4_OFFSET_LOD,
|
||||
SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL,
|
||||
SHADER_OPCODE_TG4_OFFSET_BIAS,
|
||||
SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL,
|
||||
SHADER_OPCODE_SAMPLEINFO,
|
||||
SHADER_OPCODE_SAMPLEINFO_LOGICAL,
|
||||
|
||||
SHADER_OPCODE_IMAGE_SIZE_LOGICAL,
|
||||
|
@@ -237,25 +237,6 @@ fs_inst::is_control_source(unsigned arg) const
|
||||
|
||||
case SHADER_OPCODE_MOV_INDIRECT:
|
||||
case SHADER_OPCODE_CLUSTER_BROADCAST:
|
||||
case SHADER_OPCODE_TEX:
|
||||
case FS_OPCODE_TXB:
|
||||
case SHADER_OPCODE_TXD:
|
||||
case SHADER_OPCODE_TXF:
|
||||
case SHADER_OPCODE_TXF_LZ:
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
case SHADER_OPCODE_TXL:
|
||||
case SHADER_OPCODE_TXL_LZ:
|
||||
case SHADER_OPCODE_TXS:
|
||||
case SHADER_OPCODE_LOD:
|
||||
case SHADER_OPCODE_TG4:
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
case SHADER_OPCODE_TG4_BIAS:
|
||||
case SHADER_OPCODE_TG4_EXPLICIT_LOD:
|
||||
case SHADER_OPCODE_TG4_IMPLICIT_LOD:
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOD:
|
||||
case SHADER_OPCODE_TG4_OFFSET_BIAS:
|
||||
case SHADER_OPCODE_SAMPLEINFO:
|
||||
return arg == 1 || arg == 2;
|
||||
|
||||
case SHADER_OPCODE_SEND:
|
||||
@@ -277,25 +258,6 @@ fs_inst::is_payload(unsigned arg) const
|
||||
case SHADER_OPCODE_INTERLOCK:
|
||||
case SHADER_OPCODE_MEMORY_FENCE:
|
||||
case SHADER_OPCODE_BARRIER:
|
||||
case SHADER_OPCODE_TEX:
|
||||
case FS_OPCODE_TXB:
|
||||
case SHADER_OPCODE_TXD:
|
||||
case SHADER_OPCODE_TXF:
|
||||
case SHADER_OPCODE_TXF_LZ:
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
case SHADER_OPCODE_TXL:
|
||||
case SHADER_OPCODE_TXL_LZ:
|
||||
case SHADER_OPCODE_TXS:
|
||||
case SHADER_OPCODE_LOD:
|
||||
case SHADER_OPCODE_TG4:
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
case SHADER_OPCODE_TG4_BIAS:
|
||||
case SHADER_OPCODE_TG4_EXPLICIT_LOD:
|
||||
case SHADER_OPCODE_TG4_IMPLICIT_LOD:
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOD:
|
||||
case SHADER_OPCODE_TG4_OFFSET_BIAS:
|
||||
case SHADER_OPCODE_SAMPLEINFO:
|
||||
return arg == 0;
|
||||
|
||||
case SHADER_OPCODE_SEND:
|
||||
@@ -959,29 +921,6 @@ fs_inst::size_read(int arg) const
|
||||
}
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TEX:
|
||||
case FS_OPCODE_TXB:
|
||||
case SHADER_OPCODE_TXD:
|
||||
case SHADER_OPCODE_TXF:
|
||||
case SHADER_OPCODE_TXF_LZ:
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
case SHADER_OPCODE_TXL:
|
||||
case SHADER_OPCODE_TXL_LZ:
|
||||
case SHADER_OPCODE_TXS:
|
||||
case SHADER_OPCODE_LOD:
|
||||
case SHADER_OPCODE_TG4:
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
case SHADER_OPCODE_TG4_BIAS:
|
||||
case SHADER_OPCODE_TG4_EXPLICIT_LOD:
|
||||
case SHADER_OPCODE_TG4_IMPLICIT_LOD:
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOD:
|
||||
case SHADER_OPCODE_TG4_OFFSET_BIAS:
|
||||
case SHADER_OPCODE_SAMPLEINFO:
|
||||
if (arg == 0 && src[0].file == VGRF)
|
||||
return mlen * REG_SIZE;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -2343,78 +2282,40 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
|
||||
case SHADER_OPCODE_UNDEF:
|
||||
return "undef";
|
||||
|
||||
case SHADER_OPCODE_TEX:
|
||||
return "tex";
|
||||
case SHADER_OPCODE_TEX_LOGICAL:
|
||||
return "tex_logical";
|
||||
case SHADER_OPCODE_TXD:
|
||||
return "txd";
|
||||
case SHADER_OPCODE_TXD_LOGICAL:
|
||||
return "txd_logical";
|
||||
case SHADER_OPCODE_TXF:
|
||||
return "txf";
|
||||
case SHADER_OPCODE_TXF_LOGICAL:
|
||||
return "txf_logical";
|
||||
case SHADER_OPCODE_TXF_LZ:
|
||||
return "txf_lz";
|
||||
case SHADER_OPCODE_TXL:
|
||||
return "txl";
|
||||
case SHADER_OPCODE_TXL_LOGICAL:
|
||||
return "txl_logical";
|
||||
case SHADER_OPCODE_TXL_LZ:
|
||||
return "txl_lz";
|
||||
case SHADER_OPCODE_TXS:
|
||||
return "txs";
|
||||
case SHADER_OPCODE_TXS_LOGICAL:
|
||||
return "txs_logical";
|
||||
case FS_OPCODE_TXB:
|
||||
return "txb";
|
||||
case FS_OPCODE_TXB_LOGICAL:
|
||||
return "txb_logical";
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
return "txf_cms_w";
|
||||
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
|
||||
return "txf_cms_w_logical";
|
||||
case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
|
||||
return "txf_cms_w_gfx12_logical";
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
return "txf_mcs";
|
||||
case SHADER_OPCODE_TXF_MCS_LOGICAL:
|
||||
return "txf_mcs_logical";
|
||||
case SHADER_OPCODE_LOD:
|
||||
return "lod";
|
||||
case SHADER_OPCODE_LOD_LOGICAL:
|
||||
return "lod_logical";
|
||||
case SHADER_OPCODE_TG4:
|
||||
return "tg4";
|
||||
case SHADER_OPCODE_TG4_LOGICAL:
|
||||
return "tg4_logical";
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
return "tg4_offset";
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
|
||||
return "tg4_offset_logical";
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOD:
|
||||
return "tg4_offset_lod";
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
|
||||
return "tg4_offset_lod_logical";
|
||||
case SHADER_OPCODE_TG4_OFFSET_BIAS:
|
||||
return "tg4_offset_bias";
|
||||
case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
|
||||
return "tg4_offset_bias_logical";
|
||||
case SHADER_OPCODE_TG4_BIAS:
|
||||
return "tg4_b";
|
||||
case SHADER_OPCODE_TG4_BIAS_LOGICAL:
|
||||
return "tg4_b_logical";
|
||||
case SHADER_OPCODE_TG4_EXPLICIT_LOD:
|
||||
return "tg4_l";
|
||||
case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
|
||||
return "tg4_l_logical";
|
||||
case SHADER_OPCODE_TG4_IMPLICIT_LOD:
|
||||
return "tg4_i";
|
||||
case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
|
||||
return "tg4_i_logical";
|
||||
case SHADER_OPCODE_SAMPLEINFO:
|
||||
return "sampleinfo";
|
||||
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
|
||||
return "sampleinfo_logical";
|
||||
|
||||
|
@@ -173,15 +173,15 @@ get_sampler_lowered_simd_width(const struct intel_device_info *devinfo,
|
||||
* message, it will push it over 5 arguments and we have to fall back to
|
||||
* SIMD8.
|
||||
*/
|
||||
if (inst->opcode != SHADER_OPCODE_TEX &&
|
||||
if (inst->opcode != SHADER_OPCODE_TEX_LOGICAL &&
|
||||
inst->components_read(TEX_LOGICAL_SRC_MIN_LOD))
|
||||
return devinfo->ver < 20 ? 8 : 16;
|
||||
|
||||
/* On Gfx9+ the LOD argument is for free if we're able to use the LZ
|
||||
* variant of the TXL or TXF message.
|
||||
*/
|
||||
const bool implicit_lod = (inst->opcode == SHADER_OPCODE_TXL ||
|
||||
inst->opcode == SHADER_OPCODE_TXF) &&
|
||||
const bool implicit_lod = (inst->opcode == SHADER_OPCODE_TXL_LOGICAL ||
|
||||
inst->opcode == SHADER_OPCODE_TXF_LOGICAL) &&
|
||||
inst->src[TEX_LOGICAL_SRC_LOD].is_zero();
|
||||
|
||||
/* Calculate the total number of argument components that need to be passed
|
||||
|
@@ -550,26 +550,7 @@ namespace {
|
||||
0, 2 /* XXX */,
|
||||
0, 0, 0, 8 /* XXX */, 0, 0);
|
||||
|
||||
case SHADER_OPCODE_TEX:
|
||||
case FS_OPCODE_TXB:
|
||||
case SHADER_OPCODE_TXD:
|
||||
case SHADER_OPCODE_TXF:
|
||||
case SHADER_OPCODE_TXF_LZ:
|
||||
case SHADER_OPCODE_TXL:
|
||||
case SHADER_OPCODE_TXL_LZ:
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
case SHADER_OPCODE_TXS:
|
||||
case SHADER_OPCODE_LOD:
|
||||
case SHADER_OPCODE_GET_BUFFER_SIZE:
|
||||
case SHADER_OPCODE_TG4:
|
||||
case SHADER_OPCODE_TG4_BIAS:
|
||||
case SHADER_OPCODE_TG4_EXPLICIT_LOD:
|
||||
case SHADER_OPCODE_TG4_IMPLICIT_LOD:
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOD:
|
||||
case SHADER_OPCODE_TG4_OFFSET_BIAS:
|
||||
case SHADER_OPCODE_SAMPLEINFO:
|
||||
return calculate_desc(info, EU_UNIT_SAMPLER, 2, 0, 0, 0, 16 /* XXX */,
|
||||
8 /* XXX */, 750 /* XXX */, 0, 0,
|
||||
2 /* XXX */, 0);
|
||||
|
@@ -571,10 +571,11 @@ is_high_sampler(const struct intel_device_info *devinfo, const fs_reg &sampler)
|
||||
|
||||
static unsigned
|
||||
sampler_msg_type(const intel_device_info *devinfo,
|
||||
opcode opcode, bool shadow_compare, bool has_min_lod)
|
||||
opcode opcode, bool shadow_compare,
|
||||
bool lod_is_zero, bool has_min_lod)
|
||||
{
|
||||
switch (opcode) {
|
||||
case SHADER_OPCODE_TEX:
|
||||
case SHADER_OPCODE_TEX_LOGICAL:
|
||||
if (devinfo->ver >= 20 && has_min_lod) {
|
||||
return shadow_compare ? XE2_SAMPLER_MESSAGE_SAMPLE_COMPARE_MLOD :
|
||||
XE2_SAMPLER_MESSAGE_SAMPLE_MLOD;
|
||||
@@ -582,72 +583,71 @@ sampler_msg_type(const intel_device_info *devinfo,
|
||||
return shadow_compare ? GFX5_SAMPLER_MESSAGE_SAMPLE_COMPARE :
|
||||
GFX5_SAMPLER_MESSAGE_SAMPLE;
|
||||
}
|
||||
case FS_OPCODE_TXB:
|
||||
case FS_OPCODE_TXB_LOGICAL:
|
||||
return shadow_compare ? GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE :
|
||||
GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS;
|
||||
case SHADER_OPCODE_TXL:
|
||||
case SHADER_OPCODE_TXL_LOGICAL:
|
||||
assert(!has_min_lod);
|
||||
if (lod_is_zero) {
|
||||
return shadow_compare ? GFX9_SAMPLER_MESSAGE_SAMPLE_C_LZ :
|
||||
GFX9_SAMPLER_MESSAGE_SAMPLE_LZ;
|
||||
}
|
||||
return shadow_compare ? GFX5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE :
|
||||
GFX5_SAMPLER_MESSAGE_SAMPLE_LOD;
|
||||
case SHADER_OPCODE_TXL_LZ:
|
||||
assert(!has_min_lod);
|
||||
return shadow_compare ? GFX9_SAMPLER_MESSAGE_SAMPLE_C_LZ :
|
||||
GFX9_SAMPLER_MESSAGE_SAMPLE_LZ;
|
||||
case SHADER_OPCODE_TXS:
|
||||
case SHADER_OPCODE_TXS_LOGICAL:
|
||||
case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
|
||||
assert(!has_min_lod);
|
||||
return GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
|
||||
case SHADER_OPCODE_TXD:
|
||||
case SHADER_OPCODE_TXD_LOGICAL:
|
||||
return shadow_compare ? HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE :
|
||||
GFX5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
|
||||
case SHADER_OPCODE_TXF:
|
||||
case SHADER_OPCODE_TXF_LOGICAL:
|
||||
assert(!has_min_lod);
|
||||
return GFX5_SAMPLER_MESSAGE_SAMPLE_LD;
|
||||
case SHADER_OPCODE_TXF_LZ:
|
||||
assert(!has_min_lod);
|
||||
return GFX9_SAMPLER_MESSAGE_SAMPLE_LD_LZ;
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
return lod_is_zero ? GFX9_SAMPLER_MESSAGE_SAMPLE_LD_LZ :
|
||||
GFX5_SAMPLER_MESSAGE_SAMPLE_LD;
|
||||
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
|
||||
case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
|
||||
assert(!has_min_lod);
|
||||
return GFX9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
case SHADER_OPCODE_TXF_MCS_LOGICAL:
|
||||
assert(!has_min_lod);
|
||||
return GFX7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
|
||||
case SHADER_OPCODE_LOD:
|
||||
case SHADER_OPCODE_LOD_LOGICAL:
|
||||
assert(!has_min_lod);
|
||||
return GFX5_SAMPLER_MESSAGE_LOD;
|
||||
case SHADER_OPCODE_TG4:
|
||||
case SHADER_OPCODE_TG4_LOGICAL:
|
||||
assert(!has_min_lod);
|
||||
return shadow_compare ? GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C :
|
||||
GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
|
||||
break;
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
|
||||
assert(!has_min_lod);
|
||||
return shadow_compare ? GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C :
|
||||
GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOD:
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
|
||||
assert(!has_min_lod);
|
||||
assert(devinfo->ver >= 20);
|
||||
return shadow_compare ? XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_L_C:
|
||||
XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_L;
|
||||
case SHADER_OPCODE_TG4_OFFSET_BIAS:
|
||||
case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
|
||||
assert(!has_min_lod);
|
||||
assert(devinfo->ver >= 20);
|
||||
return XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_B;
|
||||
case SHADER_OPCODE_TG4_BIAS:
|
||||
case SHADER_OPCODE_TG4_BIAS_LOGICAL:
|
||||
assert(!has_min_lod);
|
||||
assert(devinfo->ver >= 20);
|
||||
return XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_B;
|
||||
case SHADER_OPCODE_TG4_EXPLICIT_LOD:
|
||||
case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
|
||||
assert(!has_min_lod);
|
||||
assert(devinfo->ver >= 20);
|
||||
return shadow_compare ? XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L_C :
|
||||
XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L;
|
||||
case SHADER_OPCODE_TG4_IMPLICIT_LOD:
|
||||
case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
|
||||
assert(!has_min_lod);
|
||||
assert(devinfo->ver >= 20);
|
||||
return shadow_compare ? XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I_C :
|
||||
XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I;
|
||||
case SHADER_OPCODE_SAMPLEINFO:
|
||||
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
|
||||
assert(!has_min_lod);
|
||||
return GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
|
||||
default:
|
||||
@@ -702,14 +702,14 @@ static bool
|
||||
shader_opcode_needs_header(opcode op)
|
||||
{
|
||||
switch (op) {
|
||||
case SHADER_OPCODE_TG4:
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
case SHADER_OPCODE_TG4_OFFSET_BIAS:
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOD:
|
||||
case SHADER_OPCODE_TG4_BIAS:
|
||||
case SHADER_OPCODE_TG4_EXPLICIT_LOD:
|
||||
case SHADER_OPCODE_TG4_IMPLICIT_LOD:
|
||||
case SHADER_OPCODE_SAMPLEINFO:
|
||||
case SHADER_OPCODE_TG4_LOGICAL:
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
|
||||
case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
|
||||
case SHADER_OPCODE_TG4_BIAS_LOGICAL:
|
||||
case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
|
||||
case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
|
||||
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
@@ -719,7 +719,7 @@ shader_opcode_needs_header(opcode op)
|
||||
}
|
||||
|
||||
static void
|
||||
lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||
const fs_reg &coordinate,
|
||||
const fs_reg &shadow_c,
|
||||
fs_reg lod, const fs_reg &lod2,
|
||||
@@ -746,6 +746,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
brw_reg_type_from_bit_size(payload_type_bit_size, BRW_REGISTER_TYPE_D);
|
||||
unsigned reg_width = bld.dispatch_width() / 8;
|
||||
unsigned header_size = 0, length = 0;
|
||||
opcode op = inst->opcode;
|
||||
fs_reg sources[1 + MAX_SAMPLER_MESSAGE_SIZE];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
|
||||
sources[i] = bld.vgrf(payload_type);
|
||||
@@ -855,22 +856,14 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
}
|
||||
}
|
||||
|
||||
/* Change the opcode to account for LOD being zero before the
|
||||
* switch-statement that emits sources based on the opcode.
|
||||
*/
|
||||
if (lod.is_zero()) {
|
||||
if (op == SHADER_OPCODE_TXL)
|
||||
op = SHADER_OPCODE_TXL_LZ;
|
||||
else if (op == SHADER_OPCODE_TXF)
|
||||
op = SHADER_OPCODE_TXF_LZ;
|
||||
}
|
||||
const bool lod_is_zero = lod.is_zero();
|
||||
|
||||
/* On Xe2 and newer platforms, min_lod is the first parameter specifically
|
||||
* so that a bunch of other, possibly unused, parameters don't need to also
|
||||
* be included.
|
||||
*/
|
||||
const unsigned msg_type =
|
||||
sampler_msg_type(devinfo, op, inst->shadow_compare,
|
||||
sampler_msg_type(devinfo, op, inst->shadow_compare, lod_is_zero,
|
||||
min_lod.file != BAD_FILE);
|
||||
|
||||
const bool min_lod_is_first = devinfo->ver >= 20 &&
|
||||
@@ -891,16 +884,19 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
|
||||
/* Set up the LOD info */
|
||||
switch (op) {
|
||||
case FS_OPCODE_TXB:
|
||||
case SHADER_OPCODE_TG4_BIAS:
|
||||
case SHADER_OPCODE_TG4_EXPLICIT_LOD:
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOD:
|
||||
case SHADER_OPCODE_TG4_OFFSET_BIAS:
|
||||
case SHADER_OPCODE_TXL:
|
||||
case SHADER_OPCODE_TXL_LOGICAL:
|
||||
if (lod_is_zero)
|
||||
break;
|
||||
FALLTHROUGH;
|
||||
case FS_OPCODE_TXB_LOGICAL:
|
||||
case SHADER_OPCODE_TG4_BIAS_LOGICAL:
|
||||
case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
|
||||
case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
|
||||
bld.MOV(sources[length], lod);
|
||||
length++;
|
||||
break;
|
||||
case SHADER_OPCODE_TXD:
|
||||
case SHADER_OPCODE_TXD_LOGICAL:
|
||||
/* TXD should have been lowered in SIMD16 mode (in SIMD32 mode in
|
||||
* Xe2+).
|
||||
*/
|
||||
@@ -923,7 +919,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
|
||||
coordinate_done = true;
|
||||
break;
|
||||
case SHADER_OPCODE_TXS:
|
||||
case SHADER_OPCODE_TXS_LOGICAL:
|
||||
bld.MOV(retype(sources[length], payload_unsigned_type), lod);
|
||||
length++;
|
||||
break;
|
||||
@@ -932,8 +928,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
bld.MOV(retype(sources[length], payload_unsigned_type), brw_imm_ud(0));
|
||||
length++;
|
||||
break;
|
||||
case SHADER_OPCODE_TXF:
|
||||
case SHADER_OPCODE_TXF_LZ:
|
||||
case SHADER_OPCODE_TXF_LOGICAL:
|
||||
/* On Gfx9 the parameters are intermixed: they are u, v, lod, r. */
|
||||
bld.MOV(retype(sources[length++], payload_signed_type), coordinate);
|
||||
|
||||
@@ -945,7 +940,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
}
|
||||
length++;
|
||||
|
||||
if (op != SHADER_OPCODE_TXF_LZ) {
|
||||
if (!lod_is_zero) {
|
||||
bld.MOV(retype(sources[length], payload_signed_type), lod);
|
||||
length++;
|
||||
}
|
||||
@@ -957,50 +952,38 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
coordinate_done = true;
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
if (op == SHADER_OPCODE_TXF_CMS_W) {
|
||||
bld.MOV(retype(sources[length++], payload_unsigned_type), sample_index);
|
||||
}
|
||||
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
|
||||
case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
|
||||
bld.MOV(retype(sources[length++], payload_unsigned_type), sample_index);
|
||||
|
||||
/* Data from the multisample control surface. */
|
||||
if (op == SHADER_OPCODE_TXF_CMS_W) {
|
||||
unsigned num_mcs_components = 1;
|
||||
|
||||
/* From the Gfx12HP BSpec: Render Engine - 3D and GPGPU Programs -
|
||||
for (unsigned i = 0; i < 2; ++i) {
|
||||
/* Sampler always writes 4/8 register worth of data but for ld_mcs
|
||||
* only valid data is in the first two registers. So with a 16-bit
|
||||
* payload, we need to split the two 32-bit registers into four 16-bit
|
||||
* payloads.
|
||||
*
|
||||
* From the Gfx12HP BSpec: Render Engine - 3D and GPGPU Programs -
|
||||
* Shared Functions - 3D Sampler - Messages - Message Format:
|
||||
*
|
||||
* ld2dms_w si mcs0 mcs1 mcs2 mcs3 u v r
|
||||
*/
|
||||
if (op == SHADER_OPCODE_TXF_CMS_W)
|
||||
num_mcs_components = 2;
|
||||
|
||||
for (unsigned i = 0; i < num_mcs_components; ++i) {
|
||||
/* Sampler always writes 4/8 register worth of data but for ld_mcs
|
||||
* only valid data is in the first two registers. So with a 16-bit
|
||||
* payload, we need to split the two 32-bit registers into four 16-bit
|
||||
* payloads.
|
||||
*
|
||||
* From the Gfx12HP BSpec: Render Engine - 3D and GPGPU Programs -
|
||||
* Shared Functions - 3D Sampler - Messages - Message Format:
|
||||
*
|
||||
* ld2dms_w si mcs0 mcs1 mcs2 mcs3 u v r
|
||||
*/
|
||||
if (devinfo->verx10 >= 125 && op == SHADER_OPCODE_TXF_CMS_W) {
|
||||
fs_reg tmp = offset(mcs, bld, i);
|
||||
bld.MOV(retype(sources[length++], payload_unsigned_type),
|
||||
mcs.file == IMM ? mcs :
|
||||
subscript(tmp, payload_unsigned_type, 0));
|
||||
bld.MOV(retype(sources[length++], payload_unsigned_type),
|
||||
mcs.file == IMM ? mcs :
|
||||
subscript(tmp, payload_unsigned_type, 1));
|
||||
} else {
|
||||
bld.MOV(retype(sources[length++], payload_unsigned_type),
|
||||
mcs.file == IMM ? mcs : offset(mcs, bld, i));
|
||||
}
|
||||
if (op == SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL) {
|
||||
fs_reg tmp = offset(mcs, bld, i);
|
||||
bld.MOV(retype(sources[length++], payload_unsigned_type),
|
||||
mcs.file == IMM ? mcs :
|
||||
subscript(tmp, payload_unsigned_type, 0));
|
||||
bld.MOV(retype(sources[length++], payload_unsigned_type),
|
||||
mcs.file == IMM ? mcs :
|
||||
subscript(tmp, payload_unsigned_type, 1));
|
||||
} else {
|
||||
bld.MOV(retype(sources[length++], payload_unsigned_type),
|
||||
mcs.file == IMM ? mcs : offset(mcs, bld, i));
|
||||
}
|
||||
}
|
||||
FALLTHROUGH;
|
||||
|
||||
case SHADER_OPCODE_TXF_MCS_LOGICAL:
|
||||
/* There is no offsetting for this message; just copy in the integer
|
||||
* texture coordinates.
|
||||
*/
|
||||
@@ -1010,7 +993,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
|
||||
coordinate_done = true;
|
||||
break;
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
|
||||
/* More crazy intermixing */
|
||||
for (unsigned i = 0; i < 2; i++) /* u, v */
|
||||
bld.MOV(sources[length++], offset(coordinate, bld, i));
|
||||
@@ -1037,7 +1020,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
|
||||
if (min_lod.file != BAD_FILE && !min_lod_is_first) {
|
||||
/* Account for all of the missing coordinate sources */
|
||||
if (op == FS_OPCODE_TXB && devinfo->ver >= 20 &&
|
||||
if (op == FS_OPCODE_TXB_LOGICAL && devinfo->ver >= 20 &&
|
||||
inst->has_packed_lod_ai_src) {
|
||||
/* Bspec 64985:
|
||||
*
|
||||
@@ -1052,7 +1035,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
* Param BIAS_AI U V R MLOD
|
||||
*/
|
||||
length += 3 - coord_components;
|
||||
} else if (op == SHADER_OPCODE_TXD && devinfo->verx10 >= 125) {
|
||||
} else if (op == SHADER_OPCODE_TXD_LOGICAL && devinfo->verx10 >= 125) {
|
||||
/* On DG2 and newer platforms, sample_d can only be used with 1D and
|
||||
* 2D surfaces, so the maximum number of gradient components is 2.
|
||||
* In spite of this limitation, the Bspec lists a mysterious R
|
||||
@@ -1065,14 +1048,14 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
length += (2 - grad_components) * 2;
|
||||
} else {
|
||||
length += 4 - coord_components;
|
||||
if (op == SHADER_OPCODE_TXD)
|
||||
if (op == SHADER_OPCODE_TXD_LOGICAL)
|
||||
length += (3 - grad_components) * 2;
|
||||
}
|
||||
|
||||
bld.MOV(sources[length++], min_lod);
|
||||
|
||||
/* Wa_14014595444: Populate MLOD as parameter 5 (twice). */
|
||||
if (devinfo->verx10 == 125 && op == FS_OPCODE_TXB &&
|
||||
if (devinfo->verx10 == 125 && op == FS_OPCODE_TXB_LOGICAL &&
|
||||
!inst->shadow_compare)
|
||||
bld.MOV(sources[length++], min_lod);
|
||||
}
|
||||
@@ -1113,10 +1096,6 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->mlen = mlen;
|
||||
inst->header_size = header_size;
|
||||
|
||||
assert(msg_type == sampler_msg_type(devinfo, op, inst->shadow_compare,
|
||||
min_lod.file != BAD_FILE));
|
||||
|
||||
inst->sfid = BRW_SFID_SAMPLER;
|
||||
if (surface.file == IMM &&
|
||||
(sampler.file == IMM || sampler_handle.file != BAD_FILE)) {
|
||||
@@ -1203,7 +1182,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
|
||||
static unsigned
|
||||
get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo,
|
||||
opcode op, const fs_inst *inst)
|
||||
const fs_inst *inst)
|
||||
{
|
||||
assert(inst);
|
||||
const fs_reg *src = inst->src;
|
||||
@@ -1227,7 +1206,7 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo,
|
||||
* which is already in 16-bits unlike the other parameters that need forced
|
||||
* conversion.
|
||||
*/
|
||||
if (devinfo->verx10 < 125 || op != SHADER_OPCODE_TXF_CMS_W) {
|
||||
if (inst->opcode != SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL) {
|
||||
for (unsigned i = 0; i < TEX_LOGICAL_NUM_SRCS; i++) {
|
||||
assert(src[i].file == BAD_FILE ||
|
||||
brw_reg_type_to_size(src[i].type) == src_type_size);
|
||||
@@ -1246,10 +1225,9 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo,
|
||||
* ld_mcs SIMD8H and SIMD16H Only
|
||||
* ld2dms REMOVEDBY(GEN:HAS:1406788836)
|
||||
*/
|
||||
|
||||
if (op == SHADER_OPCODE_TXF_CMS_W ||
|
||||
op == SHADER_OPCODE_TXF_MCS ||
|
||||
(op == FS_OPCODE_TXB && !inst->has_packed_lod_ai_src &&
|
||||
if (inst->opcode == SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL ||
|
||||
inst->opcode == SHADER_OPCODE_TXF_MCS_LOGICAL ||
|
||||
(inst->opcode == FS_OPCODE_TXB_LOGICAL && !inst->has_packed_lod_ai_src &&
|
||||
devinfo->ver >= 20))
|
||||
src_type_size = 2;
|
||||
|
||||
@@ -1257,7 +1235,7 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo,
|
||||
}
|
||||
|
||||
static void
|
||||
lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
|
||||
lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
{
|
||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||
const fs_reg coordinate = inst->src[TEX_LOGICAL_SRC_COORDINATE];
|
||||
@@ -1280,12 +1258,12 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
|
||||
const bool residency = inst->src[TEX_LOGICAL_SRC_RESIDENCY].ud != 0;
|
||||
|
||||
const unsigned msg_payload_type_bit_size =
|
||||
get_sampler_msg_payload_type_bit_size(devinfo, op, inst);
|
||||
get_sampler_msg_payload_type_bit_size(devinfo, inst);
|
||||
|
||||
/* 16-bit payloads are available only on gfx11+ */
|
||||
assert(msg_payload_type_bit_size != 16 || devinfo->ver >= 11);
|
||||
|
||||
lower_sampler_logical_send(bld, inst, op, coordinate,
|
||||
lower_sampler_logical_send(bld, inst, coordinate,
|
||||
shadow_c, lod, lod2, min_lod,
|
||||
sample_index,
|
||||
mcs, surface, sampler,
|
||||
@@ -2757,80 +2735,25 @@ brw_fs_lower_logical_sends(fs_visitor &s)
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TEX_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TEX);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TXD_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXD);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TXF_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TXL_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXL);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TXS_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXS);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst,
|
||||
SHADER_OPCODE_IMAGE_SIZE_LOGICAL);
|
||||
break;
|
||||
|
||||
case FS_OPCODE_TXB_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, FS_OPCODE_TXB);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
|
||||
case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS_W);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TXF_MCS_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_MCS);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_LOD_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_LOD);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TG4_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TG4_BIAS_LOGICAL:
|
||||
assert(devinfo->ver >= 20);
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4_BIAS);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
|
||||
assert(devinfo->ver >= 20);
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4_EXPLICIT_LOD);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
|
||||
assert(devinfo->ver >= 20);
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4_IMPLICIT_LOD);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4_OFFSET);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4_OFFSET_LOD);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4_OFFSET_BIAS);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
|
||||
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_SAMPLEINFO);
|
||||
lower_sampler_logical_send(ibld, inst);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_GET_BUFFER_SIZE:
|
||||
|
@@ -231,90 +231,6 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
|
||||
latency = 24;
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TEX:
|
||||
case SHADER_OPCODE_TXD:
|
||||
case SHADER_OPCODE_TXF:
|
||||
case SHADER_OPCODE_TXF_LZ:
|
||||
case SHADER_OPCODE_TXL:
|
||||
case SHADER_OPCODE_TXL_LZ:
|
||||
/* 18 cycles:
|
||||
* mov(8) g115<1>F 0F { align1 WE_normal 1Q };
|
||||
* mov(8) g114<1>F 0F { align1 WE_normal 1Q };
|
||||
* send(8) g4<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
|
||||
*
|
||||
* 697 +/-49 cycles (min 610, n=26):
|
||||
* mov(8) g115<1>F 0F { align1 WE_normal 1Q };
|
||||
* mov(8) g114<1>F 0F { align1 WE_normal 1Q };
|
||||
* send(8) g4<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
|
||||
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
|
||||
*
|
||||
* So the latency on our first texture load of the batchbuffer takes
|
||||
* ~700 cycles, since the caches are cold at that point.
|
||||
*
|
||||
* 840 +/- 92 cycles (min 720, n=25):
|
||||
* mov(8) g115<1>F 0F { align1 WE_normal 1Q };
|
||||
* mov(8) g114<1>F 0F { align1 WE_normal 1Q };
|
||||
* send(8) g4<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
|
||||
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
|
||||
* send(8) g4<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
|
||||
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
|
||||
*
|
||||
* On the second load, it takes just an extra ~140 cycles, and after
|
||||
* accounting for the 14 cycles of the MOV's latency, that makes ~130.
|
||||
*
|
||||
* 683 +/- 49 cycles (min = 602, n=47):
|
||||
* mov(8) g115<1>F 0F { align1 WE_normal 1Q };
|
||||
* mov(8) g114<1>F 0F { align1 WE_normal 1Q };
|
||||
* send(8) g4<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
|
||||
* send(8) g50<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
|
||||
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
|
||||
*
|
||||
* The unit appears to be pipelined, since this matches up with the
|
||||
* cache-cold case, despite there being two loads here. If you replace
|
||||
* the g4 in the MOV to null with g50, it's still 693 +/- 52 (n=39).
|
||||
*
|
||||
* So, take some number between the cache-hot 140 cycles and the
|
||||
* cache-cold 700 cycles. No particular tuning was done on this.
|
||||
*
|
||||
* I haven't done significant testing of the non-TEX opcodes. TXL at
|
||||
* least looked about the same as TEX.
|
||||
*/
|
||||
latency = 200;
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TXS:
|
||||
/* Testing textureSize(sampler2D, 0), one load was 420 +/- 41
|
||||
* cycles (n=15):
|
||||
* mov(8) g114<1>UD 0D { align1 WE_normal 1Q };
|
||||
* send(8) g6<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 10, 1) mlen 1 rlen 4 { align1 WE_normal 1Q };
|
||||
* mov(16) g6<1>F g6<8,8,1>D { align1 WE_normal 1Q };
|
||||
*
|
||||
*
|
||||
* Two loads was 535 +/- 30 cycles (n=19):
|
||||
* mov(16) g114<1>UD 0D { align1 WE_normal 1H };
|
||||
* send(16) g6<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 10, 2) mlen 2 rlen 8 { align1 WE_normal 1H };
|
||||
* mov(16) g114<1>UD 0D { align1 WE_normal 1H };
|
||||
* mov(16) g6<1>F g6<8,8,1>D { align1 WE_normal 1H };
|
||||
* send(16) g8<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 10, 2) mlen 2 rlen 8 { align1 WE_normal 1H };
|
||||
* mov(16) g8<1>F g8<8,8,1>D { align1 WE_normal 1H };
|
||||
* add(16) g6<1>F g6<8,8,1>F g8<8,8,1>F { align1 WE_normal 1H };
|
||||
*
|
||||
* Since the only caches that should matter are just the
|
||||
* instruction/state cache containing the surface state, assume that we
|
||||
* always have hot caches.
|
||||
*/
|
||||
latency = 100;
|
||||
break;
|
||||
|
||||
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
|
||||
/* testing using varying-index pull constants:
|
||||
*
|
||||
@@ -352,12 +268,83 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
|
||||
switch (msg_type) {
|
||||
case GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
|
||||
case GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO:
|
||||
/* See also SHADER_OPCODE_TXS */
|
||||
/* Testing textureSize(sampler2D, 0), one load was 420 +/- 41
|
||||
* cycles (n=15):
|
||||
* mov(8) g114<1>UD 0D { align1 WE_normal 1Q };
|
||||
* send(8) g6<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 10, 1) mlen 1 rlen 4 { align1 WE_normal 1Q };
|
||||
* mov(16) g6<1>F g6<8,8,1>D { align1 WE_normal 1Q };
|
||||
*
|
||||
*
|
||||
* Two loads was 535 +/- 30 cycles (n=19):
|
||||
* mov(16) g114<1>UD 0D { align1 WE_normal 1H };
|
||||
* send(16) g6<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 10, 2) mlen 2 rlen 8 { align1 WE_normal 1H };
|
||||
* mov(16) g114<1>UD 0D { align1 WE_normal 1H };
|
||||
* mov(16) g6<1>F g6<8,8,1>D { align1 WE_normal 1H };
|
||||
* send(16) g8<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 10, 2) mlen 2 rlen 8 { align1 WE_normal 1H };
|
||||
* mov(16) g8<1>F g8<8,8,1>D { align1 WE_normal 1H };
|
||||
* add(16) g6<1>F g6<8,8,1>F g8<8,8,1>F { align1 WE_normal 1H };
|
||||
*
|
||||
* Since the only caches that should matter are just the
|
||||
* instruction/state cache containing the surface state,
|
||||
* assume that we always have hot caches.
|
||||
*/
|
||||
latency = 100;
|
||||
break;
|
||||
|
||||
default:
|
||||
/* See also SHADER_OPCODE_TEX */
|
||||
/* 18 cycles:
|
||||
* mov(8) g115<1>F 0F { align1 WE_normal 1Q };
|
||||
* mov(8) g114<1>F 0F { align1 WE_normal 1Q };
|
||||
* send(8) g4<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
|
||||
*
|
||||
* 697 +/-49 cycles (min 610, n=26):
|
||||
* mov(8) g115<1>F 0F { align1 WE_normal 1Q };
|
||||
* mov(8) g114<1>F 0F { align1 WE_normal 1Q };
|
||||
* send(8) g4<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
|
||||
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
|
||||
*
|
||||
* So the latency on our first texture load of the batchbuffer
|
||||
* takes ~700 cycles, since the caches are cold at that point.
|
||||
*
|
||||
* 840 +/- 92 cycles (min 720, n=25):
|
||||
* mov(8) g115<1>F 0F { align1 WE_normal 1Q };
|
||||
* mov(8) g114<1>F 0F { align1 WE_normal 1Q };
|
||||
* send(8) g4<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
|
||||
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
|
||||
* send(8) g4<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
|
||||
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
|
||||
*
|
||||
* On the second load, it takes just an extra ~140 cycles, and
|
||||
* after accounting for the 14 cycles of the MOV's latency, that
|
||||
* makes ~130.
|
||||
*
|
||||
* 683 +/- 49 cycles (min = 602, n=47):
|
||||
* mov(8) g115<1>F 0F { align1 WE_normal 1Q };
|
||||
* mov(8) g114<1>F 0F { align1 WE_normal 1Q };
|
||||
* send(8) g4<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
|
||||
* send(8) g50<1>UW g114<8,8,1>F
|
||||
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
|
||||
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
|
||||
*
|
||||
* The unit appears to be pipelined, since this matches up with
|
||||
* the cache-cold case, despite there being two loads here. If
|
||||
* you replace the g4 in the MOV to null with g50, it's still
|
||||
* 693 +/- 52 (n=39).
|
||||
*
|
||||
* So, take some number between the cache-hot 140 cycles and the
|
||||
* cache-cold 700 cycles. No particular tuning was done on this.
|
||||
*
|
||||
* I haven't done significant testing of the non-TEX opcodes.
|
||||
* TXL at least looked about the same as TEX.
|
||||
*/
|
||||
latency = 200;
|
||||
break;
|
||||
}
|
||||
|
Reference in New Issue
Block a user