pvr: Add support for generating transfer fragment programs
Co-authored-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com> Co-authored-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com> Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com> Signed-off-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com> Signed-off-by: Simon Perretta <simon.perretta@imgtec.com> Acked-by: Frank Binns <frank.binns@imgtec.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21550>
This commit is contained in:

committed by
Marge Bot

parent
eeac8336ef
commit
f0b47cfd65
@@ -370,6 +370,12 @@ rogue_reg *rogue_pixout_reg(rogue_shader *shader, unsigned index)
|
||||
return rogue_reg_cached(shader, ROGUE_REG_CLASS_PIXOUT, index);
|
||||
}
|
||||
|
||||
PUBLIC
|
||||
rogue_reg *rogue_special_reg(rogue_shader *shader, unsigned index)
|
||||
{
|
||||
return rogue_reg_cached(shader, ROGUE_REG_CLASS_SPECIAL, index);
|
||||
}
|
||||
|
||||
PUBLIC
|
||||
rogue_reg *rogue_vtxin_reg(rogue_shader *shader, unsigned index)
|
||||
{
|
||||
@@ -585,6 +591,16 @@ rogue_coeff_regarray(rogue_shader *shader, unsigned size, unsigned start_index)
|
||||
start_index);
|
||||
}
|
||||
|
||||
PUBLIC
|
||||
rogue_regarray *
|
||||
rogue_shared_regarray(rogue_shader *shader, unsigned size, unsigned start_index)
|
||||
{
|
||||
return rogue_regarray_cached(shader,
|
||||
size,
|
||||
ROGUE_REG_CLASS_SHARED,
|
||||
start_index);
|
||||
}
|
||||
|
||||
PUBLIC
|
||||
rogue_regarray *rogue_ssa_vec_regarray(rogue_shader *shader,
|
||||
unsigned size,
|
||||
|
@@ -1887,6 +1887,8 @@ rogue_reg *rogue_const_reg(rogue_shader *shader, unsigned index);
|
||||
|
||||
rogue_reg *rogue_pixout_reg(rogue_shader *shader, unsigned index);
|
||||
|
||||
/* Returns the ROGUE_REG_CLASS_SPECIAL register with the given index. */
rogue_reg *rogue_special_reg(rogue_shader *shader, unsigned index);
|
||||
|
||||
rogue_reg *rogue_vtxin_reg(rogue_shader *shader, unsigned index);
|
||||
|
||||
rogue_reg *rogue_vtxout_reg(rogue_shader *shader, unsigned index);
|
||||
@@ -1905,6 +1907,10 @@ rogue_temp_regarray(rogue_shader *shader, unsigned size, unsigned start_index);
|
||||
rogue_regarray *
|
||||
rogue_coeff_regarray(rogue_shader *shader, unsigned size, unsigned start_index);
|
||||
|
||||
/* Returns the ROGUE_REG_CLASS_SHARED register array described by size and
 * start_index.
 */
rogue_regarray *rogue_shared_regarray(rogue_shader *shader,
                                      unsigned size,
                                      unsigned start_index);
|
||||
|
||||
rogue_regarray *rogue_ssa_vec_regarray(rogue_shader *shader,
|
||||
unsigned size,
|
||||
unsigned start_index,
|
||||
|
@@ -682,11 +682,9 @@ static inline void rogue_feedback_used_regs(rogue_build_ctx *ctx,
|
||||
{
|
||||
/* TODO NEXT: Use this counting method elsewhere as well. */
|
||||
ctx->common_data[shader->stage].temps =
|
||||
__bitset_count(shader->regs_used[ROGUE_REG_CLASS_TEMP],
|
||||
BITSET_WORDS(rogue_reg_infos[ROGUE_REG_CLASS_TEMP].num));
|
||||
ctx->common_data[shader->stage].internals = __bitset_count(
|
||||
shader->regs_used[ROGUE_REG_CLASS_INTERNAL],
|
||||
BITSET_WORDS(rogue_reg_infos[ROGUE_REG_CLASS_INTERNAL].num));
|
||||
rogue_count_used_regs(shader, ROGUE_REG_CLASS_TEMP);
|
||||
ctx->common_data[shader->stage].internals =
|
||||
rogue_count_used_regs(shader, ROGUE_REG_CLASS_INTERNAL);
|
||||
}
|
||||
|
||||
static bool ssa_def_cb(nir_ssa_def *ssa, void *state)
|
||||
|
@@ -1304,14 +1304,14 @@ static void rogue_encode_instr_group(rogue_instr_group *group,
|
||||
}
|
||||
|
||||
PUBLIC
|
||||
void rogue_encode_shader(UNUSED rogue_build_ctx *ctx,
|
||||
void rogue_encode_shader(rogue_build_ctx *ctx,
|
||||
rogue_shader *shader,
|
||||
struct util_dynarray *binary)
|
||||
{
|
||||
if (!shader->is_grouped)
|
||||
unreachable("Can't encode shader with ungrouped instructions.");
|
||||
|
||||
util_dynarray_init(binary, shader);
|
||||
util_dynarray_init(binary, ctx);
|
||||
|
||||
rogue_foreach_instr_group_in_shader (group, shader)
|
||||
rogue_encode_instr_group(group, binary);
|
||||
|
@@ -581,7 +581,7 @@ const rogue_alu_op_info rogue_alu_op_infos[ROGUE_ALU_OP_COUNT] = {
|
||||
},
|
||||
.supported_dst_types = { [0] = T(REG) | T(REGARRAY) | T(IO), },
|
||||
.supported_src_types = {
|
||||
[0] = T(REG),
|
||||
[0] = T(REG) | T(REGARRAY),
|
||||
},
|
||||
},
|
||||
[ROGUE_ALU_OP_FADD] = { .str = "fadd", .num_dsts = 1, .num_srcs = 2,
|
||||
@@ -685,7 +685,7 @@ const rogue_alu_op_info rogue_alu_op_infos[ROGUE_ALU_OP_COUNT] = {
|
||||
[ROGUE_ALU_OP_MOV] = { .str = "mov", .num_dsts = 1, .num_srcs = 1,
|
||||
.supported_dst_types = { [0] = T(REG) | T(REGARRAY), },
|
||||
.supported_src_types = {
|
||||
[0] = T(REG) | T(IMM),
|
||||
[0] = T(REG) | T(REGARRAY) | T(IMM),
|
||||
},
|
||||
},
|
||||
[ROGUE_ALU_OP_CMOV] = { .str = "cmov", .num_dsts = 1, .num_srcs = 3,
|
||||
|
@@ -643,10 +643,15 @@ void rogue_print_shader(FILE *fp, const rogue_shader *shader)
|
||||
{
|
||||
fputs("/*", fp);
|
||||
|
||||
if (shader->name)
|
||||
fprintf(fp, " \"%s\":", shader->name);
|
||||
if (shader->stage == MESA_SHADER_NONE)
|
||||
fputs(" USC program", fp);
|
||||
else
|
||||
fprintf(fp, " %s shader", _mesa_shader_stage_to_string(shader->stage));
|
||||
|
||||
fprintf(fp, " %s shader */\n", _mesa_shader_stage_to_string(shader->stage));
|
||||
if (shader->name)
|
||||
fprintf(fp, " - %s", shader->name);
|
||||
|
||||
fputs(" */\n", fp);
|
||||
|
||||
rogue_foreach_block (block, shader)
|
||||
rogue_print_block(fp, block);
|
||||
|
@@ -67,6 +67,7 @@ pvr_files = files(
|
||||
'pvr_wsi.c',
|
||||
|
||||
'usc/pvr_uscgen.c',
|
||||
'usc/pvr_uscgen_tq.c',
|
||||
)
|
||||
|
||||
pvr_includes = [
|
||||
|
@@ -57,6 +57,11 @@
|
||||
#define PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT \
|
||||
(uint32_t)(VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT + 1U)
|
||||
|
||||
/* Transfer limits. PVR_TRANSFER_MAX_IMAGES is the product of the two limits
 * defined before it.
 */
#define PVR_TRANSFER_MAX_LAYERS 1U
#define PVR_TRANSFER_MAX_LOADS 4U
#define PVR_TRANSFER_MAX_IMAGES \
   (PVR_TRANSFER_MAX_LAYERS * PVR_TRANSFER_MAX_LOADS)
|
||||
|
||||
/* TODO: move into a common surface library? */
|
||||
enum pvr_memlayout {
|
||||
PVR_MEMLAYOUT_UNDEFINED = 0, /* explicitly treat 0 as undefined */
|
||||
@@ -141,6 +146,30 @@ enum pvr_stage_allocation {
|
||||
PVR_STAGE_ALLOCATION_COUNT
|
||||
};
|
||||
|
||||
/* Resolve operations. The numeric values are spelled out so that they stay
 * fixed if the list is ever reordered.
 */
enum pvr_resolve_op {
   PVR_RESOLVE_BLEND   = 0,
   PVR_RESOLVE_MIN     = 1,
   PVR_RESOLVE_MAX     = 2,
   PVR_RESOLVE_SAMPLE0 = 3,
   PVR_RESOLVE_SAMPLE1 = 4,
   PVR_RESOLVE_SAMPLE2 = 5,
   PVR_RESOLVE_SAMPLE3 = 6,
   PVR_RESOLVE_SAMPLE4 = 7,
   PVR_RESOLVE_SAMPLE5 = 8,
   PVR_RESOLVE_SAMPLE6 = 9,
   PVR_RESOLVE_SAMPLE7 = 10,
};
|
||||
|
||||
/* Alpha types. The numeric values are spelled out so that they stay fixed if
 * the list is ever reordered.
 */
enum pvr_alpha_type {
   PVR_ALPHA_NONE                      = 0,
   PVR_ALPHA_SOURCE                    = 1,
   PVR_ALPHA_PREMUL_SOURCE             = 2,
   PVR_ALPHA_GLOBAL                    = 3,
   PVR_ALPHA_PREMUL_SOURCE_WITH_GLOBAL = 4,
   PVR_ALPHA_CUSTOM                    = 5,
   PVR_ALPHA_AATEXT                    = 6,
};
|
||||
|
||||
enum pvr_event_state {
|
||||
PVR_EVENT_STATE_SET_BY_HOST,
|
||||
PVR_EVENT_STATE_RESET_BY_HOST,
|
||||
|
@@ -28,6 +28,7 @@
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
#include "hwdef/rogue_hw_utils.h"
|
||||
#include "pvr_common.h"
|
||||
#include "pvr_formats.h"
|
||||
#include "pvr_private.h"
|
||||
#include "util/bitpack_helpers.h"
|
||||
@@ -1031,3 +1032,76 @@ bool pvr_format_is_pbe_downscalable(VkFormat vk_format)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t pvr_pbe_pixel_num_loads(enum pvr_transfer_pbe_pixel_src pbe_format,
|
||||
uint32_t alpha_type)
|
||||
{
|
||||
switch (alpha_type) {
|
||||
default:
|
||||
case PVR_ALPHA_NONE:
|
||||
break;
|
||||
case PVR_ALPHA_SOURCE:
|
||||
case PVR_ALPHA_PREMUL_SOURCE:
|
||||
case PVR_ALPHA_PREMUL_SOURCE_WITH_GLOBAL:
|
||||
case PVR_ALPHA_GLOBAL:
|
||||
return 2U;
|
||||
}
|
||||
|
||||
switch (pbe_format) {
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_D24S8:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_S8D24:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_D32S8:
|
||||
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
|
||||
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_YUV_PACKED:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_YVU_PACKED:
|
||||
return 1U;
|
||||
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_NUM:
|
||||
default:
|
||||
return 0U;
|
||||
}
|
||||
}
|
||||
|
@@ -57,6 +57,156 @@ enum pvr_pbe_accum_format {
|
||||
PVR_PBE_ACCUM_FORMAT_U24,
|
||||
};
|
||||
|
||||
/**
|
||||
* Pixel related shader selector. The logic selecting the shader has to take
|
||||
* into account the pixel related properties (controlling the conversion path in
|
||||
* the shader) and the geometry related properties (controlling the sample
|
||||
* position calcs). These two can be orthogonal.
|
||||
*
|
||||
* integer format conversions, bit depth : 8, 16, 32 per ch formats : signed,
|
||||
* unsigned. Strategy: convert everything to U32 or S32 then USC pack. PBE just
|
||||
* pass through.
|
||||
*
|
||||
* fixed point format conversions, bit depth 565, 1555, 555 etc. Strategy:
|
||||
* fcnorm to 4 F32, then USC pack to F16F16. PBE converts to destination
|
||||
*
|
||||
* float/fixed format conversions
|
||||
* strategy: fcnorm, then pack to f16 _when_ destination is not f32.
|
||||
* fmt | unorm | flt |
|
||||
* 8 | x | |
|
||||
* 16 | x | x |
|
||||
* 32 | x | x |
|
||||
*
|
||||
*
|
||||
* non-merge type DS blit table
|
||||
* **********************************************
|
||||
* * * S8 D16 D24S8 D32 D32S8 *
|
||||
* **********************************************
|
||||
* * S8 * cpy i i i i *
|
||||
* * D16 * i cpy i - i *
|
||||
* * D24S8 * swiz - cpy (1) - *
|
||||
* * D32 * i - i cpy i *
|
||||
* * D32S8 * (2) - - cpy cpy *
|
||||
* **********************************************
|
||||
*
|
||||
* merge with stencil pick type DS blit table
|
||||
* **********************************************
|
||||
* * * S8 D16 D24S8 D32 D32S8 *
|
||||
* **********************************************
|
||||
* * S8 * i i (1) i (2) *
|
||||
* * D16 * i i i i i *
|
||||
* * D24S8 * i i (3) i (4) *
|
||||
* * D32 * i i i i i *
|
||||
* * D32S8 * i i (5) i (6) *
|
||||
* **********************************************
|
||||
*
|
||||
* merge with depth pick type DS blit table
|
||||
* **********************************************
|
||||
* * * S8 D16 D24S8 D32 D32S8 *
|
||||
* **********************************************
|
||||
* * S8 * i i i i i *
|
||||
* * D16 * - - - - - *
|
||||
* * D24S8 * - - (s) - - *
|
||||
* * D32 * - - (1) - (2) *
|
||||
* * D32S8 * - - - - (s) *
|
||||
* **********************************************
|
||||
*
|
||||
* D formats are unpacked into a single register according to their format
|
||||
* S formats are unpacked into a single register in U8
|
||||
* D24S8 is in a single 32 bit register (as the PBE can't read it from
|
||||
* unpacked.)
|
||||
*
|
||||
* Swizzles are applied on the TPU not the PBE because of potential
|
||||
* accumulation i.e. a non-iterated shader doesn't know if it writes the output
|
||||
* buffer for PBE emit or a second pass blend.
|
||||
*/
|
||||
/* Pixel source selector for transfer shaders. Values are explicit and must not
 * be renumbered.
 */
enum pvr_transfer_pbe_pixel_src {
   /* 8 and 16 bit per channel integer formats. */
   PVR_TRANSFER_PBE_PIXEL_SRC_UU8888             = 0,
   PVR_TRANSFER_PBE_PIXEL_SRC_US8888             = 1,
   PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16            = 2,
   PVR_TRANSFER_PBE_PIXEL_SRC_US16S16            = 3,
   PVR_TRANSFER_PBE_PIXEL_SRC_SU8888             = 4,
   PVR_TRANSFER_PBE_PIXEL_SRC_SS8888             = 5,
   PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16            = 6,
   PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16            = 7,

   /* 10:10:10:2 formats. */
   PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102          = 8,
   PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102          = 9,
   PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102   = 10,
   PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102   = 11,

   /* 32 bit per channel integer formats. */
   PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32            = 12,
   PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32             = 13,
   PVR_TRANSFER_PBE_PIXEL_SRC_US32S32            = 14,
   PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32             = 15,

   /* 16 bit float / normalised formats. */
   PVR_TRANSFER_PBE_PIXEL_SRC_F16F16             = 16,
   PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM            = 17,
   PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM            = 18,

   /* 32 bit float formats. */
   PVR_TRANSFER_PBE_PIXEL_SRC_F32X4              = 19,
   PVR_TRANSFER_PBE_PIXEL_SRC_F32X2              = 20,
   PVR_TRANSFER_PBE_PIXEL_SRC_F32                = 21,

   /* Raw formats. */
   PVR_TRANSFER_PBE_PIXEL_SRC_RAW32              = 22,
   PVR_TRANSFER_PBE_PIXEL_SRC_RAW64              = 23,
   PVR_TRANSFER_PBE_PIXEL_SRC_RAW128             = 24,

   /* f16 to U8 conversion in shader. */
   PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8             = 25,

   PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB          = 26,
   PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45           = 27,

   /* Depth/stencil formats. */
   PVR_TRANSFER_PBE_PIXEL_SRC_D24S8              = 28,
   PVR_TRANSFER_PBE_PIXEL_SRC_S8D24              = 29,
   PVR_TRANSFER_PBE_PIXEL_SRC_D32S8              = 30,

   /* D: D32_S8 */
   PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8      = 31,
   PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8   = 32,
   PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8   = 33,
   PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8   = 34,

   /* D: D32 */
   PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32       = 35,
   PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F     = 36,

   /* D : D24_S8 */
   PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8      = 37,
   PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8   = 38,
   PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8   = 39,
   PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8     = 40,
   PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8     = 41,
   PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8    = 42,

   /* ob0 holds Y and ob0 holds U or V. */
   PVR_TRANSFER_PBE_PIXEL_SRC_YUV_PACKED         = 43,

   /* ob0 holds Y, ob1 holds U, ob2 holds V. */
   PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V              = 44,

   /* Mask sources. */
   PVR_TRANSFER_PBE_PIXEL_SRC_MASK16             = 45,
   PVR_TRANSFER_PBE_PIXEL_SRC_MASK32             = 46,
   PVR_TRANSFER_PBE_PIXEL_SRC_MASK48             = 47,
   PVR_TRANSFER_PBE_PIXEL_SRC_MASK64             = 48,
   PVR_TRANSFER_PBE_PIXEL_SRC_MASK96             = 49,
   PVR_TRANSFER_PBE_PIXEL_SRC_MASK128            = 50,

   PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8   = 51,

   /* ob0 holds Y and ob0 holds V or U. */
   PVR_TRANSFER_PBE_PIXEL_SRC_YVU_PACKED         = 52,

   /* ob0 holds Y, ob1 holds UV interleaved. */
   PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED   = 53,

   /* FIXME: This changes for other BVNC's which may change the hashing logic
    * in pvr_hash_shader.
    */
   PVR_TRANSFER_PBE_PIXEL_SRC_NUM                = 54,
};
|
||||
|
||||
const uint8_t *pvr_get_format_swizzle(VkFormat vk_format);
|
||||
uint32_t pvr_get_tex_format(VkFormat vk_format);
|
||||
uint32_t pvr_get_tex_format_aspect(VkFormat vk_format,
|
||||
@@ -70,4 +220,13 @@ void pvr_get_hw_clear_color(VkFormat vk_format,
|
||||
VkClearColorValue value,
|
||||
uint32_t packed_out[static const 4]);
|
||||
|
||||
/* Returns the number of loads for the given PBE pixel source format and alpha
 * type.
 *
 * TODO: alpha_type carries 'enum pvr_alpha_type' values. See if we can move
 * that enum in here. It's currently in pvr_common.h and it doesn't seem
 * appropriate including that in here. Also moving the definition in here would
 * make pvr_common.h include this which would mean that the compiler would be
 * pulling in vulkan specific format stuff.
 */
uint32_t pvr_pbe_pixel_num_loads(enum pvr_transfer_pbe_pixel_src pbe_format,
                                 uint32_t alpha_type);
|
||||
|
||||
#endif /* PVR_FORMATS_H */
|
||||
|
@@ -24,9 +24,119 @@
|
||||
#ifndef PVR_USCGEN_H
|
||||
#define PVR_USCGEN_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "pvr_common.h"
|
||||
#include "pvr_formats.h"
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
#include <stdint.h>
|
||||
/* Selector for how many float coefficients a coordinate set uses. */
enum pvr_int_coord_set_floats {
   PVR_INT_COORD_SET_FLOATS_0   = 0,
   PVR_INT_COORD_SET_FLOATS_4   = 1,

   /* For rate changes to 0 base screen space. */
   PVR_INT_COORD_SET_FLOATS_6   = 2,

   /* Number of selectors above; not a selector itself. */
   PVR_INT_COORD_SET_FLOATS_NUM = 3
};
|
||||
|
||||
struct pvr_tq_shader_properties {
|
||||
/* Controls whether this is an iterated shader. */
|
||||
bool iterated;
|
||||
|
||||
/* Controls whether this is meant to be running at full rate. */
|
||||
bool full_rate;
|
||||
|
||||
/* Sample specific channel of pixel. */
|
||||
bool pick_component;
|
||||
|
||||
/* Alpha type from transfer API. */
|
||||
uint32_t alpha_type;
|
||||
|
||||
struct pvr_tq_layer_properties {
|
||||
/* Controls whether we need to send the sample count to the TPU. */
|
||||
bool msaa;
|
||||
|
||||
/* In case we run pixel rate, to do an USC resolve - but still in MSAA TPU
|
||||
* samples.
|
||||
*/
|
||||
uint32_t sample_count;
|
||||
|
||||
enum pvr_resolve_op resolve_op;
|
||||
|
||||
/* Selects the pixel conversion that we have to perform. */
|
||||
enum pvr_transfer_pbe_pixel_src pbe_format;
|
||||
|
||||
/* Sampling from a 3D texture with a constant Z position. */
|
||||
bool sample;
|
||||
|
||||
/* Number of float coefficients to get from screen space to texture space.
|
||||
*/
|
||||
enum pvr_int_coord_set_floats layer_floats;
|
||||
|
||||
/* Unaligned texture address in bytes. */
|
||||
uint32_t byte_unwind;
|
||||
|
||||
/* Enable bilinear filter in shader. */
|
||||
bool linear;
|
||||
} layer_props;
|
||||
};
|
||||
|
||||
/* Shared register layout for the transfer queue (TQ) fragment program.
 *
 * All offsets are in dwords.
 *
 * Devices may have more than 256 sh regs but we're expecting to use very few so
 * let's use uint8_t.
 */
struct pvr_tq_frag_sh_reg_layout {
   struct {
      /* How many image sampler descriptors are present. */
      uint8_t count;
      /* TODO: See if we ever need more than one combined image sampler
       * descriptor. If this is linked to the amount of layers used, we only
       * ever use one layer so this wouldn't need to be an array.
       */
      struct {
         /* Offset of the image state. */
         uint8_t image;
         /* Offset of the sampler state. */
         uint8_t sampler;
      } offsets[PVR_TRANSFER_MAX_IMAGES];
   } combined_image_samplers;

   /* TODO: Dynamic consts are used for various things so do this properly by
    * having an actual layout instead of chucking them all together using an
    * implicit layout.
    */
   struct {
      /* How many dynamic consts regs have been allocated. */
      uint8_t count;
      uint8_t offset;
   } dynamic_consts;

   /* Total sh regs allocated by the driver. It does not include the regs
    * necessary for compiler_out.
    */
   uint8_t driver_total;

   /* Provided by the compiler to the driver to be appended to the shareds. */
   /* No offset field since these will be appended at the end so driver_total
    * can be used instead.
    */
   struct {
      struct {
         /* TODO: Remove this count and just use `compiler_out_total`? Or remove
          * that one and use this one?
          */
         uint8_t count;
         /* TODO: The array size is chosen arbitrarily based on the max
          * constants currently produced by the compiler. Make this dynamic?
          */
         /* Values to fill in into each shared reg used for usc constants. */
         uint32_t values[10];
      } usc_constants;
   } compiler_out;

   /* Total extra sh regs needed by the compiler that need to be appended to the
    * shareds by the driver.
    */
   uint8_t compiler_out_total;
};
|
||||
|
||||
/* TODO: Shader caching (not pipeline caching) support. */
|
||||
|
||||
@@ -37,4 +147,9 @@ void pvr_uscgen_per_job_eot(uint32_t emit_count,
|
||||
|
||||
void pvr_uscgen_nop(struct util_dynarray *binary);
|
||||
|
||||
/* Generates the transfer queue (TQ) fragment program.
 *
 * shader_props:  properties selecting the program to generate.
 * sh_reg_layout: in: image/sampler state offsets of the first combined image
 *                sampler; out: the compiler_out counts are written.
 * temps_used:    out: number of temp registers used by the program.
 * binary:        out: dynarray receiving the encoded program.
 */
void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props,
                        struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
                        unsigned *temps_used,
                        struct util_dynarray *binary);
|
||||
|
||||
#endif /* PVR_USCGEN_H */
|
||||
|
210
src/imagination/vulkan/usc/pvr_uscgen_tq.c
Normal file
210
src/imagination/vulkan/usc/pvr_uscgen_tq.c
Normal file
@@ -0,0 +1,210 @@
|
||||
/*
|
||||
* Copyright © 2023 Imagination Technologies Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "pvr_uscgen.h"
|
||||
#include "rogue/rogue.h"
|
||||
#include "rogue/rogue_builder.h"
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props,
|
||||
struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
|
||||
unsigned *temps_used,
|
||||
struct util_dynarray *binary)
|
||||
{
|
||||
rogue_builder b;
|
||||
rogue_shader *shader = rogue_shader_create(NULL, MESA_SHADER_NONE);
|
||||
|
||||
unsigned smp_coord_size = 2;
|
||||
unsigned smp_coord_idx = 0;
|
||||
rogue_regarray *smp_coords;
|
||||
|
||||
unsigned channels = 0;
|
||||
unsigned output_idx = 1;
|
||||
rogue_regarray *outputs = NULL;
|
||||
|
||||
unsigned image_state_size = 4;
|
||||
unsigned image_state_idx;
|
||||
rogue_regarray *image_state;
|
||||
|
||||
unsigned smp_state_size = 4;
|
||||
unsigned smp_state_idx;
|
||||
rogue_regarray *smp_state;
|
||||
|
||||
rogue_set_shader_name(shader, "TQ (fragment)");
|
||||
rogue_builder_init(&b, shader);
|
||||
rogue_push_block(&b);
|
||||
|
||||
smp_coords =
|
||||
rogue_ssa_vec_regarray(b.shader, smp_coord_size, smp_coord_idx, 0);
|
||||
|
||||
/* TODO: Unrestrict. */
|
||||
assert(shader_props->full_rate == false);
|
||||
assert(shader_props->pick_component == false);
|
||||
assert(shader_props->alpha_type == 0);
|
||||
|
||||
const struct pvr_tq_layer_properties *layer_props =
|
||||
&shader_props->layer_props;
|
||||
uint32_t loads;
|
||||
|
||||
/* TODO: Unrestrict. */
|
||||
assert(layer_props->msaa == false);
|
||||
assert(layer_props->sample_count == 1U);
|
||||
assert(layer_props->resolve_op == PVR_RESOLVE_BLEND);
|
||||
assert(layer_props->pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_RAW64 ||
|
||||
layer_props->pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_RAW128);
|
||||
assert(layer_props->sample == false);
|
||||
assert(layer_props->layer_floats == PVR_INT_COORD_SET_FLOATS_0);
|
||||
assert(layer_props->byte_unwind == 0);
|
||||
assert(layer_props->linear == false);
|
||||
|
||||
loads = pvr_pbe_pixel_num_loads(layer_props->pbe_format,
|
||||
shader_props->alpha_type);
|
||||
for (uint32_t load = 0; load < loads; ++load) {
|
||||
if (shader_props->iterated) {
|
||||
/* TODO: feed{back,forward} the coeff index to/from shader_info. */
|
||||
unsigned coeff_index = 0;
|
||||
rogue_regarray *coeffs =
|
||||
rogue_coeff_regarray(b.shader, smp_coord_size * 4, coeff_index);
|
||||
|
||||
rogue_instr *instr = &rogue_FITR_PIXEL(&b,
|
||||
rogue_ref_regarray(smp_coords),
|
||||
rogue_ref_drc(0),
|
||||
rogue_ref_regarray(coeffs),
|
||||
rogue_ref_val(smp_coord_size))
|
||||
->instr;
|
||||
rogue_add_instr_comment(instr, "load_iterated");
|
||||
} else {
|
||||
rogue_instr *instr;
|
||||
rogue_regarray *smp_coord_x =
|
||||
rogue_ssa_vec_regarray(b.shader, 1, smp_coord_idx, 0);
|
||||
rogue_regarray *smp_coord_y =
|
||||
rogue_ssa_vec_regarray(b.shader, 1, smp_coord_idx, 1);
|
||||
|
||||
/* (X,Y).P, pixel (X,Y) coordinates, pixel mode. */
|
||||
rogue_reg *in_x = rogue_special_reg(b.shader, 97);
|
||||
rogue_reg *in_y = rogue_special_reg(b.shader, 100);
|
||||
|
||||
instr =
|
||||
&rogue_MOV(&b, rogue_ref_regarray(smp_coord_x), rogue_ref_reg(in_x))
|
||||
->instr;
|
||||
rogue_add_instr_comment(instr, "load_x");
|
||||
|
||||
instr =
|
||||
&rogue_MOV(&b, rogue_ref_regarray(smp_coord_y), rogue_ref_reg(in_y))
|
||||
->instr;
|
||||
rogue_add_instr_comment(instr, "load_y");
|
||||
}
|
||||
|
||||
if (!layer_props->msaa) {
|
||||
} else {
|
||||
unreachable("Unsupported layer property (MSAA).");
|
||||
}
|
||||
}
|
||||
|
||||
/* Source conversion. */
|
||||
switch (layer_props->pbe_format) {
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Unsupported layer property (format).");
|
||||
}
|
||||
|
||||
/* TODO: Select the texture_regs index appropriately. */
|
||||
assert(sh_reg_layout->combined_image_samplers.count == 1);
|
||||
image_state_idx = sh_reg_layout->combined_image_samplers.offsets[0].image;
|
||||
image_state =
|
||||
rogue_shared_regarray(b.shader, image_state_size, image_state_idx);
|
||||
|
||||
smp_state_idx = sh_reg_layout->combined_image_samplers.offsets[0].sampler;
|
||||
smp_state = rogue_shared_regarray(b.shader, smp_state_size, smp_state_idx);
|
||||
|
||||
/* Pack/blend phase. */
|
||||
rogue_backend_instr *smp2d;
|
||||
|
||||
switch (layer_props->pbe_format) {
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128: {
|
||||
switch (layer_props->pbe_format) {
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
|
||||
channels = 2;
|
||||
break;
|
||||
|
||||
case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
|
||||
channels = 4;
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Unsupported layer property (format).");
|
||||
}
|
||||
|
||||
outputs = rogue_ssa_vec_regarray(b.shader, channels, output_idx, 0);
|
||||
|
||||
smp2d = rogue_SMP2D(&b,
|
||||
rogue_ref_regarray(outputs),
|
||||
rogue_ref_drc(0),
|
||||
rogue_ref_regarray(image_state),
|
||||
rogue_ref_regarray(smp_coords),
|
||||
rogue_ref_regarray(smp_state),
|
||||
rogue_ref_io(ROGUE_IO_NONE),
|
||||
rogue_ref_val(channels));
|
||||
rogue_set_backend_op_mod(smp2d, ROGUE_BACKEND_OP_MOD_SLCWRITEBACK);
|
||||
rogue_add_instr_comment(&smp2d->instr, "pack/blend");
|
||||
|
||||
if (!shader_props->iterated)
|
||||
rogue_set_backend_op_mod(smp2d, ROGUE_BACKEND_OP_MOD_NNCOORDS);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("Unsupported layer property (format).");
|
||||
}
|
||||
|
||||
assert(channels && outputs);
|
||||
|
||||
/* Copy outputs. */
|
||||
for (unsigned u = 0; u < channels; ++u) {
|
||||
rogue_regarray *output_elem =
|
||||
rogue_ssa_vec_regarray(b.shader, 1, output_idx, u);
|
||||
rogue_reg *pixout_elem = rogue_pixout_reg(b.shader, u);
|
||||
rogue_MOV(&b,
|
||||
rogue_ref_reg(pixout_elem),
|
||||
rogue_ref_regarray(output_elem));
|
||||
}
|
||||
|
||||
rogue_END(&b);
|
||||
|
||||
rogue_shader_passes(shader);
|
||||
rogue_encode_shader(NULL, shader, binary);
|
||||
|
||||
*temps_used = rogue_count_used_regs(shader, ROGUE_REG_CLASS_TEMP);
|
||||
|
||||
sh_reg_layout->compiler_out.usc_constants.count = 0;
|
||||
sh_reg_layout->compiler_out_total = 0;
|
||||
|
||||
ralloc_free(shader);
|
||||
}
|
Reference in New Issue
Block a user