pvr: Add support for generating transfer fragment programs

Co-authored-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Co-authored-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com>
Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Signed-off-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com>
Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21550>
This commit is contained in:
Simon Perretta
2023-02-11 22:34:05 +00:00
committed by Marge Bot
parent eeac8336ef
commit f0b47cfd65
12 changed files with 626 additions and 13 deletions

View File

@@ -370,6 +370,12 @@ rogue_reg *rogue_pixout_reg(rogue_shader *shader, unsigned index)
return rogue_reg_cached(shader, ROGUE_REG_CLASS_PIXOUT, index);
}
PUBLIC
rogue_reg *rogue_special_reg(rogue_shader *shader, unsigned index)
{
   /* Special registers go through the same per-class register cache as every
    * other register class.
    */
   rogue_reg *reg = rogue_reg_cached(shader, ROGUE_REG_CLASS_SPECIAL, index);
   return reg;
}
PUBLIC
rogue_reg *rogue_vtxin_reg(rogue_shader *shader, unsigned index)
{
@@ -585,6 +591,16 @@ rogue_coeff_regarray(rogue_shader *shader, unsigned size, unsigned start_index)
start_index);
}
PUBLIC
rogue_regarray *
rogue_shared_regarray(rogue_shader *shader, unsigned size, unsigned start_index)
{
   /* Shared register arrays are looked up/created via the regarray cache. */
   rogue_regarray *regarray =
      rogue_regarray_cached(shader, size, ROGUE_REG_CLASS_SHARED, start_index);
   return regarray;
}
PUBLIC
rogue_regarray *rogue_ssa_vec_regarray(rogue_shader *shader,
unsigned size,

View File

@@ -1887,6 +1887,8 @@ rogue_reg *rogue_const_reg(rogue_shader *shader, unsigned index);
rogue_reg *rogue_pixout_reg(rogue_shader *shader, unsigned index);
rogue_reg *rogue_special_reg(rogue_shader *shader, unsigned index);
rogue_reg *rogue_vtxin_reg(rogue_shader *shader, unsigned index);
rogue_reg *rogue_vtxout_reg(rogue_shader *shader, unsigned index);
@@ -1905,6 +1907,10 @@ rogue_temp_regarray(rogue_shader *shader, unsigned size, unsigned start_index);
rogue_regarray *
rogue_coeff_regarray(rogue_shader *shader, unsigned size, unsigned start_index);
rogue_regarray *rogue_shared_regarray(rogue_shader *shader,
unsigned size,
unsigned start_index);
rogue_regarray *rogue_ssa_vec_regarray(rogue_shader *shader,
unsigned size,
unsigned start_index,

View File

@@ -682,11 +682,9 @@ static inline void rogue_feedback_used_regs(rogue_build_ctx *ctx,
{
/* TODO NEXT: Use this counting method elsewhere as well. */
ctx->common_data[shader->stage].temps =
__bitset_count(shader->regs_used[ROGUE_REG_CLASS_TEMP],
BITSET_WORDS(rogue_reg_infos[ROGUE_REG_CLASS_TEMP].num));
ctx->common_data[shader->stage].internals = __bitset_count(
shader->regs_used[ROGUE_REG_CLASS_INTERNAL],
BITSET_WORDS(rogue_reg_infos[ROGUE_REG_CLASS_INTERNAL].num));
rogue_count_used_regs(shader, ROGUE_REG_CLASS_TEMP);
ctx->common_data[shader->stage].internals =
rogue_count_used_regs(shader, ROGUE_REG_CLASS_INTERNAL);
}
static bool ssa_def_cb(nir_ssa_def *ssa, void *state)

View File

@@ -1304,14 +1304,14 @@ static void rogue_encode_instr_group(rogue_instr_group *group,
}
PUBLIC
void rogue_encode_shader(UNUSED rogue_build_ctx *ctx,
void rogue_encode_shader(rogue_build_ctx *ctx,
rogue_shader *shader,
struct util_dynarray *binary)
{
if (!shader->is_grouped)
unreachable("Can't encode shader with ungrouped instructions.");
util_dynarray_init(binary, shader);
util_dynarray_init(binary, ctx);
rogue_foreach_instr_group_in_shader (group, shader)
rogue_encode_instr_group(group, binary);

View File

@@ -581,7 +581,7 @@ const rogue_alu_op_info rogue_alu_op_infos[ROGUE_ALU_OP_COUNT] = {
},
.supported_dst_types = { [0] = T(REG) | T(REGARRAY) | T(IO), },
.supported_src_types = {
[0] = T(REG),
[0] = T(REG) | T(REGARRAY),
},
},
[ROGUE_ALU_OP_FADD] = { .str = "fadd", .num_dsts = 1, .num_srcs = 2,
@@ -685,7 +685,7 @@ const rogue_alu_op_info rogue_alu_op_infos[ROGUE_ALU_OP_COUNT] = {
[ROGUE_ALU_OP_MOV] = { .str = "mov", .num_dsts = 1, .num_srcs = 1,
.supported_dst_types = { [0] = T(REG) | T(REGARRAY), },
.supported_src_types = {
[0] = T(REG) | T(IMM),
[0] = T(REG) | T(REGARRAY) | T(IMM),
},
},
[ROGUE_ALU_OP_CMOV] = { .str = "cmov", .num_dsts = 1, .num_srcs = 3,

View File

@@ -643,10 +643,15 @@ void rogue_print_shader(FILE *fp, const rogue_shader *shader)
{
fputs("/*", fp);
if (shader->name)
fprintf(fp, " \"%s\":", shader->name);
if (shader->stage == MESA_SHADER_NONE)
fputs(" USC program", fp);
else
fprintf(fp, " %s shader", _mesa_shader_stage_to_string(shader->stage));
fprintf(fp, " %s shader */\n", _mesa_shader_stage_to_string(shader->stage));
if (shader->name)
fprintf(fp, " - %s", shader->name);
fputs(" */\n", fp);
rogue_foreach_block (block, shader)
rogue_print_block(fp, block);

View File

@@ -67,6 +67,7 @@ pvr_files = files(
'pvr_wsi.c',
'usc/pvr_uscgen.c',
'usc/pvr_uscgen_tq.c',
)
pvr_includes = [

View File

@@ -57,6 +57,11 @@
#define PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT \
(uint32_t)(VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT + 1U)
#define PVR_TRANSFER_MAX_LAYERS 1U
#define PVR_TRANSFER_MAX_LOADS 4U
#define PVR_TRANSFER_MAX_IMAGES \
(PVR_TRANSFER_MAX_LAYERS * PVR_TRANSFER_MAX_LOADS)
/* TODO: move into a common surface library? */
enum pvr_memlayout {
PVR_MEMLAYOUT_UNDEFINED = 0, /* explicitly treat 0 as undefined */
@@ -141,6 +146,30 @@ enum pvr_stage_allocation {
PVR_STAGE_ALLOCATION_COUNT
};
/* Resolve operation applied when reading back a multisampled source in a
 * transfer.
 * NOTE(review): semantics inferred from the names only — BLEND presumably
 * combines samples and SAMPLE<n> picks sample n; confirm against the shader
 * generator.
 */
enum pvr_resolve_op {
   PVR_RESOLVE_BLEND,
   PVR_RESOLVE_MIN,
   PVR_RESOLVE_MAX,
   PVR_RESOLVE_SAMPLE0,
   PVR_RESOLVE_SAMPLE1,
   PVR_RESOLVE_SAMPLE2,
   PVR_RESOLVE_SAMPLE3,
   PVR_RESOLVE_SAMPLE4,
   PVR_RESOLVE_SAMPLE5,
   PVR_RESOLVE_SAMPLE6,
   PVR_RESOLVE_SAMPLE7,
};
/* Alpha blending type coming from the transfer API. Any value other than
 * PVR_ALPHA_NONE makes the TQ fragment shader perform an extra load (see
 * pvr_pbe_pixel_num_loads()).
 */
enum pvr_alpha_type {
   PVR_ALPHA_NONE,
   PVR_ALPHA_SOURCE,
   PVR_ALPHA_PREMUL_SOURCE,
   PVR_ALPHA_GLOBAL,
   PVR_ALPHA_PREMUL_SOURCE_WITH_GLOBAL,
   PVR_ALPHA_CUSTOM,
   PVR_ALPHA_AATEXT,
};
enum pvr_event_state {
PVR_EVENT_STATE_SET_BY_HOST,
PVR_EVENT_STATE_RESET_BY_HOST,

View File

@@ -28,6 +28,7 @@
#include <vulkan/vulkan.h>
#include "hwdef/rogue_hw_utils.h"
#include "pvr_common.h"
#include "pvr_formats.h"
#include "pvr_private.h"
#include "util/bitpack_helpers.h"
@@ -1031,3 +1032,76 @@ bool pvr_format_is_pbe_downscalable(VkFormat vk_format)
return false;
}
}
/* pvr_pbe_pixel_num_loads() - Number of texture loads the TQ fragment shader
 * must perform for a given PBE pixel format and alpha type.
 *
 * Returns 2 for any alpha blending variant (NOTE(review): presumably one load
 * for the source plus one for the destination — confirm against the shader
 * generator), 1 for every supported conversion/merge format, and 0 for
 * unsupported/unknown values.
 */
uint32_t pvr_pbe_pixel_num_loads(enum pvr_transfer_pbe_pixel_src pbe_format,
                                 uint32_t alpha_type)
{
   /* Any alpha variant forces two loads. Note: default is placed before
    * PVR_ALPHA_NONE on purpose, so unknown alpha types are treated as "none".
    */
   switch (alpha_type) {
   default:
   case PVR_ALPHA_NONE:
      break;
   case PVR_ALPHA_SOURCE:
   case PVR_ALPHA_PREMUL_SOURCE:
   case PVR_ALPHA_PREMUL_SOURCE_WITH_GLOBAL:
   case PVR_ALPHA_GLOBAL:
      return 2U;
   }

   switch (pbe_format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
   case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
   case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45:
   case PVR_TRANSFER_PBE_PIXEL_SRC_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_S8D24:
   case PVR_TRANSFER_PBE_PIXEL_SRC_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F:
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
   case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
   case PVR_TRANSFER_PBE_PIXEL_SRC_YUV_PACKED:
   case PVR_TRANSFER_PBE_PIXEL_SRC_YVU_PACKED:
      return 1U;

   /* NOTE(review): the MASK16..MASK128 formats are absent from the list above
    * and return 0 here — confirm that is intended rather than an omission.
    */
   case PVR_TRANSFER_PBE_PIXEL_SRC_NUM:
   default:
      return 0U;
   }
}

View File

@@ -57,6 +57,156 @@ enum pvr_pbe_accum_format {
PVR_PBE_ACCUM_FORMAT_U24,
};
/**
* Pixel related shader selector. The logic selecting the shader has to take
* into account the pixel related properties (controlling the conversion path in
* the shader) and the geometry related properties (controlling the sample
* position calcs). These two can be orthogonal.
*
* integer format conversions, bit depth : 8, 16, 32 per ch formats : signed,
* unsigned. Strategy: convert everything to U32 or S32 then USC pack. PBE just
* pass through.
*
* fixed point format conversions, bit depth 565, 1555, 555 etc. Strategy:
* fcnorm to 4 F32, then USC pack to F16F16. PBE converts to destination
*
* float/fixed format conversions
* strategy: fcnorm, then pack to f16 _when_ destination is not f32.
* fmt | unorm | flt |
* 8 | x | |
* 16 | x | x |
* 32 | x | x |
*
*
* non-merge type DS blit table
* **********************************************
* * * S8 D16 D24S8 D32 D32S8 *
* **********************************************
* * S8 * cpy i i i i *
* * D16 * i cpy i - i *
* * D24S8 * swiz - cpy (1) - *
* * D32 * i - i cpy i *
* * D32S8 * (2) - - cpy cpy *
* **********************************************
*
* merge with stencil pick type DS blit table
* **********************************************
* * * S8 D16 D24S8 D32 D32S8 *
* **********************************************
* * S8 * i i (1) i (2) *
* * D16 * i i i i i *
* * D24S8 * i i (3) i (4) *
* * D32 * i i i i i *
* * D32S8 * i i (5) i (6) *
* **********************************************
*
* merge with depth pick type DS blit table
* **********************************************
* * * S8 D16 D24S8 D32 D32S8 *
* **********************************************
* * S8 * i i i i i *
* * D16 * - - - - - *
* * D24S8 * - - (s) - - *
* * D32 * - - (1) - (2) *
* * D32S8 * - - - - (s) *
* **********************************************
*
* D formats are unpacked into a single register according to their format
* S formats are unpacked into a single register in U8
* D24S8 is in a single 32 bit register (as the PBE can't read it from
* unpacked.)
*
* Swizzles are applied on the TPU not the PBE because of potential
* accumulation i.e. a non-iterated shader doesn't know if it writes the output
* buffer for PBE emit or a second pass blend.
*/
/* Selects the pixel conversion path performed by the TQ fragment shader
 * before the PBE emit. Explicit values are assigned because the value may
 * feed shader hashing (see the FIXME on PVR_TRANSFER_PBE_PIXEL_SRC_NUM).
 */
enum pvr_transfer_pbe_pixel_src {
   PVR_TRANSFER_PBE_PIXEL_SRC_UU8888 = 0,
   PVR_TRANSFER_PBE_PIXEL_SRC_US8888 = 1,
   PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16 = 2,
   PVR_TRANSFER_PBE_PIXEL_SRC_US16S16 = 3,

   PVR_TRANSFER_PBE_PIXEL_SRC_SU8888 = 4,
   PVR_TRANSFER_PBE_PIXEL_SRC_SS8888 = 5,
   PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16 = 6,
   PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16 = 7,

   PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102 = 8,
   PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102 = 9,
   PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102 = 10,
   PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102 = 11,

   PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32 = 12,
   PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32 = 13,
   PVR_TRANSFER_PBE_PIXEL_SRC_US32S32 = 14,
   PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32 = 15,

   PVR_TRANSFER_PBE_PIXEL_SRC_F16F16 = 16,
   PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM = 17,
   PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM = 18,
   PVR_TRANSFER_PBE_PIXEL_SRC_F32X4 = 19,
   PVR_TRANSFER_PBE_PIXEL_SRC_F32X2 = 20,
   PVR_TRANSFER_PBE_PIXEL_SRC_F32 = 21,

   PVR_TRANSFER_PBE_PIXEL_SRC_RAW32 = 22,
   PVR_TRANSFER_PBE_PIXEL_SRC_RAW64 = 23,
   PVR_TRANSFER_PBE_PIXEL_SRC_RAW128 = 24,

   /* f16 to U8 conversion in shader. */
   PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8 = 25,

   PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB = 26,
   PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45 = 27,

   PVR_TRANSFER_PBE_PIXEL_SRC_D24S8 = 28,
   PVR_TRANSFER_PBE_PIXEL_SRC_S8D24 = 29,
   PVR_TRANSFER_PBE_PIXEL_SRC_D32S8 = 30,

   /* D: D32_S8 */
   PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8 = 31,
   PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8 = 32,
   PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8 = 33,
   PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8 = 34,

   /* D: D32 */
   PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32 = 35,
   PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F = 36,

   /* D : D24_S8 */
   PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8 = 37,
   PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8 = 38,
   PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8 = 39,
   PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8 = 40,
   PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8 = 41,
   PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8 = 42,

   /* ob0 holds Y and ob1 holds U or V.
    * NOTE(review): original comment said "ob0" twice — assuming the second
    * was meant to be ob1; confirm.
    */
   PVR_TRANSFER_PBE_PIXEL_SRC_YUV_PACKED = 43,

   /* ob0 holds Y, ob1 holds U, ob2 holds V. */
   PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V = 44,

   PVR_TRANSFER_PBE_PIXEL_SRC_MASK16 = 45,
   PVR_TRANSFER_PBE_PIXEL_SRC_MASK32 = 46,
   PVR_TRANSFER_PBE_PIXEL_SRC_MASK48 = 47,
   PVR_TRANSFER_PBE_PIXEL_SRC_MASK64 = 48,
   PVR_TRANSFER_PBE_PIXEL_SRC_MASK96 = 49,
   PVR_TRANSFER_PBE_PIXEL_SRC_MASK128 = 50,

   PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8 = 51,

   /* ob0 holds Y and ob1 holds V or U.
    * NOTE(review): original comment said "ob0" twice — assuming the second
    * was meant to be ob1; confirm.
    */
   PVR_TRANSFER_PBE_PIXEL_SRC_YVU_PACKED = 52,

   /* ob0 holds Y, ob1 holds UV interleaved. */
   PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED = 53,

   /* FIXME: This changes for other BVNC's which may change the hashing logic
    * in pvr_hash_shader.
    */
   PVR_TRANSFER_PBE_PIXEL_SRC_NUM = 54,
};
const uint8_t *pvr_get_format_swizzle(VkFormat vk_format);
uint32_t pvr_get_tex_format(VkFormat vk_format);
uint32_t pvr_get_tex_format_aspect(VkFormat vk_format,
@@ -70,4 +220,13 @@ void pvr_get_hw_clear_color(VkFormat vk_format,
VkClearColorValue value,
uint32_t packed_out[static const 4]);
/* TODO: alpha_type is of 'enum pvr_int_pbe_pixel_num_loads' type. See if we can
* move that in here. It's currently in pvr_common.h and it doesn't seem
* appropriate including that in here. Also moving the definition in here would
* make pvr_common.h include this which would mean that the compiler would be
* pulling in vulkan specific format stuff.
*/
uint32_t pvr_pbe_pixel_num_loads(enum pvr_transfer_pbe_pixel_src pbe_format,
uint32_t alpha_type);
#endif /* PVR_FORMATS_H */

View File

@@ -24,9 +24,119 @@
#ifndef PVR_USCGEN_H
#define PVR_USCGEN_H
#include <stdbool.h>
#include <stdint.h>
#include "pvr_common.h"
#include "pvr_formats.h"
#include "util/u_dynarray.h"
#include <stdint.h>
/* Number of float coefficients needed to get from screen space to texture
 * space. The enumerant value is a selector; the name encodes the actual float
 * count (0, 4 or 6).
 */
enum pvr_int_coord_set_floats {
   PVR_INT_COORD_SET_FLOATS_0 = 0,
   PVR_INT_COORD_SET_FLOATS_4 = 1,
   /* For rate changes to 0 base screen space. */
   PVR_INT_COORD_SET_FLOATS_6 = 2,
   PVR_INT_COORD_SET_FLOATS_NUM = 3
};
/* Properties identifying a transfer queue (TQ) fragment shader variant.
 * Passed to pvr_uscgen_tq_frag() to select the generated program.
 */
struct pvr_tq_shader_properties {
   /* Controls whether this is an iterated shader. */
   bool iterated;

   /* Controls whether this is meant to be running at full rate. */
   bool full_rate;

   /* Sample specific channel of pixel. */
   bool pick_component;

   /* Alpha type from transfer API. */
   uint32_t alpha_type;

   /* Per-layer properties. Only a single layer is described here.
    * NOTE(review): presumably tied to PVR_TRANSFER_MAX_LAYERS (1) — confirm.
    */
   struct pvr_tq_layer_properties {
      /* Controls whether we need to send the sample count to the TPU. */
      bool msaa;

      /* In case we run pixel rate, to do an USC resolve - but still in MSAA TPU
       * samples.
       */
      uint32_t sample_count;

      enum pvr_resolve_op resolve_op;

      /* Selects the pixel conversion that we have to perform. */
      enum pvr_transfer_pbe_pixel_src pbe_format;

      /* Sampling from a 3D texture with a constant Z position. */
      bool sample;

      /* Number of float coefficients to get from screen space to texture space.
       */
      enum pvr_int_coord_set_floats layer_floats;

      /* Unaligned texture address in bytes. */
      uint32_t byte_unwind;

      /* Enable bilinear filter in shader. */
      bool linear;
   } layer_props;
};
/* All offsets are in dwords. */
/* Devices may have more than 256 sh regs, but we expect to use very few, so
 * uint8_t is sufficient.
 */
/* Layout of the shared (sh) registers consumed by the TQ fragment shader.
 * All offsets are in dwords. Filled in by the driver, with the compiler_out
 * part written back by pvr_uscgen_tq_frag().
 */
struct pvr_tq_frag_sh_reg_layout {
   struct {
      /* How many image sampler descriptors are present. */
      uint8_t count;

      /* TODO: See if we ever need more than one combined image sampler
       * descriptor. If this is linked to the amount of layers used, we only
       * ever use one layer so this wouldn't need to be an array.
       */
      struct {
         uint8_t image;
         uint8_t sampler;
      } offsets[PVR_TRANSFER_MAX_IMAGES];
   } combined_image_samplers;

   /* TODO: Dynamic consts are used for various things so do this properly by
    * having an actual layout instead of chucking them all together using an
    * implicit layout.
    */
   struct {
      /* How many dynamic consts regs have been allocated. */
      uint8_t count;
      uint8_t offset;
   } dynamic_consts;

   /* Total sh regs allocated by the driver. It does not include the regs
    * necessary for compiler_out.
    */
   uint8_t driver_total;

   /* Provided by the compiler to the driver to be appended to the shareds. */
   /* No offset field since these will be appended at the end so driver_total
    * can be used instead.
    */
   struct {
      struct {
         /* TODO: Remove this count and just use `compiler_out_total`? Or remove
          * that one and use this one?
          */
         uint8_t count;

         /* TODO: The array size is chosen arbitrarily based on the max
          * constants currently produced by the compiler. Make this dynamic?
          */
         /* Values to fill in into each shared reg used for usc constants. */
         uint32_t values[10];
      } usc_constants;
   } compiler_out;

   /* Total extra sh regs needed by the compiler that need to be appended to the
    * shareds by the driver.
    */
   uint8_t compiler_out_total;
};
/* TODO: Shader caching (not pipeline caching) support. */
@@ -37,4 +147,9 @@ void pvr_uscgen_per_job_eot(uint32_t emit_count,
void pvr_uscgen_nop(struct util_dynarray *binary);
void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props,
struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
unsigned *temps_used,
struct util_dynarray *binary);
#endif /* PVR_USCGEN_H */

View File

@@ -0,0 +1,210 @@
/*
* Copyright © 2023 Imagination Technologies Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <assert.h>
#include <stdint.h>
#include "pvr_uscgen.h"
#include "rogue/rogue.h"
#include "rogue/rogue_builder.h"
#include "util/u_dynarray.h"
/* pvr_uscgen_tq_frag() - Generate a transfer queue (TQ) fragment shader.
 *
 * Builds a Rogue USC program which:
 *   1. Loads the sample coordinates, either from iterated coefficients
 *      (iterated variants) or from the special pixel X/Y registers.
 *   2. Performs a 2D texture sample using the image/sampler state passed in
 *      shared registers.
 *   3. Copies the sampled channels to the pixel output registers.
 *
 * shader_props:  variant selection (read-only).
 * sh_reg_layout: shared register layout set up by the driver; the
 *                compiler_out fields are written here (currently always 0).
 * temps_used:    set to the number of temp registers used by the program.
 * binary:        receives the encoded shader binary.
 *
 * Only a restricted set of variants is supported so far; anything else trips
 * the asserts below.
 */
void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props,
                        struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
                        unsigned *temps_used,
                        struct util_dynarray *binary)
{
   rogue_builder b;
   rogue_shader *shader = rogue_shader_create(NULL, MESA_SHADER_NONE);

   unsigned smp_coord_size = 2;
   unsigned smp_coord_idx = 0;
   rogue_regarray *smp_coords;

   unsigned channels = 0;
   unsigned output_idx = 1;
   rogue_regarray *outputs = NULL;

   unsigned image_state_size = 4;
   unsigned image_state_idx;
   rogue_regarray *image_state;

   unsigned smp_state_size = 4;
   unsigned smp_state_idx;
   rogue_regarray *smp_state;

   rogue_set_shader_name(shader, "TQ (fragment)");
   rogue_builder_init(&b, shader);
   rogue_push_block(&b);

   smp_coords =
      rogue_ssa_vec_regarray(b.shader, smp_coord_size, smp_coord_idx, 0);

   /* TODO: Unrestrict. */
   assert(shader_props->full_rate == false);
   assert(shader_props->pick_component == false);
   assert(shader_props->alpha_type == 0);

   const struct pvr_tq_layer_properties *layer_props =
      &shader_props->layer_props;
   uint32_t loads;

   /* TODO: Unrestrict. */
   assert(layer_props->msaa == false);
   assert(layer_props->sample_count == 1U);
   assert(layer_props->resolve_op == PVR_RESOLVE_BLEND);
   assert(layer_props->pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_RAW64 ||
          layer_props->pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_RAW128);
   assert(layer_props->sample == false);
   assert(layer_props->layer_floats == PVR_INT_COORD_SET_FLOATS_0);
   assert(layer_props->byte_unwind == 0);
   assert(layer_props->linear == false);

   loads = pvr_pbe_pixel_num_loads(layer_props->pbe_format,
                                   shader_props->alpha_type);

   for (uint32_t load = 0; load < loads; ++load) {
      if (shader_props->iterated) {
         /* TODO: feed{back,forward} the coeff index to/from shader_info. */
         unsigned coeff_index = 0;
         rogue_regarray *coeffs =
            rogue_coeff_regarray(b.shader, smp_coord_size * 4, coeff_index);

         rogue_instr *instr = &rogue_FITR_PIXEL(&b,
                                                rogue_ref_regarray(smp_coords),
                                                rogue_ref_drc(0),
                                                rogue_ref_regarray(coeffs),
                                                rogue_ref_val(smp_coord_size))
                                  ->instr;
         rogue_add_instr_comment(instr, "load_iterated");
      } else {
         rogue_instr *instr;
         rogue_regarray *smp_coord_x =
            rogue_ssa_vec_regarray(b.shader, 1, smp_coord_idx, 0);
         rogue_regarray *smp_coord_y =
            rogue_ssa_vec_regarray(b.shader, 1, smp_coord_idx, 1);

         /* (X,Y).P, pixel (X,Y) coordinates, pixel mode. */
         rogue_reg *in_x = rogue_special_reg(b.shader, 97);
         rogue_reg *in_y = rogue_special_reg(b.shader, 100);

         instr =
            &rogue_MOV(&b, rogue_ref_regarray(smp_coord_x), rogue_ref_reg(in_x))
                ->instr;
         rogue_add_instr_comment(instr, "load_x");

         instr =
            &rogue_MOV(&b, rogue_ref_regarray(smp_coord_y), rogue_ref_reg(in_y))
                ->instr;
         rogue_add_instr_comment(instr, "load_y");
      }

      /* MSAA sample handling is not implemented yet (simplified from an
       * empty "if (!msaa) {} else" in the original).
       */
      if (layer_props->msaa)
         unreachable("Unsupported layer property (MSAA).");
   }

   /* Source conversion: the raw formats need none. */
   switch (layer_props->pbe_format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
      break;

   default:
      unreachable("Unsupported layer property (format).");
   }

   /* TODO: Select the texture_regs index appropriately. */
   assert(sh_reg_layout->combined_image_samplers.count == 1);
   image_state_idx = sh_reg_layout->combined_image_samplers.offsets[0].image;
   image_state =
      rogue_shared_regarray(b.shader, image_state_size, image_state_idx);
   smp_state_idx = sh_reg_layout->combined_image_samplers.offsets[0].sampler;
   smp_state = rogue_shared_regarray(b.shader, smp_state_size, smp_state_idx);

   /* Pack/blend phase. */
   rogue_backend_instr *smp2d;
   switch (layer_props->pbe_format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
      /* RAW64 copies two dwords per pixel, RAW128 copies four (collapsed from
       * a redundant nested switch over the same value).
       */
      channels =
         layer_props->pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_RAW128 ? 4 : 2;

      outputs = rogue_ssa_vec_regarray(b.shader, channels, output_idx, 0);

      smp2d = rogue_SMP2D(&b,
                          rogue_ref_regarray(outputs),
                          rogue_ref_drc(0),
                          rogue_ref_regarray(image_state),
                          rogue_ref_regarray(smp_coords),
                          rogue_ref_regarray(smp_state),
                          rogue_ref_io(ROGUE_IO_NONE),
                          rogue_ref_val(channels));
      rogue_set_backend_op_mod(smp2d, ROGUE_BACKEND_OP_MOD_SLCWRITEBACK);
      rogue_add_instr_comment(&smp2d->instr, "pack/blend");

      /* NNCOORDS: NOTE(review): presumably non-normalized coordinates for the
       * raw pixel X/Y path — confirm.
       */
      if (!shader_props->iterated)
         rogue_set_backend_op_mod(smp2d, ROGUE_BACKEND_OP_MOD_NNCOORDS);

      break;

   default:
      unreachable("Unsupported layer property (format).");
   }

   assert(channels && outputs);

   /* Copy outputs. */
   for (unsigned u = 0; u < channels; ++u) {
      rogue_regarray *output_elem =
         rogue_ssa_vec_regarray(b.shader, 1, output_idx, u);
      rogue_reg *pixout_elem = rogue_pixout_reg(b.shader, u);
      rogue_MOV(&b,
                rogue_ref_reg(pixout_elem),
                rogue_ref_regarray(output_elem));
   }

   rogue_END(&b);

   rogue_shader_passes(shader);
   rogue_encode_shader(NULL, shader, binary);

   *temps_used = rogue_count_used_regs(shader, ROGUE_REG_CLASS_TEMP);

   /* No compiler-side shared register constants are needed yet. */
   sh_reg_layout->compiler_out.usc_constants.count = 0;
   sh_reg_layout->compiler_out_total = 0;

   ralloc_free(shader);
}