intel/compiler: Add support for bindless shaders
The Intel bindless thread dispatch model is very simple. When a compute shader is to be used for bindless dispatch, it can request a set of stack IDs. These are allocated per-dual-subslice by the hardware and recycled automatically when the stack ID is returned.

Passed to the bindless dispatch are a global argument address, a stack ID, and an address of the BINDLESS_SHADER_RECORD to invoke. When the bindless shader is dispatched, it is passed its stack ID as well as the global and local argument pointers. The local argument pointer is the address of the BINDLESS_SHADER_RECORD plus some offset which is specified as part of the BINDLESS_SHADER_RECORD.

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7356>
This commit is contained in:

committed by
Marge Bot

parent
27f44116fe
commit
7280b0911d
@@ -1081,3 +1081,12 @@ store("ssbo_block_intel", [-1, 1], [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
|
|||||||
# src[] = { value, offset }.
|
# src[] = { value, offset }.
|
||||||
store("shared_block_intel", [1], [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
|
store("shared_block_intel", [1], [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
|
||||||
|
|
||||||
|
# Intrinsics for Intel bindless thread dispatch
|
||||||
|
system_value("btd_dss_id_intel", 1)
|
||||||
|
system_value("btd_stack_id_intel", 1)
|
||||||
|
system_value("btd_global_arg_addr_intel", 1, bit_sizes=[64])
|
||||||
|
system_value("btd_local_arg_addr_intel", 1, bit_sizes=[64])
|
||||||
|
# src[] = { global_arg_addr, btd_record }
|
||||||
|
intrinsic("btd_spawn_intel", src_comp=[1, 1])
|
||||||
|
# src[] = { }
|
||||||
|
intrinsic("btd_retire_intel")
|
||||||
|
@@ -118,6 +118,9 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
|
|||||||
i == MESA_SHADER_FRAGMENT || i == MESA_SHADER_COMPUTE;
|
i == MESA_SHADER_FRAGMENT || i == MESA_SHADER_COMPUTE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int i = MESA_SHADER_TASK; i < MESA_VULKAN_SHADER_STAGES; i++)
|
||||||
|
compiler->scalar_stage[i] = true;
|
||||||
|
|
||||||
nir_lower_int64_options int64_options =
|
nir_lower_int64_options int64_options =
|
||||||
nir_lower_imul64 |
|
nir_lower_imul64 |
|
||||||
nir_lower_isign64 |
|
nir_lower_isign64 |
|
||||||
@@ -236,6 +239,12 @@ brw_prog_data_size(gl_shader_stage stage)
|
|||||||
[MESA_SHADER_GEOMETRY] = sizeof(struct brw_gs_prog_data),
|
[MESA_SHADER_GEOMETRY] = sizeof(struct brw_gs_prog_data),
|
||||||
[MESA_SHADER_FRAGMENT] = sizeof(struct brw_wm_prog_data),
|
[MESA_SHADER_FRAGMENT] = sizeof(struct brw_wm_prog_data),
|
||||||
[MESA_SHADER_COMPUTE] = sizeof(struct brw_cs_prog_data),
|
[MESA_SHADER_COMPUTE] = sizeof(struct brw_cs_prog_data),
|
||||||
|
[MESA_SHADER_RAYGEN] = sizeof(struct brw_bs_prog_data),
|
||||||
|
[MESA_SHADER_ANY_HIT] = sizeof(struct brw_bs_prog_data),
|
||||||
|
[MESA_SHADER_CLOSEST_HIT] = sizeof(struct brw_bs_prog_data),
|
||||||
|
[MESA_SHADER_MISS] = sizeof(struct brw_bs_prog_data),
|
||||||
|
[MESA_SHADER_INTERSECTION] = sizeof(struct brw_bs_prog_data),
|
||||||
|
[MESA_SHADER_CALLABLE] = sizeof(struct brw_bs_prog_data),
|
||||||
[MESA_SHADER_KERNEL] = sizeof(struct brw_cs_prog_data),
|
[MESA_SHADER_KERNEL] = sizeof(struct brw_cs_prog_data),
|
||||||
};
|
};
|
||||||
assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_sizes));
|
assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_sizes));
|
||||||
@@ -252,6 +261,12 @@ brw_prog_key_size(gl_shader_stage stage)
|
|||||||
[MESA_SHADER_GEOMETRY] = sizeof(struct brw_gs_prog_key),
|
[MESA_SHADER_GEOMETRY] = sizeof(struct brw_gs_prog_key),
|
||||||
[MESA_SHADER_FRAGMENT] = sizeof(struct brw_wm_prog_key),
|
[MESA_SHADER_FRAGMENT] = sizeof(struct brw_wm_prog_key),
|
||||||
[MESA_SHADER_COMPUTE] = sizeof(struct brw_cs_prog_key),
|
[MESA_SHADER_COMPUTE] = sizeof(struct brw_cs_prog_key),
|
||||||
|
[MESA_SHADER_RAYGEN] = sizeof(struct brw_bs_prog_key),
|
||||||
|
[MESA_SHADER_ANY_HIT] = sizeof(struct brw_bs_prog_key),
|
||||||
|
[MESA_SHADER_CLOSEST_HIT] = sizeof(struct brw_bs_prog_key),
|
||||||
|
[MESA_SHADER_MISS] = sizeof(struct brw_bs_prog_key),
|
||||||
|
[MESA_SHADER_INTERSECTION] = sizeof(struct brw_bs_prog_key),
|
||||||
|
[MESA_SHADER_CALLABLE] = sizeof(struct brw_bs_prog_key),
|
||||||
[MESA_SHADER_KERNEL] = sizeof(struct brw_cs_prog_key),
|
[MESA_SHADER_KERNEL] = sizeof(struct brw_cs_prog_key),
|
||||||
};
|
};
|
||||||
assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_sizes));
|
assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_sizes));
|
||||||
|
@@ -152,6 +152,13 @@ struct brw_compiler {
|
|||||||
*/
|
*/
|
||||||
#define BRW_SUBGROUP_SIZE 32
|
#define BRW_SUBGROUP_SIZE 32
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
brw_shader_stage_is_bindless(gl_shader_stage stage)
|
||||||
|
{
|
||||||
|
return stage >= MESA_SHADER_RAYGEN &&
|
||||||
|
stage <= MESA_SHADER_CALLABLE;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Program key structures.
|
* Program key structures.
|
||||||
*
|
*
|
||||||
@@ -481,6 +488,10 @@ struct brw_cs_prog_key {
|
|||||||
struct brw_base_prog_key base;
|
struct brw_base_prog_key base;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Program key for the bindless-dispatch shader stages (this is the key type
 * brw_prog_key_size() reports for RAYGEN through CALLABLE).  It currently
 * carries nothing beyond the fields shared by every stage key.
 */
struct brw_bs_prog_key {
|
||||||
|
struct brw_base_prog_key base;
|
||||||
|
};
|
||||||
|
|
||||||
/* brw_any_prog_key is any of the keys that map to an API stage */
|
/* brw_any_prog_key is any of the keys that map to an API stage */
|
||||||
union brw_any_prog_key {
|
union brw_any_prog_key {
|
||||||
struct brw_base_prog_key base;
|
struct brw_base_prog_key base;
|
||||||
@@ -490,6 +501,7 @@ union brw_any_prog_key {
|
|||||||
struct brw_gs_prog_key gs;
|
struct brw_gs_prog_key gs;
|
||||||
struct brw_wm_prog_key wm;
|
struct brw_wm_prog_key wm;
|
||||||
struct brw_cs_prog_key cs;
|
struct brw_cs_prog_key cs;
|
||||||
|
struct brw_bs_prog_key bs;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -986,6 +998,7 @@ struct brw_cs_prog_data {
|
|||||||
|
|
||||||
bool uses_barrier;
|
bool uses_barrier;
|
||||||
bool uses_num_work_groups;
|
bool uses_num_work_groups;
|
||||||
|
bool uses_btd_stack_ids;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
struct brw_push_const_block cross_thread;
|
struct brw_push_const_block cross_thread;
|
||||||
@@ -1013,6 +1026,12 @@ brw_cs_prog_data_prog_offset(const struct brw_cs_prog_data *prog_data,
|
|||||||
return prog_data->prog_offset[index];
|
return prog_data->prog_offset[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Program data produced for a bindless-dispatch shader (this is the
 * prog_data type brw_prog_data_size() reports for RAYGEN through CALLABLE).
 */
struct brw_bs_prog_data {
|
||||||
|
struct brw_stage_prog_data base;
|
||||||
|
/* Dispatch width of the variant that was kept; brw_compile_bs() stores
 * either 8 or 16 here.
 */
uint8_t simd_size;
|
||||||
|
/* brw_compile_bs() fills this in from the NIR shader's scratch_size. */
uint32_t stack_size;
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Enum representing the i965-specific vertex results that don't correspond
|
* Enum representing the i965-specific vertex results that don't correspond
|
||||||
* exactly to any element of gl_varying_slot. The values of this enum are
|
* exactly to any element of gl_varying_slot. The values of this enum are
|
||||||
@@ -1340,6 +1359,7 @@ union brw_any_prog_data {
|
|||||||
struct brw_gs_prog_data gs;
|
struct brw_gs_prog_data gs;
|
||||||
struct brw_wm_prog_data wm;
|
struct brw_wm_prog_data wm;
|
||||||
struct brw_cs_prog_data cs;
|
struct brw_cs_prog_data cs;
|
||||||
|
struct brw_bs_prog_data bs;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define DEFINE_PROG_DATA_DOWNCAST(STAGE, CHECK) \
|
#define DEFINE_PROG_DATA_DOWNCAST(STAGE, CHECK) \
|
||||||
@@ -1364,6 +1384,7 @@ DEFINE_PROG_DATA_DOWNCAST(tes, prog_data->stage == MESA_SHADER_TESS_EVAL)
|
|||||||
DEFINE_PROG_DATA_DOWNCAST(gs, prog_data->stage == MESA_SHADER_GEOMETRY)
|
DEFINE_PROG_DATA_DOWNCAST(gs, prog_data->stage == MESA_SHADER_GEOMETRY)
|
||||||
DEFINE_PROG_DATA_DOWNCAST(wm, prog_data->stage == MESA_SHADER_FRAGMENT)
|
DEFINE_PROG_DATA_DOWNCAST(wm, prog_data->stage == MESA_SHADER_FRAGMENT)
|
||||||
DEFINE_PROG_DATA_DOWNCAST(cs, prog_data->stage == MESA_SHADER_COMPUTE)
|
DEFINE_PROG_DATA_DOWNCAST(cs, prog_data->stage == MESA_SHADER_COMPUTE)
|
||||||
|
DEFINE_PROG_DATA_DOWNCAST(bs, brw_shader_stage_is_bindless(prog_data->stage))
|
||||||
|
|
||||||
DEFINE_PROG_DATA_DOWNCAST(vue, prog_data->stage == MESA_SHADER_VERTEX ||
|
DEFINE_PROG_DATA_DOWNCAST(vue, prog_data->stage == MESA_SHADER_VERTEX ||
|
||||||
prog_data->stage == MESA_SHADER_TESS_CTRL ||
|
prog_data->stage == MESA_SHADER_TESS_CTRL ||
|
||||||
@@ -1541,6 +1562,20 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
struct brw_compile_stats *stats,
|
struct brw_compile_stats *stats,
|
||||||
char **error_str);
|
char **error_str);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compile a Ray Tracing shader.
|
||||||
|
*
|
||||||
|
* Returns the final assembly and the program's size.
* NOTE(review): no size out-parameter is visible in this prototype; confirm
* where callers are expected to read the program size from.
*
* On failure NULL is returned and, when error_str is non-NULL, *error_str
* is pointed at a message allocated out of mem_ctx.
|
||||||
|
*/
|
||||||
|
const unsigned *
|
||||||
|
brw_compile_bs(const struct brw_compiler *compiler, void *log_data,
|
||||||
|
void *mem_ctx,
|
||||||
|
const struct brw_bs_prog_key *key,
|
||||||
|
struct brw_bs_prog_data *prog_data,
|
||||||
|
struct nir_shader *shader,
|
||||||
|
struct brw_compile_stats *stats,
|
||||||
|
char **error_str);
|
||||||
|
|
||||||
void brw_debug_key_recompile(const struct brw_compiler *c, void *log,
|
void brw_debug_key_recompile(const struct brw_compiler *c, void *log,
|
||||||
gl_shader_stage stage,
|
gl_shader_stage stage,
|
||||||
const struct brw_base_prog_key *old_key,
|
const struct brw_base_prog_key *old_key,
|
||||||
|
@@ -1011,6 +1011,45 @@ brw_dp_typed_surface_rw_desc(const struct gen_device_info *devinfo,
|
|||||||
return brw_dp_surface_desc(devinfo, msg_type, msg_control);
|
return brw_dp_surface_desc(devinfo, msg_type, msg_control);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Encode an execution size as the one-bit "SM2" SIMD-mode field.
 *
 * \param exec_size  Execution size; asserted to be 8 or 16.
 * \return 0 for an execution size of 8, 1 for 16.
 */
static inline uint32_t
brw_mdc_sm2(unsigned exec_size)
{
   const uint32_t wider_than_simd8 = exec_size > 8;

   assert(exec_size == 8 || exec_size == 16);

   return wider_than_simd8;
}
|
||||||
|
|
||||||
|
/**
 * Decode the one-bit "SM2" SIMD-mode field back into an execution size.
 *
 * \param sm2  Encoded field; asserted to be 0 or 1.
 * \return 8 when \p sm2 is 0, 16 when it is 1.
 */
static inline uint32_t
brw_mdc_sm2_exec_size(uint32_t sm2)
{
   assert(sm2 <= 1);

   /* Each increment of the field doubles the SIMD8 base width. */
   const int simd8_width = 8;
   return simd8_width << sm2;
}
|
||||||
|
|
||||||
|
static inline uint32_t
|
||||||
|
brw_btd_spawn_desc(const struct gen_device_info *devinfo,
|
||||||
|
unsigned exec_size, unsigned msg_type)
|
||||||
|
{
|
||||||
|
assert(devinfo->has_ray_tracing);
|
||||||
|
|
||||||
|
return SET_BITS(0, 19, 19) | /* No header */
|
||||||
|
SET_BITS(msg_type, 17, 14) |
|
||||||
|
SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Extract the message type (bits 17:14) from a bindless thread dispatch
 * descriptor.
 *
 * \param devinfo  Not consulted by this function.
 * \param desc     Descriptor to decode.
 * \return The value held in bits 17:14 of \p desc.
 */
static inline uint32_t
brw_btd_spawn_msg_type(const struct gen_device_info *devinfo,
                       uint32_t desc)
{
   const uint32_t msg_type = GET_BITS(desc, 17, 14);

   return msg_type;
}
|
||||||
|
|
||||||
|
/**
 * Recover the execution size from a bindless thread dispatch descriptor.
 *
 * \param devinfo  Not consulted by this function.
 * \param desc     Descriptor to decode.
 * \return The execution size decoded from bit 8 of \p desc by
 *         brw_mdc_sm2_exec_size().
 */
static inline uint32_t
brw_btd_spawn_exec_size(const struct gen_device_info *devinfo,
                        uint32_t desc)
{
   const uint32_t simd_mode = GET_BITS(desc, 8, 8);

   return brw_mdc_sm2_exec_size(simd_mode);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a message descriptor immediate with the specified pixel
|
* Construct a message descriptor immediate with the specified pixel
|
||||||
* interpolator function controls.
|
* interpolator function controls.
|
||||||
|
@@ -793,6 +793,10 @@ enum opcode {
|
|||||||
TES_OPCODE_GET_PRIMITIVE_ID,
|
TES_OPCODE_GET_PRIMITIVE_ID,
|
||||||
TES_OPCODE_CREATE_INPUT_READ_HEADER,
|
TES_OPCODE_CREATE_INPUT_READ_HEADER,
|
||||||
TES_OPCODE_ADD_INDIRECT_URB_OFFSET,
|
TES_OPCODE_ADD_INDIRECT_URB_OFFSET,
|
||||||
|
|
||||||
|
SHADER_OPCODE_GET_DSS_ID,
|
||||||
|
SHADER_OPCODE_BTD_SPAWN_LOGICAL,
|
||||||
|
SHADER_OPCODE_BTD_RETIRE_LOGICAL,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum brw_urb_write_flags {
|
enum brw_urb_write_flags {
|
||||||
@@ -1230,6 +1234,8 @@ enum brw_message_target {
|
|||||||
GEN7_SFID_PIXEL_INTERPOLATOR = 11,
|
GEN7_SFID_PIXEL_INTERPOLATOR = 11,
|
||||||
HSW_SFID_DATAPORT_DATA_CACHE_1 = 12,
|
HSW_SFID_DATAPORT_DATA_CACHE_1 = 12,
|
||||||
HSW_SFID_CRE = 13,
|
HSW_SFID_CRE = 13,
|
||||||
|
|
||||||
|
GEN_RT_SFID_BINDLESS_THREAD_DISPATCH = 7,
|
||||||
};
|
};
|
||||||
|
|
||||||
#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10
|
#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10
|
||||||
@@ -1622,4 +1628,6 @@ enum PACKED brw_rnd_mode {
|
|||||||
#define GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD 1
|
#define GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD 1
|
||||||
#define GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD 2
|
#define GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD 2
|
||||||
|
|
||||||
|
#define GEN_RT_BTD_MESSAGE_SPAWN 1
|
||||||
|
|
||||||
#endif /* BRW_EU_DEFINES_H */
|
#endif /* BRW_EU_DEFINES_H */
|
||||||
|
@@ -6033,6 +6033,76 @@ lower_math_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
lower_btd_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
|
{
|
||||||
|
const gen_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
fs_reg global_addr = inst->src[0];
|
||||||
|
const fs_reg &btd_record = inst->src[1];
|
||||||
|
|
||||||
|
const unsigned mlen = 2;
|
||||||
|
const fs_builder ubld = bld.exec_all().group(8, 0);
|
||||||
|
fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
||||||
|
|
||||||
|
ubld.MOV(header, brw_imm_ud(0));
|
||||||
|
switch (inst->opcode) {
|
||||||
|
case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
|
||||||
|
assert(type_sz(global_addr.type) == 8 && global_addr.stride == 0);
|
||||||
|
global_addr.type = BRW_REGISTER_TYPE_UD;
|
||||||
|
global_addr.stride = 1;
|
||||||
|
ubld.group(2, 0).MOV(header, global_addr);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
|
||||||
|
/* The bottom bit is the Stack ID release bit */
|
||||||
|
ubld.group(1, 0).MOV(header, brw_imm_ud(1));
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
unreachable("Invalid BTD message");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Stack IDs are always in R1 regardless of whether we're coming from a
|
||||||
|
* bindless shader or a regular compute shader.
|
||||||
|
*/
|
||||||
|
fs_reg stack_ids =
|
||||||
|
retype(byte_offset(header, REG_SIZE), BRW_REGISTER_TYPE_UW);
|
||||||
|
bld.MOV(stack_ids, retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UW));
|
||||||
|
|
||||||
|
unsigned ex_mlen = 0;
|
||||||
|
fs_reg payload;
|
||||||
|
if (inst->opcode == SHADER_OPCODE_BTD_SPAWN_LOGICAL) {
|
||||||
|
ex_mlen = 2 * (inst->exec_size / 8);
|
||||||
|
payload = bld.move_to_vgrf(btd_record, 1);
|
||||||
|
} else {
|
||||||
|
assert(inst->opcode == SHADER_OPCODE_BTD_RETIRE_LOGICAL);
|
||||||
|
/* All these messages take a BTD and things complain if we don't provide
|
||||||
|
* one for RETIRE. However, it shouldn't ever actually get used so fill
|
||||||
|
* it with zero.
|
||||||
|
*/
|
||||||
|
ex_mlen = 2 * (inst->exec_size / 8);
|
||||||
|
payload = bld.move_to_vgrf(brw_imm_uq(0), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Update the original instruction. */
|
||||||
|
inst->opcode = SHADER_OPCODE_SEND;
|
||||||
|
inst->mlen = mlen;
|
||||||
|
inst->ex_mlen = ex_mlen;
|
||||||
|
inst->header_size = 0; /* HW docs require has_header = false */
|
||||||
|
inst->send_has_side_effects = true;
|
||||||
|
inst->send_is_volatile = false;
|
||||||
|
|
||||||
|
/* Set up SFID and descriptors */
|
||||||
|
inst->sfid = GEN_RT_SFID_BINDLESS_THREAD_DISPATCH;
|
||||||
|
inst->desc = brw_btd_spawn_desc(devinfo, inst->exec_size,
|
||||||
|
GEN_RT_BTD_MESSAGE_SPAWN);
|
||||||
|
inst->resize_sources(4);
|
||||||
|
inst->src[0] = brw_imm_ud(0); /* desc */
|
||||||
|
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||||
|
inst->src[2] = header;
|
||||||
|
inst->src[3] = payload;
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
fs_visitor::lower_logical_sends()
|
fs_visitor::lower_logical_sends()
|
||||||
{
|
{
|
||||||
@@ -6177,6 +6247,11 @@ fs_visitor::lower_logical_sends()
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
|
||||||
|
case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
|
||||||
|
lower_btd_logical_send(ibld, inst);
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -7546,7 +7621,8 @@ void
|
|||||||
fs_visitor::setup_cs_payload()
|
fs_visitor::setup_cs_payload()
|
||||||
{
|
{
|
||||||
assert(devinfo->gen >= 7);
|
assert(devinfo->gen >= 7);
|
||||||
payload.num_regs = 1;
|
/* TODO: Fill out uses_btd_stack_ids automatically */
|
||||||
|
payload.num_regs = 1 + brw_cs_prog_data(prog_data)->uses_btd_stack_ids;
|
||||||
}
|
}
|
||||||
|
|
||||||
brw::register_pressure::register_pressure(const fs_visitor *v)
|
brw::register_pressure::register_pressure(const fs_visitor *v)
|
||||||
@@ -8474,6 +8550,43 @@ fs_visitor::run_cs(bool allow_spilling)
|
|||||||
return !failed;
|
return !failed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
fs_visitor::run_bs(bool allow_spilling)
|
||||||
|
{
|
||||||
|
assert(stage >= MESA_SHADER_RAYGEN && stage <= MESA_SHADER_CALLABLE);
|
||||||
|
|
||||||
|
/* R0: thread header, R1: stack IDs, R2: argument addresses */
|
||||||
|
payload.num_regs = 3;
|
||||||
|
|
||||||
|
if (shader_time_index >= 0)
|
||||||
|
emit_shader_time_begin();
|
||||||
|
|
||||||
|
emit_nir_code();
|
||||||
|
|
||||||
|
if (failed)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* TODO(RT): Perhaps rename this? */
|
||||||
|
emit_cs_terminate();
|
||||||
|
|
||||||
|
if (shader_time_index >= 0)
|
||||||
|
emit_shader_time_end();
|
||||||
|
|
||||||
|
calculate_cfg();
|
||||||
|
|
||||||
|
optimize();
|
||||||
|
|
||||||
|
assign_curb_setup();
|
||||||
|
|
||||||
|
fixup_3src_null_dest();
|
||||||
|
allocate_registers(allow_spilling);
|
||||||
|
|
||||||
|
if (failed)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return !failed;
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
is_used_in_not_interp_frag_coord(nir_ssa_def *def)
|
is_used_in_not_interp_frag_coord(nir_ssa_def *def)
|
||||||
{
|
{
|
||||||
@@ -9423,6 +9536,103 @@ brw_cs_simd_size_for_group_size(const struct gen_device_info *devinfo,
|
|||||||
return 32;
|
return 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const unsigned *
|
||||||
|
brw_compile_bs(const struct brw_compiler *compiler, void *log_data,
|
||||||
|
void *mem_ctx,
|
||||||
|
const struct brw_bs_prog_key *key,
|
||||||
|
struct brw_bs_prog_data *prog_data,
|
||||||
|
nir_shader *shader,
|
||||||
|
struct brw_compile_stats *stats,
|
||||||
|
char **error_str)
|
||||||
|
{
|
||||||
|
prog_data->base.stage = shader->info.stage;
|
||||||
|
prog_data->stack_size = shader->scratch_size;
|
||||||
|
|
||||||
|
const unsigned max_dispatch_width = 16;
|
||||||
|
brw_nir_apply_key(shader, compiler, &key->base, max_dispatch_width, true);
|
||||||
|
brw_postprocess_nir(shader, compiler, true);
|
||||||
|
|
||||||
|
fs_visitor *v = NULL, *v8 = NULL, *v16 = NULL;
|
||||||
|
bool has_spilled = false;
|
||||||
|
|
||||||
|
if (likely(!(INTEL_DEBUG & DEBUG_NO8))) {
|
||||||
|
v8 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
|
||||||
|
&prog_data->base, shader,
|
||||||
|
8, -1 /* shader time */);
|
||||||
|
const bool allow_spilling = true;
|
||||||
|
if (!v8->run_bs(allow_spilling)) {
|
||||||
|
if (error_str)
|
||||||
|
*error_str = ralloc_strdup(mem_ctx, v8->fail_msg);
|
||||||
|
delete v8;
|
||||||
|
return NULL;
|
||||||
|
} else {
|
||||||
|
v = v8;
|
||||||
|
prog_data->simd_size = 8;
|
||||||
|
if (v8->spilled_any_registers)
|
||||||
|
has_spilled = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!has_spilled && likely(!(INTEL_DEBUG & DEBUG_NO16))) {
|
||||||
|
v16 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
|
||||||
|
&prog_data->base, shader,
|
||||||
|
16, -1 /* shader time */);
|
||||||
|
const bool allow_spilling = (v == NULL);
|
||||||
|
if (!v16->run_bs(allow_spilling)) {
|
||||||
|
compiler->shader_perf_log(log_data,
|
||||||
|
"SIMD16 shader failed to compile: %s",
|
||||||
|
v16->fail_msg);
|
||||||
|
if (v == NULL) {
|
||||||
|
assert(v8 == NULL);
|
||||||
|
if (error_str) {
|
||||||
|
*error_str = ralloc_asprintf(
|
||||||
|
mem_ctx, "SIMD8 disabled and couldn't generate SIMD16: %s",
|
||||||
|
v16->fail_msg);
|
||||||
|
}
|
||||||
|
delete v16;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
v = v16;
|
||||||
|
prog_data->simd_size = 16;
|
||||||
|
if (v16->spilled_any_registers)
|
||||||
|
has_spilled = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(v == NULL)) {
|
||||||
|
assert(INTEL_DEBUG & (DEBUG_NO8 | DEBUG_NO16));
|
||||||
|
if (error_str) {
|
||||||
|
*error_str = ralloc_strdup(mem_ctx,
|
||||||
|
"Cannot satisfy INTEL_DEBUG flags SIMD restrictions");
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(v);
|
||||||
|
|
||||||
|
fs_generator g(compiler, log_data, mem_ctx, &prog_data->base,
|
||||||
|
v->runtime_check_aads_emit, shader->info.stage);
|
||||||
|
if (INTEL_DEBUG & DEBUG_RT) {
|
||||||
|
char *name = ralloc_asprintf(mem_ctx, "%s %s shader %s",
|
||||||
|
shader->info.label ?
|
||||||
|
shader->info.label : "unnamed",
|
||||||
|
gl_shader_stage_name(shader->info.stage),
|
||||||
|
shader->info.name);
|
||||||
|
g.enable_debug(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
g.generate_code(v->cfg, prog_data->simd_size, v->shader_stats,
|
||||||
|
v->performance_analysis.require(), stats);
|
||||||
|
|
||||||
|
delete v8;
|
||||||
|
delete v16;
|
||||||
|
|
||||||
|
g.add_const_data(shader->constant_data, shader->constant_data_size);
|
||||||
|
|
||||||
|
return g.get_assembly();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test the dispatch mask packing assumptions of
|
* Test the dispatch mask packing assumptions of
|
||||||
* brw_stage_has_packed_dispatch(). Call this from e.g. the top of
|
* brw_stage_has_packed_dispatch(). Call this from e.g. the top of
|
||||||
|
@@ -127,6 +127,7 @@ public:
|
|||||||
bool run_tes();
|
bool run_tes();
|
||||||
bool run_gs();
|
bool run_gs();
|
||||||
bool run_cs(bool allow_spilling);
|
bool run_cs(bool allow_spilling);
|
||||||
|
bool run_bs(bool allow_spilling);
|
||||||
void optimize();
|
void optimize();
|
||||||
void allocate_registers(bool allow_spilling);
|
void allocate_registers(bool allow_spilling);
|
||||||
void setup_fs_payload_gen4();
|
void setup_fs_payload_gen4();
|
||||||
@@ -250,6 +251,8 @@ public:
|
|||||||
nir_intrinsic_instr *instr);
|
nir_intrinsic_instr *instr);
|
||||||
void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
|
void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
|
||||||
nir_intrinsic_instr *instr);
|
nir_intrinsic_instr *instr);
|
||||||
|
void nir_emit_bs_intrinsic(const brw::fs_builder &bld,
|
||||||
|
nir_intrinsic_instr *instr);
|
||||||
fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld,
|
fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld,
|
||||||
nir_intrinsic_instr *instr);
|
nir_intrinsic_instr *instr);
|
||||||
fs_reg get_nir_ssbo_intrinsic_index(const brw::fs_builder &bld,
|
fs_reg get_nir_ssbo_intrinsic_index(const brw::fs_builder &bld,
|
||||||
|
@@ -2593,6 +2593,35 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||||||
brw_float_controls_mode(p, src[0].d, src[1].d);
|
brw_float_controls_mode(p, src[0].d, src[1].d);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SHADER_OPCODE_GET_DSS_ID:
|
||||||
|
/* The Slice, Dual-SubSlice, SubSlice, EU, and Thread IDs are all
|
||||||
|
* stored in sr0.0. Normally, for reading from HW regs, we'd just do
|
||||||
|
* this in the IR and let the back-end generate some code but these
|
||||||
|
* live in the state register which tends to have special rules.
|
||||||
|
*
|
||||||
|
* For convenience, we combine Slice ID and Dual-SubSlice ID into a
|
||||||
|
* single ID.
|
||||||
|
*/
|
||||||
|
if (devinfo->gen == 12) {
|
||||||
|
/* There is a SWSB restriction that requires that any time sr0 is
|
||||||
|
* accessed both the instruction doing the access and the next one
|
||||||
|
* have SWSB set to RegDist(1).
|
||||||
|
*/
|
||||||
|
if (brw_get_default_swsb(p).mode != TGL_SBID_NULL)
|
||||||
|
brw_SYNC(p, TGL_SYNC_NOP);
|
||||||
|
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||||
|
brw_SHR(p, dst, brw_sr0_reg(0), brw_imm_ud(9));
|
||||||
|
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||||
|
brw_AND(p, dst, dst, brw_imm_ud(0x1f));
|
||||||
|
} else {
|
||||||
|
/* These move around basically every hardware generation, so don't
|
||||||
|
* do any >= checks and fail if the platform hasn't explicitly
|
||||||
|
* been enabled here.
|
||||||
|
*/
|
||||||
|
unreachable("Unsupported platform");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
unreachable("Unsupported opcode");
|
unreachable("Unsupported opcode");
|
||||||
|
|
||||||
|
@@ -497,6 +497,14 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
|
|||||||
case MESA_SHADER_KERNEL:
|
case MESA_SHADER_KERNEL:
|
||||||
nir_emit_cs_intrinsic(abld, nir_instr_as_intrinsic(instr));
|
nir_emit_cs_intrinsic(abld, nir_instr_as_intrinsic(instr));
|
||||||
break;
|
break;
|
||||||
|
case MESA_SHADER_RAYGEN:
|
||||||
|
case MESA_SHADER_ANY_HIT:
|
||||||
|
case MESA_SHADER_CLOSEST_HIT:
|
||||||
|
case MESA_SHADER_MISS:
|
||||||
|
case MESA_SHADER_INTERSECTION:
|
||||||
|
case MESA_SHADER_CALLABLE:
|
||||||
|
nir_emit_bs_intrinsic(abld, nir_instr_as_intrinsic(instr));
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
unreachable("unsupported shader stage");
|
unreachable("unsupported shader stage");
|
||||||
}
|
}
|
||||||
@@ -3871,6 +3879,31 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
fs_visitor::nir_emit_bs_intrinsic(const fs_builder &bld,
|
||||||
|
nir_intrinsic_instr *instr)
|
||||||
|
{
|
||||||
|
assert(brw_shader_stage_is_bindless(stage));
|
||||||
|
|
||||||
|
fs_reg dest;
|
||||||
|
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
|
||||||
|
dest = get_nir_dest(instr->dest);
|
||||||
|
|
||||||
|
switch (instr->intrinsic) {
|
||||||
|
case nir_intrinsic_load_btd_global_arg_addr_intel:
|
||||||
|
bld.MOV(dest, retype(brw_vec1_grf(2, 0), dest.type));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case nir_intrinsic_load_btd_local_arg_addr_intel:
|
||||||
|
bld.MOV(dest, retype(brw_vec1_grf(2, 2), dest.type));
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
nir_emit_intrinsic(bld, instr);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static fs_reg
|
static fs_reg
|
||||||
brw_nir_reduction_op_identity(const fs_builder &bld,
|
brw_nir_reduction_op_identity(const fs_builder &bld,
|
||||||
nir_op op, brw_reg_type type)
|
nir_op op, brw_reg_type type)
|
||||||
@@ -5436,6 +5469,44 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case nir_intrinsic_load_btd_dss_id_intel:
|
||||||
|
bld.emit(SHADER_OPCODE_GET_DSS_ID,
|
||||||
|
retype(dest, BRW_REGISTER_TYPE_UD));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case nir_intrinsic_load_btd_stack_id_intel:
|
||||||
|
if (stage == MESA_SHADER_COMPUTE) {
|
||||||
|
assert(brw_cs_prog_data(prog_data)->uses_btd_stack_ids);
|
||||||
|
} else {
|
||||||
|
assert(brw_shader_stage_is_bindless(stage));
|
||||||
|
}
|
||||||
|
/* Stack IDs are always in R1 regardless of whether we're coming from a
|
||||||
|
* bindless shader or a regular compute shader.
|
||||||
|
*/
|
||||||
|
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
|
||||||
|
retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UW));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case nir_intrinsic_btd_spawn_intel:
|
||||||
|
if (stage == MESA_SHADER_COMPUTE) {
|
||||||
|
assert(brw_cs_prog_data(prog_data)->uses_btd_stack_ids);
|
||||||
|
} else {
|
||||||
|
assert(brw_shader_stage_is_bindless(stage));
|
||||||
|
}
|
||||||
|
bld.emit(SHADER_OPCODE_BTD_SPAWN_LOGICAL, bld.null_reg_ud(),
|
||||||
|
bld.emit_uniformize(get_nir_src(instr->src[0])),
|
||||||
|
get_nir_src(instr->src[1]));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case nir_intrinsic_btd_retire_intel:
|
||||||
|
if (stage == MESA_SHADER_COMPUTE) {
|
||||||
|
assert(brw_cs_prog_data(prog_data)->uses_btd_stack_ids);
|
||||||
|
} else {
|
||||||
|
assert(brw_shader_stage_is_bindless(stage));
|
||||||
|
}
|
||||||
|
bld.emit(SHADER_OPCODE_BTD_RETIRE_LOGICAL);
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
unreachable("unknown intrinsic");
|
unreachable("unknown intrinsic");
|
||||||
}
|
}
|
||||||
|
@@ -844,9 +844,6 @@ fs_visitor::emit_cs_terminate()
|
|||||||
{
|
{
|
||||||
assert(devinfo->gen >= 7);
|
assert(devinfo->gen >= 7);
|
||||||
|
|
||||||
/* We are getting the thread ID from the compute shader header */
|
|
||||||
assert(stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL);
|
|
||||||
|
|
||||||
/* We can't directly send from g0, since sends with EOT have to use
|
/* We can't directly send from g0, since sends with EOT have to use
|
||||||
* g112-127. So, copy it to a virtual register, The register allocator will
|
* g112-127. So, copy it to a virtual register, The register allocator will
|
||||||
* make sure it uses the appropriate register range.
|
* make sure it uses the appropriate register range.
|
||||||
|
@@ -355,6 +355,7 @@ namespace {
|
|||||||
case TCS_OPCODE_SRC0_010_IS_ZERO:
|
case TCS_OPCODE_SRC0_010_IS_ZERO:
|
||||||
case TCS_OPCODE_GET_PRIMITIVE_ID:
|
case TCS_OPCODE_GET_PRIMITIVE_ID:
|
||||||
case TES_OPCODE_GET_PRIMITIVE_ID:
|
case TES_OPCODE_GET_PRIMITIVE_ID:
|
||||||
|
case SHADER_OPCODE_GET_DSS_ID:
|
||||||
if (devinfo->gen >= 11) {
|
if (devinfo->gen >= 11) {
|
||||||
return calculate_desc(info, unit_fpu, 0, 2, 0, 0, 2,
|
return calculate_desc(info, unit_fpu, 0, 2, 0, 0, 2,
|
||||||
0, 10, 6 /* XXX */, 14, 0, 0);
|
0, 10, 6 /* XXX */, 14, 0, 0);
|
||||||
@@ -1086,6 +1087,11 @@ namespace {
|
|||||||
} else {
|
} else {
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH:
|
||||||
|
return calculate_desc(info, unit_spawner, 2, 0, 0, 0 /* XXX */, 0,
|
||||||
|
10 /* XXX */, 0, 0, 0, 0, 0);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
|
@@ -524,6 +524,15 @@ schedule_node::set_latency_gen7(bool is_haswell)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH:
|
||||||
|
/* TODO.
|
||||||
|
*
|
||||||
|
* We'll assume for the moment that this is pretty quick as it
|
||||||
|
* doesn't actually return any data.
|
||||||
|
*/
|
||||||
|
latency = 200;
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
unreachable("Unknown SFID");
|
unreachable("Unknown SFID");
|
||||||
}
|
}
|
||||||
|
@@ -547,6 +547,12 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
|
|||||||
return "rnd_mode";
|
return "rnd_mode";
|
||||||
case SHADER_OPCODE_FLOAT_CONTROL_MODE:
|
case SHADER_OPCODE_FLOAT_CONTROL_MODE:
|
||||||
return "float_control_mode";
|
return "float_control_mode";
|
||||||
|
case SHADER_OPCODE_GET_DSS_ID:
|
||||||
|
return "get_dss_id";
|
||||||
|
case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
|
||||||
|
return "btd_spawn_logical";
|
||||||
|
case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
|
||||||
|
return "btd_retire_logical";
|
||||||
}
|
}
|
||||||
|
|
||||||
unreachable("not reached");
|
unreachable("not reached");
|
||||||
@@ -1104,6 +1110,8 @@ backend_instruction::has_side_effects() const
|
|||||||
case FS_OPCODE_SCHEDULING_FENCE:
|
case FS_OPCODE_SCHEDULING_FENCE:
|
||||||
case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
|
case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
|
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
|
||||||
|
case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
|
||||||
|
case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
return eot;
|
return eot;
|
||||||
|
Reference in New Issue
Block a user