anv: Implement Mesh Shading pipeline
The Mesh pipeline is implemented as a variant of the regular (primitive) Graphics Pipeline.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Marcin Ślusarz <marcin.slusarz@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13662>
This commit is contained in:
@@ -92,9 +92,8 @@ upload_blorp_shader(struct blorp_batch *batch, uint32_t stage,
|
||||
void
|
||||
anv_device_init_blorp(struct anv_device *device)
|
||||
{
|
||||
const struct intel_device_info *devinfo = &device->info;
|
||||
const struct blorp_config config = {
|
||||
.use_mesh_shading = devinfo->has_mesh_shading,
|
||||
.use_mesh_shading = device->physical->vk.supported_extensions.NV_mesh_shader,
|
||||
};
|
||||
|
||||
blorp_init(&device->blorp, device, &device->isl_dev, &config);
|
||||
|
@@ -1979,6 +1979,10 @@ anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
|
||||
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
|
||||
VK_SHADER_STAGE_CALLABLE_BIT_KHR;
|
||||
}
|
||||
if (pdevice->vk.supported_extensions.NV_mesh_shader) {
|
||||
scalar_stages |= VK_SHADER_STAGE_TASK_BIT_NV |
|
||||
VK_SHADER_STAGE_MESH_BIT_NV;
|
||||
}
|
||||
p->subgroupSupportedStages = scalar_stages;
|
||||
p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
|
||||
VK_SUBGROUP_FEATURE_VOTE_BIT |
|
||||
|
@@ -135,6 +135,7 @@ anv_shader_compile_to_nir(struct anv_device *device,
|
||||
.int64 = pdevice->info.ver >= 8,
|
||||
.int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
|
||||
.integer_functions2 = pdevice->info.ver >= 8,
|
||||
.mesh_shading_nv = pdevice->vk.supported_extensions.NV_mesh_shader,
|
||||
.min_lod = true,
|
||||
.multiview = true,
|
||||
.physical_storage_buffer_address = pdevice->has_a64_buffer_access,
|
||||
@@ -526,6 +527,28 @@ pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
populate_task_prog_key(const struct intel_device_info *devinfo,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_access,
|
||||
struct brw_task_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(devinfo, subgroup_size_type, robust_buffer_access, &key->base);
|
||||
}
|
||||
|
||||
static void
|
||||
populate_mesh_prog_key(const struct intel_device_info *devinfo,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_access,
|
||||
struct brw_mesh_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(devinfo, subgroup_size_type, robust_buffer_access, &key->base);
|
||||
}
|
||||
|
||||
static void
|
||||
populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
|
||||
VkPipelineShaderStageCreateFlags flags,
|
||||
@@ -907,6 +930,10 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
|
||||
}
|
||||
}
|
||||
|
||||
if (gl_shader_stage_is_compute(nir->info.stage) ||
|
||||
gl_shader_stage_is_mesh(nir->info.stage))
|
||||
NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
|
||||
|
||||
stage->nir = nir;
|
||||
}
|
||||
|
||||
@@ -1111,6 +1138,70 @@ anv_pipeline_compile_gs(const struct brw_compiler *compiler,
|
||||
gs_stage->code = brw_compile_gs(compiler, mem_ctx, &params);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_pipeline_link_task(const struct brw_compiler *compiler,
|
||||
struct anv_pipeline_stage *task_stage,
|
||||
struct anv_pipeline_stage *next_stage)
|
||||
{
|
||||
assert(next_stage);
|
||||
assert(next_stage->stage == MESA_SHADER_MESH);
|
||||
brw_nir_link_shaders(compiler, task_stage->nir, next_stage->nir);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_pipeline_compile_task(const struct brw_compiler *compiler,
|
||||
void *mem_ctx,
|
||||
struct anv_device *device,
|
||||
struct anv_pipeline_stage *task_stage)
|
||||
{
|
||||
task_stage->num_stats = 1;
|
||||
|
||||
struct brw_compile_task_params params = {
|
||||
.nir = task_stage->nir,
|
||||
.key = &task_stage->key.task,
|
||||
.prog_data = &task_stage->prog_data.task,
|
||||
.stats = task_stage->stats,
|
||||
.log_data = device,
|
||||
};
|
||||
|
||||
task_stage->code = brw_compile_task(compiler, mem_ctx, ¶ms);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_pipeline_link_mesh(const struct brw_compiler *compiler,
|
||||
struct anv_pipeline_stage *mesh_stage,
|
||||
struct anv_pipeline_stage *next_stage)
|
||||
{
|
||||
if (next_stage) {
|
||||
brw_nir_link_shaders(compiler, mesh_stage->nir, next_stage->nir);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
anv_pipeline_compile_mesh(const struct brw_compiler *compiler,
|
||||
void *mem_ctx,
|
||||
struct anv_device *device,
|
||||
struct anv_pipeline_stage *mesh_stage,
|
||||
struct anv_pipeline_stage *prev_stage)
|
||||
{
|
||||
mesh_stage->num_stats = 1;
|
||||
|
||||
struct brw_compile_mesh_params params = {
|
||||
.nir = mesh_stage->nir,
|
||||
.key = &mesh_stage->key.mesh,
|
||||
.prog_data = &mesh_stage->prog_data.mesh,
|
||||
.stats = mesh_stage->stats,
|
||||
.log_data = device,
|
||||
};
|
||||
|
||||
if (prev_stage) {
|
||||
assert(prev_stage->stage == MESA_SHADER_TASK);
|
||||
params.tue_map = &prev_stage->prog_data.task.map;
|
||||
}
|
||||
|
||||
mesh_stage->code = brw_compile_mesh(compiler, mem_ctx, ¶ms);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_pipeline_link_fs(const struct brw_compiler *compiler,
|
||||
struct anv_pipeline_stage *stage)
|
||||
@@ -1226,8 +1317,6 @@ anv_pipeline_compile_fs(const struct brw_compiler *compiler,
|
||||
* we need this before we call spirv_to_nir.
|
||||
*/
|
||||
assert(prev_stage);
|
||||
fs_stage->key.wm.input_slots_valid =
|
||||
prev_stage->prog_data.vue.vue_map.slots_valid;
|
||||
|
||||
struct brw_compile_fs_params params = {
|
||||
.nir = fs_stage->nir,
|
||||
@@ -1239,6 +1328,14 @@ anv_pipeline_compile_fs(const struct brw_compiler *compiler,
|
||||
.log_data = device,
|
||||
};
|
||||
|
||||
if (prev_stage->stage == MESA_SHADER_MESH) {
|
||||
params.mue_map = &prev_stage->prog_data.mesh.map;
|
||||
/* TODO(mesh): Slots valid, do we even use/rely on it? */
|
||||
} else {
|
||||
fs_stage->key.wm.input_slots_valid =
|
||||
prev_stage->prog_data.vue.vue_map.slots_valid;
|
||||
}
|
||||
|
||||
fs_stage->code = brw_compile_fs(compiler, mem_ctx, &params);
|
||||
|
||||
fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
|
||||
@@ -1412,7 +1509,7 @@ anv_subgroup_size_type(gl_shader_stage stage,
|
||||
get_module_spirv_version(module) >= 0x10600;
|
||||
|
||||
if (rss_info) {
|
||||
assert(stage == MESA_SHADER_COMPUTE);
|
||||
assert(gl_shader_stage_uses_workgroup(stage));
|
||||
/* These enum values are expressly chosen to be equal to the subgroup
|
||||
* size that they require.
|
||||
*/
|
||||
@@ -1501,8 +1598,12 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
|
||||
stages[stage].spec_info,
|
||||
stages[stage].shader_sha1);
|
||||
|
||||
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
|
||||
vk_find_struct_const(sinfo->pNext,
|
||||
PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
|
||||
|
||||
enum brw_subgroup_size_type subgroup_size_type =
|
||||
anv_subgroup_size_type(stage, stages[stage].module, sinfo->flags, NULL);
|
||||
anv_subgroup_size_type(stage, stages[stage].module, sinfo->flags, rss_info);
|
||||
|
||||
const struct intel_device_info *devinfo = &pipeline->base.device->info;
|
||||
switch (stage) {
|
||||
@@ -1540,6 +1641,16 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
|
||||
&stages[stage].key.wm);
|
||||
break;
|
||||
}
|
||||
case MESA_SHADER_TASK:
|
||||
populate_task_prog_key(devinfo, subgroup_size_type,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[stage].key.task);
|
||||
break;
|
||||
case MESA_SHADER_MESH:
|
||||
populate_mesh_prog_key(devinfo, subgroup_size_type,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[stage].key.mesh);
|
||||
break;
|
||||
default:
|
||||
unreachable("Invalid graphics shader stage");
|
||||
}
|
||||
@@ -1548,7 +1659,8 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
|
||||
stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
|
||||
}
|
||||
|
||||
assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
|
||||
assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT ||
|
||||
pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_NV);
|
||||
|
||||
ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
|
||||
|
||||
@@ -1715,6 +1827,12 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
anv_pipeline_link_gs(compiler, &stages[s], next_stage);
|
||||
break;
|
||||
case MESA_SHADER_TASK:
|
||||
anv_pipeline_link_task(compiler, &stages[s], next_stage);
|
||||
break;
|
||||
case MESA_SHADER_MESH:
|
||||
anv_pipeline_link_mesh(compiler, &stages[s], next_stage);
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
anv_pipeline_link_fs(compiler, &stages[s]);
|
||||
break;
|
||||
@@ -1832,6 +1950,14 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
|
||||
anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
|
||||
&stages[s], prev_stage);
|
||||
break;
|
||||
case MESA_SHADER_TASK:
|
||||
anv_pipeline_compile_task(compiler, stage_ctx, pipeline->base.device,
|
||||
&stages[s]);
|
||||
break;
|
||||
case MESA_SHADER_MESH:
|
||||
anv_pipeline_compile_mesh(compiler, stage_ctx, pipeline->base.device,
|
||||
&stages[s], prev_stage);
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
|
||||
&stages[s], prev_stage);
|
||||
@@ -2006,8 +2132,6 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
||||
|
||||
anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
|
||||
|
||||
NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
|
||||
|
||||
stage.num_stats = 1;
|
||||
|
||||
struct brw_compile_cs_params params = {
|
||||
@@ -2529,6 +2653,9 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
|
||||
if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
|
||||
pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
|
||||
|
||||
if (anv_pipeline_is_mesh(pipeline))
|
||||
assert(device->physical->vk.supported_extensions.NV_mesh_shader);
|
||||
|
||||
copy_non_dynamic_state(pipeline, pCreateInfo);
|
||||
|
||||
pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;
|
||||
@@ -2621,6 +2748,9 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
|
||||
pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
|
||||
else
|
||||
pipeline->topology = vk_to_intel_primitive_type[ia_info->topology];
|
||||
} else {
|
||||
assert(anv_pipeline_is_mesh(pipeline));
|
||||
/* TODO(mesh): Mesh vs. Multiview with Instancing. */
|
||||
}
|
||||
|
||||
/* If rasterization is not enabled, ms_info must be ignored. */
|
||||
|
@@ -3052,9 +3052,9 @@ struct anv_cmd_state {
|
||||
struct anv_state binding_tables[MESA_VULKAN_SHADER_STAGES];
|
||||
struct anv_state samplers[MESA_VULKAN_SHADER_STAGES];
|
||||
|
||||
unsigned char sampler_sha1s[MESA_SHADER_STAGES][20];
|
||||
unsigned char surface_sha1s[MESA_SHADER_STAGES][20];
|
||||
unsigned char push_sha1s[MESA_SHADER_STAGES][20];
|
||||
unsigned char sampler_sha1s[MESA_VULKAN_SHADER_STAGES][20];
|
||||
unsigned char surface_sha1s[MESA_VULKAN_SHADER_STAGES][20];
|
||||
unsigned char push_sha1s[MESA_VULKAN_SHADER_STAGES][20];
|
||||
|
||||
/**
|
||||
* Whether or not the gfx8 PMA fix is enabled. We ensure that, at the top
|
||||
@@ -3617,6 +3617,12 @@ anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
|
||||
return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
anv_pipeline_is_mesh(const struct anv_graphics_pipeline *pipeline)
|
||||
{
|
||||
return anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH);
|
||||
}
|
||||
|
||||
#define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage) \
|
||||
static inline const struct brw_##prefix##_prog_data * \
|
||||
get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline) \
|
||||
@@ -3634,6 +3640,8 @@ ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
|
||||
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
|
||||
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
|
||||
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
|
||||
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(mesh, MESA_SHADER_MESH)
|
||||
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(task, MESA_SHADER_TASK)
|
||||
|
||||
static inline const struct brw_cs_prog_data *
|
||||
get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
|
||||
|
@@ -2569,8 +2569,6 @@ void genX(CmdPipelineBarrier2KHR)(
|
||||
static void
|
||||
cmd_buffer_alloc_push_constants(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
assert(anv_pipeline_is_primitive(cmd_buffer->state.gfx.pipeline));
|
||||
|
||||
VkShaderStageFlags stages =
|
||||
cmd_buffer->state.gfx.pipeline->active_stages;
|
||||
|
||||
@@ -2579,7 +2577,9 @@ cmd_buffer_alloc_push_constants(struct anv_cmd_buffer *cmd_buffer)
|
||||
* uses push constants, this may be suboptimal. However, avoiding stalls
|
||||
* seems more important.
|
||||
*/
|
||||
stages |= VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT;
|
||||
stages |= VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
if (anv_pipeline_is_primitive(cmd_buffer->state.gfx.pipeline))
|
||||
stages |= VK_SHADER_STAGE_VERTEX_BIT;
|
||||
|
||||
if (stages == cmd_buffer->state.gfx.push_constant_stages)
|
||||
return;
|
||||
@@ -3576,6 +3576,7 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
|
||||
};
|
||||
uint32_t dwords[GENX(3DSTATE_CLIP_length)];
|
||||
|
||||
/* TODO(mesh): Multiview. */
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
if (anv_pipeline_is_primitive(pipeline)) {
|
||||
const struct brw_vue_prog_data *last =
|
||||
@@ -4838,13 +4839,29 @@ void genX(CmdEndTransformFeedbackEXT)(
|
||||
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE;
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
void
|
||||
genX(CmdDrawMeshTasksNV)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
uint32_t taskCount,
|
||||
uint32_t firstTask)
|
||||
{
|
||||
unreachable("Unimplemented");
|
||||
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
|
||||
if (anv_batch_has_error(&cmd_buffer->batch))
|
||||
return;
|
||||
|
||||
/* TODO(mesh): Check if this is not emitting more packets than we need. */
|
||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||
|
||||
/* TODO(mesh): Emit conditional render predicate. */
|
||||
|
||||
/* TODO(mesh): Support non-zero firstTask. */
|
||||
assert(firstTask == 0);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DMESH_1D), m) {
|
||||
m.ThreadGroupCountX = taskCount;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@@ -4870,6 +4887,7 @@ genX(CmdDrawMeshTasksIndirectCountNV)(
|
||||
{
|
||||
unreachable("Unimplemented");
|
||||
}
|
||||
#endif /* GFX_VERx10 >= 125 */
|
||||
|
||||
void
|
||||
genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
|
@@ -306,12 +306,71 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
|
||||
urb.VSNumberofURBEntries = entries[i];
|
||||
}
|
||||
}
|
||||
#if GFX_VERx10 >= 125
|
||||
if (device->physical->vk.supported_extensions.NV_mesh_shader) {
|
||||
anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_MESH), zero);
|
||||
anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_TASK), zero);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
static void
|
||||
emit_urb_setup_mesh(struct anv_graphics_pipeline *pipeline,
|
||||
enum intel_urb_deref_block_size *deref_block_size)
|
||||
{
|
||||
const struct intel_device_info *devinfo = &pipeline->base.device->info;
|
||||
|
||||
const struct brw_task_prog_data *task_prog_data =
|
||||
anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK) ?
|
||||
get_task_prog_data(pipeline) : NULL;
|
||||
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
||||
|
||||
const struct intel_mesh_urb_allocation alloc =
|
||||
intel_get_mesh_urb_config(devinfo, pipeline->base.l3_config,
|
||||
task_prog_data ? task_prog_data->map.size_dw : 0,
|
||||
mesh_prog_data->map.size_dw);
|
||||
|
||||
/* Zero out the primitive pipeline URB allocations. */
|
||||
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_VS), urb) {
|
||||
urb._3DCommandSubOpcode += i;
|
||||
}
|
||||
}
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_ALLOC_TASK), urb) {
|
||||
if (task_prog_data) {
|
||||
urb.TASKURBEntryAllocationSize = alloc.task_entry_size_64b - 1;
|
||||
urb.TASKNumberofURBEntriesSlice0 = alloc.task_entries;
|
||||
urb.TASKNumberofURBEntriesSliceN = alloc.task_entries;
|
||||
urb.TASKURBStartingAddressSlice0 = alloc.task_starting_address_8kb;
|
||||
urb.TASKURBStartingAddressSliceN = alloc.task_starting_address_8kb;
|
||||
}
|
||||
}
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_ALLOC_MESH), urb) {
|
||||
urb.MESHURBEntryAllocationSize = alloc.mesh_entry_size_64b - 1;
|
||||
urb.MESHNumberofURBEntriesSlice0 = alloc.mesh_entries;
|
||||
urb.MESHNumberofURBEntriesSliceN = alloc.mesh_entries;
|
||||
urb.MESHURBStartingAddressSlice0 = alloc.mesh_starting_address_8kb;
|
||||
urb.MESHURBStartingAddressSliceN = alloc.mesh_starting_address_8kb;
|
||||
}
|
||||
|
||||
*deref_block_size = alloc.deref_block_size;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
emit_urb_setup(struct anv_graphics_pipeline *pipeline,
|
||||
enum intel_urb_deref_block_size *deref_block_size)
|
||||
{
|
||||
#if GFX_VERx10 >= 125
|
||||
if (anv_pipeline_is_mesh(pipeline)) {
|
||||
emit_urb_setup_mesh(pipeline, deref_block_size);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
unsigned entry_size[4];
|
||||
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
|
||||
const struct brw_vue_prog_data *prog_data =
|
||||
@@ -336,13 +395,20 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE), sbe);
|
||||
#if GFX_VER >= 8
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ), sbe);
|
||||
#endif
|
||||
#if GFX_VERx10 >= 125
|
||||
if (anv_pipeline_is_mesh(pipeline))
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_MESH), sbe_mesh);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
struct GENX(3DSTATE_SBE) sbe = {
|
||||
GENX(3DSTATE_SBE_header),
|
||||
.AttributeSwizzleEnable = true,
|
||||
/* TODO(mesh): Figure out cases where we need attribute swizzling. See also
|
||||
* calculate_urb_setup() and related functions.
|
||||
*/
|
||||
.AttributeSwizzleEnable = anv_pipeline_is_primitive(pipeline),
|
||||
.PointSpriteTextureCoordinateOrigin = UPPERLEFT,
|
||||
.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs,
|
||||
.ConstantInterpolationEnable = wm_prog_data->flat_inputs,
|
||||
@@ -431,6 +497,22 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
|
||||
#if GFX_VER >= 8
|
||||
sbe.ForceVertexURBEntryReadOffset = true;
|
||||
sbe.ForceVertexURBEntryReadLength = true;
|
||||
#endif
|
||||
} else {
|
||||
assert(anv_pipeline_is_mesh(pipeline));
|
||||
#if GFX_VERx10 >= 125
|
||||
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_MESH), sbe_mesh) {
|
||||
const struct brw_mue_map *mue = &mesh_prog_data->map;
|
||||
|
||||
assert(mue->per_vertex_header_size_dw % 8 == 0);
|
||||
sbe_mesh.PerVertexURBEntryOutputReadOffset = mue->per_vertex_header_size_dw / 8;
|
||||
sbe_mesh.PerVertexURBEntryOutputReadLength = DIV_ROUND_UP(mue->per_vertex_data_size_dw, 8);
|
||||
|
||||
assert(mue->per_primitive_header_size_dw % 8 == 0);
|
||||
sbe_mesh.PerPrimitiveURBEntryOutputReadOffset = mue->per_primitive_header_size_dw / 8;
|
||||
sbe_mesh.PerPrimitiveURBEntryOutputReadLength = DIV_ROUND_UP(mue->per_primitive_data_size_dw, 8);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -457,7 +539,18 @@ VkPolygonMode
|
||||
genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
|
||||
VkPrimitiveTopology primitive_topology)
|
||||
{
|
||||
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
|
||||
if (anv_pipeline_is_mesh(pipeline)) {
|
||||
switch (get_mesh_prog_data(pipeline)->primitive_type) {
|
||||
case SHADER_PRIM_POINTS:
|
||||
return VK_POLYGON_MODE_POINT;
|
||||
case SHADER_PRIM_LINES:
|
||||
return VK_POLYGON_MODE_LINE;
|
||||
case SHADER_PRIM_TRIANGLES:
|
||||
return pipeline->polygon_mode;
|
||||
default:
|
||||
unreachable("invalid primitive type for mesh");
|
||||
}
|
||||
} else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
|
||||
switch (get_gs_prog_data(pipeline)->output_topology) {
|
||||
case _3DPRIM_POINTLIST:
|
||||
return VK_POLYGON_MODE_POINT;
|
||||
@@ -678,16 +771,22 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
|
||||
sf.DerefBlockSize = urb_deref_block_size;
|
||||
#endif
|
||||
|
||||
bool point_from_shader;
|
||||
if (anv_pipeline_is_primitive(pipeline)) {
|
||||
const struct brw_vue_prog_data *last_vue_prog_data =
|
||||
anv_pipeline_get_last_vue_prog_data(pipeline);
|
||||
point_from_shader = last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ;
|
||||
} else {
|
||||
assert(anv_pipeline_is_mesh(pipeline));
|
||||
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
||||
point_from_shader = mesh_prog_data->map.start_dw[VARYING_SLOT_PSIZ] >= 0;
|
||||
}
|
||||
|
||||
if (last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
|
||||
sf.PointWidthSource = Vertex;
|
||||
} else {
|
||||
sf.PointWidthSource = State;
|
||||
sf.PointWidth = 1.0;
|
||||
}
|
||||
if (point_from_shader) {
|
||||
sf.PointWidthSource = Vertex;
|
||||
} else {
|
||||
sf.PointWidthSource = State;
|
||||
sf.PointWidth = 1.0;
|
||||
}
|
||||
|
||||
#if GFX_VER >= 8
|
||||
@@ -699,7 +798,7 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
|
||||
#endif
|
||||
|
||||
VkPolygonMode raster_mode =
|
||||
genX(raster_polygon_mode)(pipeline, ia_info->topology);
|
||||
genX(raster_polygon_mode)(pipeline, ia_info ? ia_info->topology : VK_PRIMITIVE_TOPOLOGY_MAX_ENUM);
|
||||
bool dynamic_primitive_topology =
|
||||
dynamic_states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
|
||||
|
||||
@@ -1425,7 +1524,7 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
|
||||
* points and lines so we get "pop-free" clipping.
|
||||
*/
|
||||
VkPolygonMode raster_mode =
|
||||
genX(raster_polygon_mode)(pipeline, ia_info->topology);
|
||||
genX(raster_polygon_mode)(pipeline, ia_info ? ia_info->topology : VK_PRIMITIVE_TOPOLOGY_MAX_ENUM);
|
||||
clip.ViewportXYClipTestEnable =
|
||||
dynamic_states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY ?
|
||||
0 : (raster_mode == VK_POLYGON_MODE_FILL);
|
||||
@@ -1455,6 +1554,7 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
|
||||
clip.MinimumPointWidth = 0.125;
|
||||
clip.MaximumPointWidth = 255.875;
|
||||
|
||||
/* TODO(mesh): Multiview. */
|
||||
if (anv_pipeline_is_primitive(pipeline)) {
|
||||
const struct brw_vue_prog_data *last =
|
||||
anv_pipeline_get_last_vue_prog_data(pipeline);
|
||||
@@ -1498,6 +1598,17 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
|
||||
#endif
|
||||
|
||||
GENX(3DSTATE_CLIP_pack)(NULL, pipeline->gfx7.clip, &clip);
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
if (anv_pipeline_is_mesh(pipeline)) {
|
||||
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_CLIP_MESH), clip_mesh) {
|
||||
clip_mesh.PrimitiveHeaderEnable = mesh_prog_data->map.per_primitive_header_size_dw > 0;
|
||||
/* TODO(mesh): UserClipDistanceClipTestEnableBitmask. */
|
||||
/* TODO(mesh): UserClipDistanceCullTestEnableBitmask. */
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -2203,7 +2314,7 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline, struct anv_subpass *subp
|
||||
}
|
||||
|
||||
VkPolygonMode raster_mode =
|
||||
genX(raster_polygon_mode)(pipeline, ia->topology);
|
||||
genX(raster_polygon_mode)(pipeline, ia ? ia->topology : VK_PRIMITIVE_TOPOLOGY_MAX_ENUM);
|
||||
|
||||
wm.MultisampleRasterizationMode =
|
||||
dynamic_states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY ? 0 :
|
||||
@@ -2486,6 +2597,122 @@ emit_3dstate_primitive_replication(struct anv_graphics_pipeline *pipeline)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
static void
|
||||
emit_task_state(struct anv_graphics_pipeline *pipeline)
|
||||
{
|
||||
assert(anv_pipeline_is_mesh(pipeline));
|
||||
|
||||
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) {
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), zero);
|
||||
return;
|
||||
}
|
||||
|
||||
const struct anv_shader_bin *task_bin = pipeline->shaders[MESA_SHADER_TASK];
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), tc) {
|
||||
tc.TaskShaderEnable = true;
|
||||
tc.ScratchSpaceBuffer =
|
||||
get_scratch_surf(&pipeline->base, MESA_SHADER_TASK, task_bin);
|
||||
}
|
||||
|
||||
const struct intel_device_info *devinfo = &pipeline->base.device->info;
|
||||
const struct brw_task_prog_data *task_prog_data = get_task_prog_data(pipeline);
|
||||
const struct brw_cs_dispatch_info task_dispatch =
|
||||
brw_cs_get_dispatch_info(devinfo, &task_prog_data->base, NULL);
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_SHADER), task) {
|
||||
task.KernelStartPointer = task_bin->kernel.offset;
|
||||
task.SIMDSize = task_dispatch.simd_size / 16;
|
||||
task.MessageSIMD = task.SIMDSize;
|
||||
task.NumberofThreadsinGPGPUThreadGroup = task_dispatch.threads;
|
||||
task.ExecutionMask = task_dispatch.right_mask;
|
||||
task.LocalXMaximum = task_dispatch.group_size - 1;
|
||||
task.EmitLocalIDX = true;
|
||||
|
||||
task.NumberofBarriers = task_prog_data->base.uses_barrier;
|
||||
task.SharedLocalMemorySize =
|
||||
encode_slm_size(GFX_VER, task_prog_data->base.base.total_shared);
|
||||
}
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_SHADER_DATA), zero);
|
||||
|
||||
/* Recommended values from "Task and Mesh Distribution Programming". */
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_REDISTRIB), redistrib) {
|
||||
redistrib.LocalBOTAccumulatorThreshold = MULTIPLIER_1;
|
||||
redistrib.SmallTaskThreshold = MULTIPLIER_2;
|
||||
redistrib.TargetMeshBatchSize = MULTIPLIER_4;
|
||||
redistrib.TaskRedistributionLevel = TASKREDISTRIB_BOM;
|
||||
redistrib.TaskRedistributionMode = TASKREDISTRIB_RR_FREE;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_mesh_state(struct anv_graphics_pipeline *pipeline)
|
||||
{
|
||||
assert(anv_pipeline_is_mesh(pipeline));
|
||||
|
||||
const struct anv_shader_bin *mesh_bin = pipeline->shaders[MESA_SHADER_MESH];
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_CONTROL), mc) {
|
||||
mc.MeshShaderEnable = true;
|
||||
mc.ScratchSpaceBuffer =
|
||||
get_scratch_surf(&pipeline->base, MESA_SHADER_MESH, mesh_bin);
|
||||
|
||||
/* TODO(mesh): MaximumNumberofThreadGroups. */
|
||||
}
|
||||
|
||||
const struct intel_device_info *devinfo = &pipeline->base.device->info;
|
||||
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
||||
const struct brw_cs_dispatch_info mesh_dispatch =
|
||||
brw_cs_get_dispatch_info(devinfo, &mesh_prog_data->base, NULL);
|
||||
|
||||
const unsigned output_topology =
|
||||
mesh_prog_data->primitive_type == SHADER_PRIM_POINTS ? OUTPUT_POINT :
|
||||
mesh_prog_data->primitive_type == SHADER_PRIM_LINES ? OUTPUT_LINE :
|
||||
OUTPUT_TRI;
|
||||
|
||||
uint32_t index_format;
|
||||
switch (mesh_prog_data->index_format) {
|
||||
case BRW_INDEX_FORMAT_U32:
|
||||
index_format = INDEX_U32;
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid index format");
|
||||
}
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_SHADER), mesh) {
|
||||
mesh.KernelStartPointer = mesh_bin->kernel.offset;
|
||||
mesh.SIMDSize = mesh_dispatch.simd_size / 16;
|
||||
mesh.MessageSIMD = mesh.SIMDSize;
|
||||
mesh.NumberofThreadsinGPGPUThreadGroup = mesh_dispatch.threads;
|
||||
mesh.ExecutionMask = mesh_dispatch.right_mask;
|
||||
mesh.LocalXMaximum = mesh_dispatch.group_size - 1;
|
||||
mesh.EmitLocalIDX = true;
|
||||
|
||||
mesh.MaximumPrimitiveCount = mesh_prog_data->map.max_primitives - 1;
|
||||
mesh.OutputTopology = output_topology;
|
||||
mesh.PerVertexDataPitch = mesh_prog_data->map.per_vertex_pitch_dw / 8;
|
||||
mesh.PerPrimitiveDataPresent = mesh_prog_data->map.per_primitive_pitch_dw > 0;
|
||||
mesh.PerPrimitiveDataPitch = mesh_prog_data->map.per_primitive_pitch_dw / 8;
|
||||
mesh.IndexFormat = index_format;
|
||||
|
||||
mesh.NumberofBarriers = mesh_prog_data->base.uses_barrier;
|
||||
mesh.SharedLocalMemorySize =
|
||||
encode_slm_size(GFX_VER, mesh_prog_data->base.base.total_shared);
|
||||
}
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_SHADER_DATA), zero);
|
||||
|
||||
/* Recommended values from "Task and Mesh Distribution Programming". */
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_DISTRIB), distrib) {
|
||||
distrib.DistributionMode = MESH_RR_FREE;
|
||||
distrib.TaskDistributionBatchSize = 2; /* 2^2 thread groups */
|
||||
distrib.MeshDistributionBatchSize = 3; /* 2^3 thread groups */
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static VkResult
|
||||
genX(graphics_pipeline_create)(
|
||||
VkDevice _device,
|
||||
@@ -2616,6 +2843,19 @@ genX(graphics_pipeline_create)(
|
||||
|
||||
emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState,
|
||||
dynamic_states);
|
||||
#if GFX_VERx10 >= 125
|
||||
/* Disable Mesh. */
|
||||
if (device->physical->vk.supported_extensions.NV_mesh_shader) {
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_CONTROL), zero);
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), zero);
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
assert(anv_pipeline_is_mesh(pipeline));
|
||||
#if GFX_VERx10 >= 125
|
||||
emit_task_state(pipeline);
|
||||
emit_mesh_state(pipeline);
|
||||
#endif
|
||||
}
|
||||
|
||||
emit_3dstate_sbe(pipeline);
|
||||
|
Reference in New Issue
Block a user