diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index b4d3c5043e4..9f0fe902d7c 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -92,9 +92,8 @@ upload_blorp_shader(struct blorp_batch *batch, uint32_t stage,
 void
 anv_device_init_blorp(struct anv_device *device)
 {
-   const struct intel_device_info *devinfo = &device->info;
    const struct blorp_config config = {
-      .use_mesh_shading = devinfo->has_mesh_shading,
+      .use_mesh_shading = device->physical->vk.supported_extensions.NV_mesh_shader,
    };
 
    blorp_init(&device->blorp, device, &device->isl_dev, &config);
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index b043589f626..5fda4e5970f 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1979,6 +1979,10 @@ anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
                        VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
                        VK_SHADER_STAGE_CALLABLE_BIT_KHR;
    }
+   if (pdevice->vk.supported_extensions.NV_mesh_shader) {
+      scalar_stages |= VK_SHADER_STAGE_TASK_BIT_NV |
+                       VK_SHADER_STAGE_MESH_BIT_NV;
+   }
    p->subgroupSupportedStages = scalar_stages;
    p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                     VK_SUBGROUP_FEATURE_VOTE_BIT |
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index cf6b06c6f5c..57637ee4cc0 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -135,6 +135,7 @@ anv_shader_compile_to_nir(struct anv_device *device,
       .int64 = pdevice->info.ver >= 8,
       .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
       .integer_functions2 = pdevice->info.ver >= 8,
+      .mesh_shading_nv = pdevice->vk.supported_extensions.NV_mesh_shader,
       .min_lod = true,
       .multiview = true,
       .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
@@ -526,6 +527,28 @@ pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
    return true;
 }
 
+static void
+populate_task_prog_key(const struct intel_device_info *devinfo,
+                       enum brw_subgroup_size_type subgroup_size_type,
+                       bool robust_buffer_access,
+                       struct brw_task_prog_key *key)
+{
+   memset(key, 0, sizeof(*key));
+
+   populate_base_prog_key(devinfo, subgroup_size_type, robust_buffer_access, &key->base);
+}
+
+static void
+populate_mesh_prog_key(const struct intel_device_info *devinfo,
+                       enum brw_subgroup_size_type subgroup_size_type,
+                       bool robust_buffer_access,
+                       struct brw_mesh_prog_key *key)
+{
+   memset(key, 0, sizeof(*key));
+
+   populate_base_prog_key(devinfo, subgroup_size_type, robust_buffer_access, &key->base);
+}
+
 static void
 populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
                      VkPipelineShaderStageCreateFlags flags,
@@ -907,6 +930,10 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
       }
    }
 
+   if (gl_shader_stage_is_compute(nir->info.stage) ||
+       gl_shader_stage_is_mesh(nir->info.stage))
+      NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
+
    stage->nir = nir;
 }
 
@@ -1111,6 +1138,70 @@ anv_pipeline_compile_gs(const struct brw_compiler *compiler,
    gs_stage->code = brw_compile_gs(compiler, mem_ctx, &params);
 }
 
+static void
+anv_pipeline_link_task(const struct brw_compiler *compiler,
+                       struct anv_pipeline_stage *task_stage,
+                       struct anv_pipeline_stage *next_stage)
+{
+   assert(next_stage);
+   assert(next_stage->stage == MESA_SHADER_MESH);
+   brw_nir_link_shaders(compiler, task_stage->nir, next_stage->nir);
+}
+
+static void
+anv_pipeline_compile_task(const struct brw_compiler *compiler,
+                          void *mem_ctx,
+                          struct anv_device *device,
+                          struct anv_pipeline_stage *task_stage)
+{
+   task_stage->num_stats = 1;
+
+   struct brw_compile_task_params params = {
+      .nir = task_stage->nir,
+      .key = &task_stage->key.task,
+      .prog_data = &task_stage->prog_data.task,
+      .stats = task_stage->stats,
+      .log_data = device,
+   };
+
+   task_stage->code = brw_compile_task(compiler, mem_ctx, &params);
+}
+
+static void
+anv_pipeline_link_mesh(const struct brw_compiler *compiler,
+                       struct anv_pipeline_stage *mesh_stage,
+                       struct anv_pipeline_stage *next_stage)
+{
+   if (next_stage) {
+      brw_nir_link_shaders(compiler, mesh_stage->nir, next_stage->nir);
+   }
+}
+
+static void
+anv_pipeline_compile_mesh(const struct brw_compiler *compiler,
+                          void *mem_ctx,
+                          struct anv_device *device,
+                          struct anv_pipeline_stage *mesh_stage,
+                          struct anv_pipeline_stage *prev_stage)
+{
+   mesh_stage->num_stats = 1;
+
+   struct brw_compile_mesh_params params = {
+      .nir = mesh_stage->nir,
+      .key = &mesh_stage->key.mesh,
+      .prog_data = &mesh_stage->prog_data.mesh,
+      .stats = mesh_stage->stats,
+      .log_data = device,
+   };
+
+   if (prev_stage) {
+      assert(prev_stage->stage == MESA_SHADER_TASK);
+      params.tue_map = &prev_stage->prog_data.task.map;
+   }
+
+   mesh_stage->code = brw_compile_mesh(compiler, mem_ctx, &params);
+}
+
 static void
 anv_pipeline_link_fs(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *stage)
@@ -1226,8 +1317,6 @@ anv_pipeline_compile_fs(const struct brw_compiler *compiler,
     * we need this before we call spirv_to_nir.
     */
    assert(prev_stage);
-   fs_stage->key.wm.input_slots_valid =
-      prev_stage->prog_data.vue.vue_map.slots_valid;
 
    struct brw_compile_fs_params params = {
       .nir = fs_stage->nir,
@@ -1239,6 +1328,14 @@ anv_pipeline_compile_fs(const struct brw_compiler *compiler,
       .log_data = device,
    };
 
+   if (prev_stage->stage == MESA_SHADER_MESH) {
+      params.mue_map = &prev_stage->prog_data.mesh.map;
+      /* TODO(mesh): Slots valid, do we even use/rely on it? */
+   } else {
+      fs_stage->key.wm.input_slots_valid =
+         prev_stage->prog_data.vue.vue_map.slots_valid;
+   }
+
    fs_stage->code = brw_compile_fs(compiler, mem_ctx, &params);
 
    fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
@@ -1412,7 +1509,7 @@ anv_subgroup_size_type(gl_shader_stage stage,
       get_module_spirv_version(module) >= 0x10600;
 
    if (rss_info) {
-      assert(stage == MESA_SHADER_COMPUTE);
+      assert(gl_shader_stage_uses_workgroup(stage));
       /* These enum values are expressly chosen to be equal to the subgroup
        * size that they require.
        */
@@ -1501,8 +1598,12 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
                          stages[stage].spec_info,
                          stages[stage].shader_sha1);
 
+      const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
+         vk_find_struct_const(sinfo->pNext,
+                              PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
+
       enum brw_subgroup_size_type subgroup_size_type =
-         anv_subgroup_size_type(stage, stages[stage].module, sinfo->flags, NULL);
+         anv_subgroup_size_type(stage, stages[stage].module, sinfo->flags, rss_info);
 
       const struct intel_device_info *devinfo = &pipeline->base.device->info;
       switch (stage) {
@@ -1540,6 +1641,16 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
                               &stages[stage].key.wm);
          break;
       }
+      case MESA_SHADER_TASK:
+         populate_task_prog_key(devinfo, subgroup_size_type,
+                                pipeline->base.device->robust_buffer_access,
+                                &stages[stage].key.task);
+         break;
+      case MESA_SHADER_MESH:
+         populate_mesh_prog_key(devinfo, subgroup_size_type,
+                                pipeline->base.device->robust_buffer_access,
+                                &stages[stage].key.mesh);
+         break;
       default:
          unreachable("Invalid graphics shader stage");
       }
@@ -1548,7 +1659,8 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
       stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
    }
 
-   assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
+   assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT ||
+          pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_NV);
 
    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
 
@@ -1715,6 +1827,12 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
       case MESA_SHADER_GEOMETRY:
         anv_pipeline_link_gs(compiler, &stages[s], next_stage);
         break;
+      case MESA_SHADER_TASK:
+         anv_pipeline_link_task(compiler, &stages[s], next_stage);
+         break;
+      case MESA_SHADER_MESH:
+         anv_pipeline_link_mesh(compiler, &stages[s], next_stage);
+         break;
       case MESA_SHADER_FRAGMENT:
         anv_pipeline_link_fs(compiler, &stages[s]);
         break;
@@ -1832,6 +1950,14 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
         anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
                                 &stages[s], prev_stage);
         break;
+      case MESA_SHADER_TASK:
+         anv_pipeline_compile_task(compiler, stage_ctx, pipeline->base.device,
+                                   &stages[s]);
+         break;
+      case MESA_SHADER_MESH:
+         anv_pipeline_compile_mesh(compiler, stage_ctx, pipeline->base.device,
+                                   &stages[s], prev_stage);
+         break;
       case MESA_SHADER_FRAGMENT:
         anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
                                 &stages[s], prev_stage);
@@ -2006,8 +2132,6 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
 
    anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
 
-   NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
-
    stage.num_stats = 1;
 
    struct brw_compile_cs_params params = {
@@ -2529,6 +2653,9 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
    if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
       pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
 
+   if (anv_pipeline_is_mesh(pipeline))
+      assert(device->physical->vk.supported_extensions.NV_mesh_shader);
+
    copy_non_dynamic_state(pipeline, pCreateInfo);
 
    pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;
@@ -2621,6 +2748,9 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
         pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
      else
         pipeline->topology = vk_to_intel_primitive_type[ia_info->topology];
+   } else {
+      assert(anv_pipeline_is_mesh(pipeline));
+      /* TODO(mesh): Mesh vs. Multiview with Instancing. */
    }
 
    /* If rasterization is not enabled, ms_info must be ignored. */
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index e5247634d1d..60ed616d904 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -3052,9 +3052,9 @@ struct anv_cmd_state {
    struct anv_state binding_tables[MESA_VULKAN_SHADER_STAGES];
    struct anv_state samplers[MESA_VULKAN_SHADER_STAGES];
 
-   unsigned char sampler_sha1s[MESA_SHADER_STAGES][20];
-   unsigned char surface_sha1s[MESA_SHADER_STAGES][20];
-   unsigned char push_sha1s[MESA_SHADER_STAGES][20];
+   unsigned char sampler_sha1s[MESA_VULKAN_SHADER_STAGES][20];
+   unsigned char surface_sha1s[MESA_VULKAN_SHADER_STAGES][20];
+   unsigned char push_sha1s[MESA_VULKAN_SHADER_STAGES][20];
 
    /**
     * Whether or not the gfx8 PMA fix is enabled.  We ensure that, at the top
@@ -3617,6 +3617,12 @@ anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
    return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX);
 }
 
+static inline bool
+anv_pipeline_is_mesh(const struct anv_graphics_pipeline *pipeline)
+{
+   return anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH);
+}
+
 #define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage) \
 static inline const struct brw_##prefix##_prog_data * \
 get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline) \
@@ -3634,6 +3640,8 @@ ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
+ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(mesh, MESA_SHADER_MESH)
+ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(task, MESA_SHADER_TASK)
 
 static inline const struct brw_cs_prog_data *
 get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index f22399b625a..9714d50c6bb 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2569,8 +2569,6 @@ void genX(CmdPipelineBarrier2KHR)(
 static void
 cmd_buffer_alloc_push_constants(struct anv_cmd_buffer *cmd_buffer)
 {
-   assert(anv_pipeline_is_primitive(cmd_buffer->state.gfx.pipeline));
-
    VkShaderStageFlags stages =
       cmd_buffer->state.gfx.pipeline->active_stages;
 
@@ -2579,7 +2577,9 @@ cmd_buffer_alloc_push_constants(struct anv_cmd_buffer *cmd_buffer)
     * uses push concstants, this may be suboptimal. However, avoiding stalls
    * seems more important.
    */
-   stages |= VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT;
+   stages |= VK_SHADER_STAGE_FRAGMENT_BIT;
+   if (anv_pipeline_is_primitive(cmd_buffer->state.gfx.pipeline))
+      stages |= VK_SHADER_STAGE_VERTEX_BIT;
 
    if (stages == cmd_buffer->state.gfx.push_constant_stages)
      return;
@@ -3576,6 +3576,7 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
    };
 
    uint32_t dwords[GENX(3DSTATE_CLIP_length)];
+   /* TODO(mesh): Multiview. */
    struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
    if (anv_pipeline_is_primitive(pipeline)) {
       const struct brw_vue_prog_data *last =
@@ -4838,13 +4839,29 @@ void genX(CmdEndTransformFeedbackEXT)(
    cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE;
 }
 
+#if GFX_VERx10 >= 125
 void genX(CmdDrawMeshTasksNV)(
     VkCommandBuffer commandBuffer,
     uint32_t taskCount,
     uint32_t firstTask)
 {
-   unreachable("Unimplemented");
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   if (anv_batch_has_error(&cmd_buffer->batch))
+      return;
+
+   /* TODO(mesh): Check if this is not emitting more packets than we need. */
+   genX(cmd_buffer_flush_state)(cmd_buffer);
+
+   /* TODO(mesh): Emit conditional render predicate. */
+
+   /* TODO(mesh): Support non-zero firstTask. */
+   assert(firstTask == 0);
+
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DMESH_1D), m) {
+      m.ThreadGroupCountX = taskCount;
+   }
 }
 
 void
 genX(CmdDrawMeshTasksIndirectNV)(
@@ -4870,6 +4887,7 @@ genX(CmdDrawMeshTasksIndirectCountNV)(
 {
    unreachable("Unimplemented");
 }
+#endif /* GFX_VERx10 >= 125 */
 
 void
 genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 8640d9a1ac2..44f1760384c 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -306,12 +306,71 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
          urb.VSNumberofURBEntries = entries[i];
       }
    }
+#if GFX_VERx10 >= 125
+   if (device->physical->vk.supported_extensions.NV_mesh_shader) {
+      anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_MESH), zero);
+      anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_TASK), zero);
+   }
+#endif
 }
 
+#if GFX_VERx10 >= 125
+static void
+emit_urb_setup_mesh(struct anv_graphics_pipeline *pipeline,
+                    enum intel_urb_deref_block_size *deref_block_size)
+{
+   const struct intel_device_info *devinfo = &pipeline->base.device->info;
+
+   const struct brw_task_prog_data *task_prog_data =
+      anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK) ?
+      get_task_prog_data(pipeline) : NULL;
+   const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
+
+   const struct intel_mesh_urb_allocation alloc =
+      intel_get_mesh_urb_config(devinfo, pipeline->base.l3_config,
+                                task_prog_data ? task_prog_data->map.size_dw : 0,
+                                mesh_prog_data->map.size_dw);
+
+   /* Zero out the primitive pipeline URB allocations. */
+   for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
+      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_VS), urb) {
+         urb._3DCommandSubOpcode += i;
+      }
+   }
+
+   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_ALLOC_TASK), urb) {
+      if (task_prog_data) {
+         urb.TASKURBEntryAllocationSize = alloc.task_entry_size_64b - 1;
+         urb.TASKNumberofURBEntriesSlice0 = alloc.task_entries;
+         urb.TASKNumberofURBEntriesSliceN = alloc.task_entries;
+         urb.TASKURBStartingAddressSlice0 = alloc.task_starting_address_8kb;
+         urb.TASKURBStartingAddressSliceN = alloc.task_starting_address_8kb;
+      }
+   }
+
+   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_ALLOC_MESH), urb) {
+      urb.MESHURBEntryAllocationSize = alloc.mesh_entry_size_64b - 1;
+      urb.MESHNumberofURBEntriesSlice0 = alloc.mesh_entries;
+      urb.MESHNumberofURBEntriesSliceN = alloc.mesh_entries;
+      urb.MESHURBStartingAddressSlice0 = alloc.mesh_starting_address_8kb;
+      urb.MESHURBStartingAddressSliceN = alloc.mesh_starting_address_8kb;
+   }
+
+   *deref_block_size = alloc.deref_block_size;
+}
+#endif
+
 static void
 emit_urb_setup(struct anv_graphics_pipeline *pipeline,
                enum intel_urb_deref_block_size *deref_block_size)
 {
+#if GFX_VERx10 >= 125
+   if (anv_pipeline_is_mesh(pipeline)) {
+      emit_urb_setup_mesh(pipeline, deref_block_size);
+      return;
+   }
+#endif
+
    unsigned entry_size[4];
    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
       const struct brw_vue_prog_data *prog_data =
@@ -336,13 +395,20 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
       anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE), sbe);
 #if GFX_VER >= 8
       anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ), sbe);
+#endif
+#if GFX_VERx10 >= 125
+      if (anv_pipeline_is_mesh(pipeline))
+         anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_MESH), sbe_mesh);
 #endif
       return;
    }
 
    struct GENX(3DSTATE_SBE) sbe = {
       GENX(3DSTATE_SBE_header),
-      .AttributeSwizzleEnable = true,
+      /* TODO(mesh): Figure out cases where we need attribute swizzling. See also
+       * calculate_urb_setup() and related functions.
+       */
+      .AttributeSwizzleEnable = anv_pipeline_is_primitive(pipeline),
       .PointSpriteTextureCoordinateOrigin = UPPERLEFT,
       .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs,
       .ConstantInterpolationEnable = wm_prog_data->flat_inputs,
@@ -431,6 +497,22 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
 #if GFX_VER >= 8
       sbe.ForceVertexURBEntryReadOffset = true;
       sbe.ForceVertexURBEntryReadLength = true;
+#endif
+   } else {
+      assert(anv_pipeline_is_mesh(pipeline));
+#if GFX_VERx10 >= 125
+      const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
+      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_MESH), sbe_mesh) {
+         const struct brw_mue_map *mue = &mesh_prog_data->map;
+
+         assert(mue->per_vertex_header_size_dw % 8 == 0);
+         sbe_mesh.PerVertexURBEntryOutputReadOffset = mue->per_vertex_header_size_dw / 8;
+         sbe_mesh.PerVertexURBEntryOutputReadLength = DIV_ROUND_UP(mue->per_vertex_data_size_dw, 8);
+
+         assert(mue->per_primitive_header_size_dw % 8 == 0);
+         sbe_mesh.PerPrimitiveURBEntryOutputReadOffset = mue->per_primitive_header_size_dw / 8;
+         sbe_mesh.PerPrimitiveURBEntryOutputReadLength = DIV_ROUND_UP(mue->per_primitive_data_size_dw, 8);
+      }
 #endif
    }
 
@@ -457,7 +539,18 @@ VkPolygonMode
 genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
                           VkPrimitiveTopology primitive_topology)
 {
-   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
+   if (anv_pipeline_is_mesh(pipeline)) {
+      switch (get_mesh_prog_data(pipeline)->primitive_type) {
+      case SHADER_PRIM_POINTS:
+         return VK_POLYGON_MODE_POINT;
+      case SHADER_PRIM_LINES:
+         return VK_POLYGON_MODE_LINE;
+      case SHADER_PRIM_TRIANGLES:
+         return pipeline->polygon_mode;
+      default:
+         unreachable("invalid primitive type for mesh");
+      }
+   } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
       switch (get_gs_prog_data(pipeline)->output_topology) {
       case _3DPRIM_POINTLIST:
          return VK_POLYGON_MODE_POINT;
@@ -678,16 +771,22 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
       sf.DerefBlockSize = urb_deref_block_size;
 #endif
 
+   bool point_from_shader;
    if (anv_pipeline_is_primitive(pipeline)) {
       const struct brw_vue_prog_data *last_vue_prog_data =
          anv_pipeline_get_last_vue_prog_data(pipeline);
+      point_from_shader = last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ;
+   } else {
+      assert(anv_pipeline_is_mesh(pipeline));
+      const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
+      point_from_shader = mesh_prog_data->map.start_dw[VARYING_SLOT_PSIZ] >= 0;
+   }
 
-      if (last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
-         sf.PointWidthSource = Vertex;
-      } else {
-         sf.PointWidthSource = State;
-         sf.PointWidth = 1.0;
-      }
+   if (point_from_shader) {
+      sf.PointWidthSource = Vertex;
+   } else {
+      sf.PointWidthSource = State;
+      sf.PointWidth = 1.0;
    }
 
 #if GFX_VER >= 8
@@ -699,7 +798,7 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
 #endif
 
    VkPolygonMode raster_mode =
-      genX(raster_polygon_mode)(pipeline, ia_info->topology);
+      genX(raster_polygon_mode)(pipeline, ia_info ? ia_info->topology : VK_PRIMITIVE_TOPOLOGY_MAX_ENUM);
 
    bool dynamic_primitive_topology =
      dynamic_states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
@@ -1425,7 +1524,7 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
    * points and lines so we get "pop-free" clipping.
    */
    VkPolygonMode raster_mode =
-      genX(raster_polygon_mode)(pipeline, ia_info->topology);
+      genX(raster_polygon_mode)(pipeline, ia_info ? ia_info->topology : VK_PRIMITIVE_TOPOLOGY_MAX_ENUM);
 
    clip.ViewportXYClipTestEnable =
       dynamic_states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY ?
       0 : (raster_mode == VK_POLYGON_MODE_FILL);
@@ -1455,6 +1554,7 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
    clip.MinimumPointWidth = 0.125;
    clip.MaximumPointWidth = 255.875;
 
+   /* TODO(mesh): Multiview. */
    if (anv_pipeline_is_primitive(pipeline)) {
       const struct brw_vue_prog_data *last =
          anv_pipeline_get_last_vue_prog_data(pipeline);
@@ -1498,6 +1598,17 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
 #endif
 
    GENX(3DSTATE_CLIP_pack)(NULL, pipeline->gfx7.clip, &clip);
+
+#if GFX_VERx10 >= 125
+   if (anv_pipeline_is_mesh(pipeline)) {
+      const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
+      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_CLIP_MESH), clip_mesh) {
+         clip_mesh.PrimitiveHeaderEnable = mesh_prog_data->map.per_primitive_header_size_dw > 0;
+         /* TODO(mesh): UserClipDistanceClipTestEnableBitmask. */
+         /* TODO(mesh): UserClipDistanceCullTestEnableBitmask. */
+      }
+   }
+#endif
 }
 
 static void
@@ -2203,7 +2314,7 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline, struct anv_subpass *subp
       }
 
       VkPolygonMode raster_mode =
-         genX(raster_polygon_mode)(pipeline, ia->topology);
+         genX(raster_polygon_mode)(pipeline, ia ? ia->topology : VK_PRIMITIVE_TOPOLOGY_MAX_ENUM);
 
       wm.MultisampleRasterizationMode =
          dynamic_states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY ? 0 :
@@ -2486,6 +2597,122 @@ emit_3dstate_primitive_replication(struct anv_graphics_pipeline *pipeline)
 }
 #endif
 
+#if GFX_VERx10 >= 125
+static void
+emit_task_state(struct anv_graphics_pipeline *pipeline)
+{
+   assert(anv_pipeline_is_mesh(pipeline));
+
+   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) {
+      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), zero);
+      return;
+   }
+
+   const struct anv_shader_bin *task_bin = pipeline->shaders[MESA_SHADER_TASK];
+
+   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), tc) {
+      tc.TaskShaderEnable = true;
+      tc.ScratchSpaceBuffer =
+         get_scratch_surf(&pipeline->base, MESA_SHADER_TASK, task_bin);
+   }
+
+   const struct intel_device_info *devinfo = &pipeline->base.device->info;
+   const struct brw_task_prog_data *task_prog_data = get_task_prog_data(pipeline);
+   const struct brw_cs_dispatch_info task_dispatch =
+      brw_cs_get_dispatch_info(devinfo, &task_prog_data->base, NULL);
+
+   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_SHADER), task) {
+      task.KernelStartPointer = task_bin->kernel.offset;
+      task.SIMDSize = task_dispatch.simd_size / 16;
+      task.MessageSIMD = task.SIMDSize;
+      task.NumberofThreadsinGPGPUThreadGroup = task_dispatch.threads;
+      task.ExecutionMask = task_dispatch.right_mask;
+      task.LocalXMaximum = task_dispatch.group_size - 1;
+      task.EmitLocalIDX = true;
+
+      task.NumberofBarriers = task_prog_data->base.uses_barrier;
+      task.SharedLocalMemorySize =
+         encode_slm_size(GFX_VER, task_prog_data->base.base.total_shared);
+   }
+
+   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_SHADER_DATA), zero);
+
+   /* Recommended values from "Task and Mesh Distribution Programming". */
+   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_REDISTRIB), redistrib) {
+      redistrib.LocalBOTAccumulatorThreshold = MULTIPLIER_1;
+      redistrib.SmallTaskThreshold = MULTIPLIER_2;
+      redistrib.TargetMeshBatchSize = MULTIPLIER_4;
+      redistrib.TaskRedistributionLevel = TASKREDISTRIB_BOM;
+      redistrib.TaskRedistributionMode = TASKREDISTRIB_RR_FREE;
+   }
+}
+
+static void
+emit_mesh_state(struct anv_graphics_pipeline *pipeline)
+{
+   assert(anv_pipeline_is_mesh(pipeline));
+
+   const struct anv_shader_bin *mesh_bin = pipeline->shaders[MESA_SHADER_MESH];
+
+   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_CONTROL), mc) {
+      mc.MeshShaderEnable = true;
+      mc.ScratchSpaceBuffer =
+         get_scratch_surf(&pipeline->base, MESA_SHADER_MESH, mesh_bin);
+
+      /* TODO(mesh): MaximumNumberofThreadGroups. */
+   }
+
+   const struct intel_device_info *devinfo = &pipeline->base.device->info;
+   const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
+   const struct brw_cs_dispatch_info mesh_dispatch =
+      brw_cs_get_dispatch_info(devinfo, &mesh_prog_data->base, NULL);
+
+   const unsigned output_topology =
+      mesh_prog_data->primitive_type == SHADER_PRIM_POINTS ? OUTPUT_POINT :
+      mesh_prog_data->primitive_type == SHADER_PRIM_LINES ? OUTPUT_LINE :
+                                                            OUTPUT_TRI;
+
+   uint32_t index_format;
+   switch (mesh_prog_data->index_format) {
+   case BRW_INDEX_FORMAT_U32:
+      index_format = INDEX_U32;
+      break;
+   default:
+      unreachable("invalid index format");
+   }
+
+   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_SHADER), mesh) {
+      mesh.KernelStartPointer = mesh_bin->kernel.offset;
+      mesh.SIMDSize = mesh_dispatch.simd_size / 16;
+      mesh.MessageSIMD = mesh.SIMDSize;
+      mesh.NumberofThreadsinGPGPUThreadGroup = mesh_dispatch.threads;
+      mesh.ExecutionMask = mesh_dispatch.right_mask;
+      mesh.LocalXMaximum = mesh_dispatch.group_size - 1;
+      mesh.EmitLocalIDX = true;
+
+      mesh.MaximumPrimitiveCount = mesh_prog_data->map.max_primitives - 1;
+      mesh.OutputTopology = output_topology;
+      mesh.PerVertexDataPitch = mesh_prog_data->map.per_vertex_pitch_dw / 8;
+      mesh.PerPrimitiveDataPresent = mesh_prog_data->map.per_primitive_pitch_dw > 0;
+      mesh.PerPrimitiveDataPitch = mesh_prog_data->map.per_primitive_pitch_dw / 8;
+      mesh.IndexFormat = index_format;
+
+      mesh.NumberofBarriers = mesh_prog_data->base.uses_barrier;
+      mesh.SharedLocalMemorySize =
+         encode_slm_size(GFX_VER, mesh_prog_data->base.base.total_shared);
+   }
+
+   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_SHADER_DATA), zero);
+
+   /* Recommended values from "Task and Mesh Distribution Programming". */
+   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_DISTRIB), distrib) {
+      distrib.DistributionMode = MESH_RR_FREE;
+      distrib.TaskDistributionBatchSize = 2; /* 2^2 thread groups */
+      distrib.MeshDistributionBatchSize = 3; /* 2^3 thread groups */
+   }
+}
+#endif
+
 static VkResult
 genX(graphics_pipeline_create)(
     VkDevice _device,
@@ -2616,6 +2843,19 @@ genX(graphics_pipeline_create)(
 
       emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState,
                              dynamic_states);
+#if GFX_VERx10 >= 125
+      /* Disable Mesh. */
+      if (device->physical->vk.supported_extensions.NV_mesh_shader) {
+         anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_CONTROL), zero);
+         anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), zero);
+      }
+#endif
+   } else {
+      assert(anv_pipeline_is_mesh(pipeline));
+#if GFX_VERx10 >= 125
+      emit_task_state(pipeline);
+      emit_mesh_state(pipeline);
+#endif
    }
 
    emit_3dstate_sbe(pipeline);