diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 4ba70751f59..2caf51c76c8 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -358,7 +358,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: return 4; - + case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: + if (!is_nir_enabled(&rscreen->b)) + return 140; + FALLTHROUGH; case PIPE_CAP_GLSL_FEATURE_LEVEL: if (family >= CHIP_CEDAR) return is_nir_enabled(&rscreen->b) ? 450 : 430; @@ -367,9 +370,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 330; return 140; - case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: - return 140; - /* Supported except the original R600. */ case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index c0ff0b8bd87..1e92ca087ac 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -284,6 +284,8 @@ static void r600_set_clip_state(struct pipe_context *ctx, rctx->clip_state.state = *state; r600_mark_atom_dirty(rctx, &rctx->clip_state.atom); rctx->driver_consts[PIPE_SHADER_VERTEX].vs_ucp_dirty = true; + rctx->driver_consts[PIPE_SHADER_TESS_EVAL].vs_ucp_dirty = true; + rctx->driver_consts[PIPE_SHADER_GEOMETRY].vs_ucp_dirty = true; } static void r600_set_stencil_ref(struct pipe_context *ctx, @@ -1350,6 +1352,12 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on start = compute_only ? PIPE_SHADER_COMPUTE : 0; end = compute_only ? PIPE_SHADER_TYPES : PIPE_SHADER_COMPUTE; + int last_vertex_stage = PIPE_SHADER_VERTEX; + if (rctx->tes_shader) + last_vertex_stage = PIPE_SHADER_TESS_EVAL; + if (rctx->gs_shader) + last_vertex_stage = PIPE_SHADER_GEOMETRY; + for (sh = start; sh < end; sh++) { struct r600_shader_driver_constants_info *info = &rctx->driver_consts[sh]; if (!info->vs_ucp_dirty && @@ -1362,7 +1370,9 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on ptr = info->constants; size = info->alloc_size; if (info->vs_ucp_dirty) { - assert(sh == PIPE_SHADER_VERTEX); + assert(sh == PIPE_SHADER_VERTEX || + sh == PIPE_SHADER_GEOMETRY || + sh == PIPE_SHADER_TESS_EVAL); if (!size) { ptr = rctx->clip_state.state.ucp; size = R600_UCP_SIZE; @@ -1411,7 +1421,7 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on if (info->texture_const_dirty) { assert (ptr); assert (size); - if (sh == PIPE_SHADER_VERTEX) + if (sh == last_vertex_stage) memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE); if (sh == PIPE_SHADER_FRAGMENT) memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE); diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp index fe5397e12d7..8f813c22831 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -54,7 +54,6 @@ using std::vector; NirLowerInstruction::NirLowerInstruction(): b(nullptr) { - } bool NirLowerInstruction::filter_instr(const nir_instr *instr, const void *data) @@ -191,6 +190,75 @@ void sort_fsoutput(nir_shader *shader) exec_list_append(&shader->variables, &new_list); } +class LowerClipvertexWrite : public NirLowerInstruction { + +public: + LowerClipvertexWrite(int noutputs, pipe_stream_output_info& so_info) : + m_clipplane1(noutputs), + m_clipvtx(noutputs + 1), + m_so_info(so_info){} +private: + bool filter(const nir_instr *instr) const override { + if (instr->type != nir_instr_type_intrinsic) + return false; + + auto intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_store_output) + return false; + + return nir_intrinsic_io_semantics(intr).location == VARYING_SLOT_CLIP_VERTEX; + } + + nir_ssa_def *lower(nir_instr *instr) override { + + auto intr = nir_instr_as_intrinsic(instr); + nir_ssa_def *output[8] = {nullptr}; + + // for UBO loads we correct the buffer ID by adding 1 + auto buf_id = nir_imm_int(b, R600_BUFFER_INFO_CONST_BUFFER - 1); + + assert(intr->src[0].is_ssa); + auto clip_vtx = intr->src[0].ssa; + + for (int i = 0; i < 8; ++i) { + auto sel = nir_imm_int(b, i); + auto mrow = nir_load_ubo_vec4(b, 4, 32, buf_id, sel); + output[i] = nir_fdot4(b, clip_vtx, mrow); + } + + unsigned clip_vertex_index = nir_intrinsic_base(intr); + + for (int i = 0; i < 2; ++i) { + auto clip_i = nir_vec(b, &output[4 * i], 4); + auto store = nir_store_output(b, clip_i, intr->src[1].ssa); + nir_intrinsic_set_write_mask(store, 0xf); + nir_intrinsic_set_base(store, clip_vertex_index); + nir_io_semantics semantic = nir_intrinsic_io_semantics(intr); + semantic.location = VARYING_SLOT_CLIP_DIST0 + i; + semantic.no_varying = 1; + + if (i > 0) + nir_intrinsic_set_base(store, m_clipplane1); + nir_intrinsic_set_write_mask(store, 0xf); + nir_intrinsic_set_io_semantics(store, semantic); + } + nir_intrinsic_set_base(intr, m_clipvtx); + + nir_ssa_def *result = NIR_LOWER_INSTR_PROGRESS_REPLACE; + for (unsigned i = 0; i < m_so_info.num_outputs; ++i) { + if (m_so_info.output[i].register_index == clip_vertex_index) { + m_so_info.output[i].register_index = m_clipvtx; + result = NIR_LOWER_INSTR_PROGRESS; + } + } + return result; + } + int m_clipplane1; + int m_clipvtx; + pipe_stream_output_info& m_so_info; +}; + + } static nir_intrinsic_op @@ -278,6 +346,19 @@ r600_lower_deref_instr(nir_builder *b, nir_instr *instr_, UNUSED void *cb_data) return true; } + +static bool +r600_lower_clipvertex_to_clipdist(nir_shader *sh, + pipe_stream_output_info& so_info) +{ + if (!(sh->info.outputs_written & VARYING_BIT_CLIP_VERTEX)) + return false; + + int noutputs = util_bitcount64(sh->info.outputs_written); + bool result = r600::LowerClipvertexWrite(noutputs, so_info).run(sh); + return result; +} + static bool r600_nir_lower_atomics(nir_shader *shader) { @@ -504,6 +585,22 @@ bool has_saturate(const nir_function *func) return false; } +static bool r600_is_last_vertex_stage(nir_shader *nir, const r600_shader_key& key) +{ + if (nir->info.stage == MESA_SHADER_GEOMETRY) + return true; + + if (nir->info.stage == MESA_SHADER_TESS_EVAL && + !key.tes.as_es) + return true; + + if (nir->info.stage == MESA_SHADER_VERTEX && + !key.vs.as_es && !key.vs.as_ls) + return true; + + return false; +} + extern "C" bool r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *) { @@ -618,6 +715,9 @@ int r600_shader_from_nir(struct r600_context *rctx, auto sh = nir_shader_clone(sel->nir, sel->nir); + if (r600_is_last_vertex_stage(sh, *key)) + r600_lower_clipvertex_to_clipdist(sh, sel->so); + if (sh->info.stage == MESA_SHADER_TESS_CTRL || sh->info.stage == MESA_SHADER_TESS_EVAL || (sh->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) { @@ -645,7 +745,6 @@ int r600_shader_from_nir(struct r600_context *rctx, NIR_PASS_V(sh, nir_lower_ubo_vec4); - if (lower_64bit) NIR_PASS_V(sh, r600::r600_nir_64_to_vec2); diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.h b/src/gallium/drivers/r600/sfn/sfn_nir.h index 0514cc3f77b..73ce5c4b109 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.h +++ b/src/gallium/drivers/r600/sfn/sfn_nir.h @@ -67,6 +67,8 @@ bool r600_merge_vec2_stores(nir_shader *shader); bool r600_split_64bit_uniforms_and_ubo(nir_shader *sh); bool r600_lower_64bit_to_vec2(nir_shader *sh); bool r600_split_64bit_alu_and_phi(nir_shader *sh); +bool r600_lower_clipvertex_to_clipdist(nir_shader *sh); + class AssemblyFromShader { public: diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp index 7d8de053e76..52ebc184871 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp @@ -85,20 +85,26 @@ bool GeometryShader::process_store_output(nir_intrinsic_instr *instr) tgsi_semantic name = (tgsi_semantic)semantic.first; auto write_mask = nir_intrinsic_write_mask(instr); ShaderOutput output(driver_location, name, write_mask); - output.set_sid(semantic.second); - add_output(output); - if (location == VARYING_SLOT_CLIP_DIST0 || - location == VARYING_SLOT_CLIP_DIST1) { - m_clip_dist_mask |= 1 << (location - VARYING_SLOT_CLIP_DIST0); - } + if (!nir_intrinsic_io_semantics(instr).no_varying) + output.set_sid(semantic.second); + if (nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX) + add_output(output); if (location == VARYING_SLOT_VIEWPORT) { m_out_viewport = true; m_out_misc_write = true; - } - if (m_noutputs <= driver_location) + + if (location == VARYING_SLOT_CLIP_DIST0 || + location == VARYING_SLOT_CLIP_DIST1) { + auto write_mask = nir_intrinsic_write_mask(instr); + m_cc_dist_mask |= write_mask << (4 * (location - VARYING_SLOT_CLIP_DIST0)); + m_clip_dist_write |= write_mask << (4 * (location - VARYING_SLOT_CLIP_DIST0)); + } + + if (m_noutputs <= driver_location && + nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX) m_noutputs = driver_location + 1; return true; @@ -222,17 +228,17 @@ bool GeometryShader::emit_vertex(nir_intrinsic_instr* instr, bool cut) auto ir = new AluInstr(op2_add_int, m_export_base[stream], m_export_base[stream], value_factory().literal(m_noutputs), AluInstr::last_write); - //ir->add_required_instr(cut_instr); emit_instruction(ir); } - - return true; } bool GeometryShader::store_output(nir_intrinsic_instr* instr) { + if (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_CLIP_VERTEX) + return true; + auto location = nir_intrinsic_io_semantics(instr).location; auto index = nir_src_as_const_value(instr->src[1]); assert(index); @@ -300,9 +306,6 @@ bool GeometryShader::store_output(nir_intrinsic_instr* instr) } } - - - return true; } @@ -347,6 +350,8 @@ void GeometryShader::do_get_shader_info(r600_shader *sh_info) { sh_info->processor_type = PIPE_SHADER_GEOMETRY; sh_info->ring_item_sizes[0] = m_ring_item_sizes[0]; + sh_info->cc_dist_mask = m_cc_dist_mask; + sh_info->clip_dist_write = m_clip_dist_write; } bool GeometryShader::read_prop(std::istream& is) diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_gs.h b/src/gallium/drivers/r600/sfn/sfn_shader_gs.h index 61bc49476f4..f26d73adf1e 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_gs.h +++ b/src/gallium/drivers/r600/sfn/sfn_shader_gs.h @@ -50,7 +50,8 @@ private: bool m_first_vertex_emitted{false}; int m_offset{0}; int m_next_input_ring_offset{0}; - int m_clip_dist_mask{0}; + int m_cc_dist_mask{0}; + int m_clip_dist_write{0}; int m_cur_ring_output{0}; bool m_gs_tri_strip_adj_fix{false}; uint64_t m_input_mask{0}; diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp index 8d6e750fb1b..8ded3d7abb4 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp @@ -97,9 +97,13 @@ bool VertexExportForFs::do_store_output(const store_loc &store_info, nir_intrins case VARYING_SLOT_CLIP_VERTEX: return emit_clip_vertices(store_info, intr); case VARYING_SLOT_CLIP_DIST0: - case VARYING_SLOT_CLIP_DIST1: + case VARYING_SLOT_CLIP_DIST1: { + bool success = emit_varying_pos(store_info, intr); m_num_clip_dist += 4; - return emit_varying_param(store_info, intr) && emit_varying_pos(store_info, intr); + if (!nir_intrinsic_io_semantics(&intr).no_varying) + success &= emit_varying_param(store_info, intr); + return success; + } case VARYING_SLOT_LAYER: { m_out_misc_write = 1; m_vs_out_layer = 1; @@ -128,29 +132,6 @@ bool VertexExportForFs::emit_clip_vertices(const store_loc &store_info, const ni m_output_registers[nir_intrinsic_base(&instr)] = &m_clip_vertex; - RegisterVec4 clip_dist[2] = { vf.temp_vec4(pin_group), vf.temp_vec4(pin_group)}; - - for (int i = 0; i < 8; i++) { - int oreg = i >> 2; - int ochan = i & 3; - AluInstr *ir = nullptr; - AluInstr::SrcValues src(8); - - for (int j = 0; j < 4; j++) { - src[2 * j] = m_clip_vertex[j]; - src[2 * j + 1] = vf.uniform(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER); - } - - ir = new AluInstr(op2_dot4_ieee, clip_dist[oreg][ochan], src, AluInstr::last_write, 4); - m_parent->emit_instruction(ir); - } - - m_last_pos_export = new ExportInstr(ExportInstr::pos, m_cur_clip_pos++, clip_dist[0]); - m_parent->emit_instruction(m_last_pos_export); - - m_last_pos_export = new ExportInstr(ExportInstr::pos, m_cur_clip_pos++, clip_dist[1]); - m_parent->emit_instruction(m_last_pos_export); - return true; } @@ -450,22 +431,24 @@ bool VertexShader::do_scan_instruction(nir_instr *instr) output.set_sid(sid); switch (location) { - case VARYING_SLOT_PSIZ: - case VARYING_SLOT_POS: - case VARYING_SLOT_CLIP_VERTEX: - case VARYING_SLOT_EDGE: { - break; - } case VARYING_SLOT_CLIP_DIST0: case VARYING_SLOT_CLIP_DIST1: + if (nir_intrinsic_io_semantics(intr).no_varying) + break; + FALLTHROUGH; case VARYING_SLOT_VIEWPORT: case VARYING_SLOT_LAYER: case VARYING_SLOT_VIEW_INDEX: default: output.set_is_param(true); + FALLTHROUGH; + case VARYING_SLOT_PSIZ: + case VARYING_SLOT_POS: + case VARYING_SLOT_CLIP_VERTEX: + case VARYING_SLOT_EDGE: + add_output(output); + break; } - add_output(output); - break; } case nir_intrinsic_load_vertex_id: m_sv_values.set(es_vertexid);