i965: Add tessellation evaluation shaders
The TES is essentially a post-tessellator VS, which has access to the entire TCS output patch, and a special gl_TessCoord input. Otherwise, they're very straightforward. This patch implements SIMD8 tessellation evaluation shaders for Gen8+. The tessellator can generate a lot of geometry, so operating in SIMD8 mode (8 vertices per thread) is more efficient than SIMD4x2 mode (only 2 vertices per thread). I have another patch which implements SIMD4x2 mode for older hardware (or via an environment variable override). We currently handle all inputs via the pull model. v2: Improve comments (suggested by Jordan Justen). Signed-off-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
This commit is contained in:
@@ -151,6 +151,7 @@ i965_FILES = \
|
||||
brw_state_upload.c \
|
||||
brw_structs.h \
|
||||
brw_tcs_surface_state.c \
|
||||
brw_tes.c \
|
||||
brw_tes_surface_state.c \
|
||||
brw_tex.c \
|
||||
brw_tex_layout.c \
|
||||
|
@@ -191,6 +191,14 @@ struct brw_vs_prog_key {
|
||||
struct brw_sampler_prog_key_data tex;
|
||||
};
|
||||
|
||||
/** The program key for Tessellation Evaluation Shaders. */
|
||||
struct brw_tes_prog_key
|
||||
{
|
||||
unsigned program_string_id;
|
||||
|
||||
struct brw_sampler_prog_key_data tex;
|
||||
};
|
||||
|
||||
/** The program key for Geometry Shaders. */
|
||||
struct brw_gs_prog_key
|
||||
{
|
||||
@@ -668,6 +676,22 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
|
||||
unsigned *final_assembly_size,
|
||||
char **error_str);
|
||||
|
||||
/**
|
||||
* Compile a tessellation evaluation shader.
|
||||
*
|
||||
* Returns the final assembly and the program's size.
|
||||
*/
|
||||
const unsigned *
|
||||
brw_compile_tes(const struct brw_compiler *compiler, void *log_data,
|
||||
void *mem_ctx,
|
||||
const struct brw_tes_prog_key *key,
|
||||
struct brw_tes_prog_data *prog_data,
|
||||
const struct nir_shader *shader,
|
||||
struct gl_shader_program *shader_prog,
|
||||
int shader_time_index,
|
||||
unsigned *final_assembly_size,
|
||||
char **error_str);
|
||||
|
||||
/**
|
||||
* Compile a vertex shader.
|
||||
*
|
||||
|
@@ -1704,6 +1704,12 @@ brw_vertex_program_const(const struct gl_vertex_program *p)
|
||||
return (const struct brw_vertex_program *) p;
|
||||
}
|
||||
|
||||
/**
 * Reinterpret a gl_tess_eval_program pointer as a brw_tess_eval_program
 * pointer.  This is a plain cast; no checking is performed.
 */
static inline struct brw_tess_eval_program *
brw_tess_eval_program(struct gl_tess_eval_program *p)
{
   struct brw_tess_eval_program *btep = (struct brw_tess_eval_program *) p;

   return btep;
}
|
||||
|
||||
static inline struct brw_geometry_program *
|
||||
brw_geometry_program(struct gl_geometry_program *p)
|
||||
{
|
||||
|
@@ -1685,6 +1685,21 @@ fs_visitor::assign_vs_urb_setup()
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::assign_tes_urb_setup()
|
||||
{
|
||||
assert(stage == MESA_SHADER_TESS_EVAL);
|
||||
|
||||
brw_vue_prog_data *vue_prog_data = (brw_vue_prog_data *) prog_data;
|
||||
|
||||
first_non_payload_grf += 8 * vue_prog_data->urb_read_length;
|
||||
|
||||
/* Rewrite all ATTR file references to HW_REGs. */
|
||||
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
||||
convert_attr_sources_to_hw_regs(inst);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::assign_gs_urb_setup()
|
||||
{
|
||||
@@ -5231,6 +5246,40 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes)
|
||||
return !failed;
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::run_tes()
|
||||
{
|
||||
assert(stage == MESA_SHADER_TESS_EVAL);
|
||||
|
||||
/* R0: thread header, R1-3: gl_TessCoord.xyz, R4: URB handles */
|
||||
payload.num_regs = 5;
|
||||
|
||||
if (shader_time_index >= 0)
|
||||
emit_shader_time_begin();
|
||||
|
||||
emit_nir_code();
|
||||
|
||||
if (failed)
|
||||
return false;
|
||||
|
||||
emit_urb_writes();
|
||||
|
||||
if (shader_time_index >= 0)
|
||||
emit_shader_time_end();
|
||||
|
||||
calculate_cfg();
|
||||
|
||||
optimize();
|
||||
|
||||
assign_curb_setup();
|
||||
assign_tes_urb_setup();
|
||||
|
||||
fixup_3src_null_dest();
|
||||
allocate_registers();
|
||||
|
||||
return !failed;
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::run_gs()
|
||||
{
|
||||
|
@@ -81,7 +81,8 @@ public:
|
||||
struct gl_program *prog,
|
||||
const nir_shader *shader,
|
||||
unsigned dispatch_width,
|
||||
int shader_time_index);
|
||||
int shader_time_index,
|
||||
const struct brw_vue_map *input_vue_map = NULL);
|
||||
fs_visitor(const struct brw_compiler *compiler, void *log_data,
|
||||
void *mem_ctx,
|
||||
struct brw_gs_compile *gs_compile,
|
||||
@@ -109,6 +110,7 @@ public:
|
||||
|
||||
bool run_fs(bool do_rep_send);
|
||||
bool run_vs(gl_clip_plane *clip_planes);
|
||||
bool run_tes();
|
||||
bool run_gs();
|
||||
bool run_cs();
|
||||
void optimize();
|
||||
@@ -124,6 +126,7 @@ public:
|
||||
void assign_urb_setup();
|
||||
void convert_attr_sources_to_hw_regs(fs_inst *inst);
|
||||
void assign_vs_urb_setup();
|
||||
void assign_tes_urb_setup();
|
||||
void assign_gs_urb_setup();
|
||||
bool assign_regs(bool allow_spilling);
|
||||
void assign_regs_trivial();
|
||||
@@ -249,6 +252,8 @@ public:
|
||||
nir_intrinsic_instr *instr);
|
||||
void nir_emit_intrinsic(const brw::fs_builder &bld,
|
||||
nir_intrinsic_instr *instr);
|
||||
void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
|
||||
nir_intrinsic_instr *instr);
|
||||
void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
|
||||
int op, nir_intrinsic_instr *instr);
|
||||
void nir_emit_shared_atomic(const brw::fs_builder &bld,
|
||||
@@ -260,6 +265,7 @@ public:
|
||||
fs_reg get_nir_src(nir_src src);
|
||||
fs_reg get_nir_dest(nir_dest dest);
|
||||
fs_reg get_nir_image_deref(const nir_deref_var *deref);
|
||||
fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
|
||||
void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
|
||||
unsigned wr_mask);
|
||||
|
||||
@@ -313,6 +319,8 @@ public:
|
||||
struct brw_stage_prog_data *prog_data;
|
||||
struct gl_program *prog;
|
||||
|
||||
const struct brw_vue_map *input_vue_map;
|
||||
|
||||
int *param_size;
|
||||
|
||||
int *virtual_grf_start;
|
||||
|
@@ -123,6 +123,7 @@ fs_visitor::nir_setup_outputs()
|
||||
|
||||
switch (stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
case MESA_SHADER_GEOMETRY: {
|
||||
unsigned location = var->data.location;
|
||||
nir_setup_single_output_varying(&reg, var->type, &location);
|
||||
@@ -443,6 +444,9 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
|
||||
case MESA_SHADER_VERTEX:
|
||||
nir_emit_vs_intrinsic(abld, nir_instr_as_intrinsic(instr));
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
nir_emit_tes_intrinsic(abld, nir_instr_as_intrinsic(instr));
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
nir_emit_gs_intrinsic(abld, nir_instr_as_intrinsic(instr));
|
||||
break;
|
||||
@@ -1709,6 +1713,24 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fetch the indirect offset operand of an I/O intrinsic.
 *
 * \return the register holding the offset when it is not a compile-time
 *         constant; otherwise a default-constructed fs_reg.  (The TES input
 *         load path compares the result's file against BAD_FILE to detect
 *         the constant case.)
 */
fs_reg
fs_visitor::get_indirect_offset(nir_intrinsic_instr *instr)
{
   nir_src *src = nir_get_io_offset_src(instr);
   nir_const_value *imm = nir_src_as_const_value(*src);

   if (!imm)
      return get_nir_src(*src);

   /* A constant offset is expected to be 0 here: brw_nir.c's
    * add_const_offset_to_base() folds any other constant offset into
    * instr->const_index[0].
    */
   assert(imm->u[0] == 0);
   return fs_reg();
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
|
||||
nir_intrinsic_instr *instr)
|
||||
@@ -1740,6 +1762,106 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
|
||||
nir_intrinsic_instr *instr)
|
||||
{
|
||||
assert(stage == MESA_SHADER_TESS_EVAL);
|
||||
struct brw_tes_prog_data *tes_prog_data = (struct brw_tes_prog_data *) prog_data;
|
||||
|
||||
fs_reg dest;
|
||||
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
|
||||
dest = get_nir_dest(instr->dest);
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
bld.MOV(dest, fs_reg(brw_vec1_grf(0, 1)));
|
||||
break;
|
||||
case nir_intrinsic_load_tess_coord:
|
||||
/* gl_TessCoord is part of the payload in g1-3 */
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
bld.MOV(offset(dest, bld, i), fs_reg(brw_vec8_grf(1 + i, 0)));
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_tess_level_outer:
|
||||
/* When the TES reads gl_TessLevelOuter, we ensure that the patch header
|
||||
* appears as a push-model input. So, we can simply use the ATTR file
|
||||
* rather than issuing URB read messages. The data is stored in the
|
||||
* high DWords in reverse order - DWord 7 contains .x, DWord 6 contains
|
||||
* .y, and so on.
|
||||
*/
|
||||
switch (tes_prog_data->domain) {
|
||||
case BRW_TESS_DOMAIN_QUAD:
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i));
|
||||
break;
|
||||
case BRW_TESS_DOMAIN_TRI:
|
||||
for (unsigned i = 0; i < 3; i++)
|
||||
bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i));
|
||||
break;
|
||||
case BRW_TESS_DOMAIN_ISOLINE:
|
||||
for (unsigned i = 0; i < 2; i++)
|
||||
bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i));
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_tess_level_inner:
|
||||
/* When the TES reads gl_TessLevelInner, we ensure that the patch header
|
||||
* appears as a push-model input. So, we can simply use the ATTR file
|
||||
* rather than issuing URB read messages.
|
||||
*/
|
||||
switch (tes_prog_data->domain) {
|
||||
case BRW_TESS_DOMAIN_QUAD:
|
||||
bld.MOV(dest, component(fs_reg(ATTR, 0), 3));
|
||||
bld.MOV(offset(dest, bld, 1), component(fs_reg(ATTR, 0), 2));
|
||||
break;
|
||||
case BRW_TESS_DOMAIN_TRI:
|
||||
bld.MOV(dest, component(fs_reg(ATTR, 0), 4));
|
||||
break;
|
||||
case BRW_TESS_DOMAIN_ISOLINE:
|
||||
/* ignore - value is undefined */
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_input:
|
||||
case nir_intrinsic_load_per_vertex_input: {
|
||||
fs_reg indirect_offset = get_indirect_offset(instr);
|
||||
unsigned imm_offset = instr->const_index[0];
|
||||
|
||||
fs_inst *inst;
|
||||
if (indirect_offset.file == BAD_FILE) {
|
||||
/* Replicate the patch handle to all enabled channels */
|
||||
fs_reg patch_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
|
||||
bld.MOV(patch_handle, retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD));
|
||||
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dest, patch_handle);
|
||||
inst->mlen = 1;
|
||||
} else {
|
||||
/* Indirect indexing - use per-slot offsets as well. */
|
||||
const fs_reg srcs[] = {
|
||||
retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
|
||||
indirect_offset
|
||||
};
|
||||
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
||||
bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
|
||||
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dest, payload);
|
||||
inst->mlen = 2;
|
||||
}
|
||||
inst->offset = imm_offset;
|
||||
inst->base_mrf = -1;
|
||||
inst->regs_written = instr->num_components;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
nir_emit_intrinsic(bld, instr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
|
||||
nir_intrinsic_instr *instr)
|
||||
|
@@ -700,7 +700,10 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||
fs_reg sources[8];
|
||||
fs_reg urb_handle;
|
||||
|
||||
urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
||||
if (stage == MESA_SHADER_TESS_EVAL)
|
||||
urb_handle = fs_reg(retype(brw_vec8_grf(4, 0), BRW_REGISTER_TYPE_UD));
|
||||
else
|
||||
urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
||||
|
||||
/* If we don't have any valid slots to write, just do a minimal urb write
|
||||
* send to terminate the shader. This includes 1 slot of undefined data,
|
||||
@@ -934,9 +937,11 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
|
||||
struct gl_program *prog,
|
||||
const nir_shader *shader,
|
||||
unsigned dispatch_width,
|
||||
int shader_time_index)
|
||||
int shader_time_index,
|
||||
const struct brw_vue_map *input_vue_map)
|
||||
: backend_shader(compiler, log_data, mem_ctx, shader, prog_data),
|
||||
key(key), gs_compile(NULL), prog_data(prog_data), prog(prog),
|
||||
input_vue_map(input_vue_map),
|
||||
dispatch_width(dispatch_width),
|
||||
shader_time_index(shader_time_index),
|
||||
bld(fs_builder(this, dispatch_width).at_end())
|
||||
@@ -972,6 +977,9 @@ fs_visitor::init()
|
||||
case MESA_SHADER_VERTEX:
|
||||
key_tex = &((const brw_vs_prog_key *) key)->tex;
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
key_tex = &((const brw_tes_prog_key *) key)->tex;
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
key_tex = &((const brw_gs_prog_key *) key)->tex;
|
||||
break;
|
||||
|
@@ -42,6 +42,7 @@ brw_shader_precompile(struct gl_context *ctx,
|
||||
struct gl_shader_program *sh_prog)
|
||||
{
|
||||
struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
|
||||
struct gl_shader *tes = sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
|
||||
struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
|
||||
struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
|
||||
struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
|
||||
@@ -52,6 +53,9 @@ brw_shader_precompile(struct gl_context *ctx,
|
||||
if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program))
|
||||
return false;
|
||||
|
||||
if (tes && !brw_tes_precompile(ctx, sh_prog, tes->Program))
|
||||
return false;
|
||||
|
||||
if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program))
|
||||
return false;
|
||||
|
||||
|
@@ -56,6 +56,8 @@ void
|
||||
brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
|
||||
struct gl_shader *shader, struct gl_program *prog);
|
||||
|
||||
void brw_upload_tes_prog(struct brw_context *brw);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
@@ -24,6 +24,7 @@
|
||||
#include "brw_context.h"
|
||||
#include "brw_cfg.h"
|
||||
#include "brw_eu.h"
|
||||
#include "brw_fs.h"
|
||||
#include "brw_nir.h"
|
||||
#include "glsl/glsl_parser_extras.h"
|
||||
#include "main/shaderobj.h"
|
||||
@@ -84,6 +85,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
|
||||
|
||||
compiler->scalar_stage[MESA_SHADER_VERTEX] =
|
||||
devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
|
||||
compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = true;
|
||||
compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
|
||||
devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false);
|
||||
compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
|
||||
@@ -135,6 +137,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
|
||||
compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
|
||||
}
|
||||
|
||||
compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false;
|
||||
|
||||
if (compiler->scalar_stage[MESA_SHADER_GEOMETRY])
|
||||
compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false;
|
||||
|
||||
@@ -1289,3 +1293,93 @@ gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx)
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" const unsigned *
|
||||
brw_compile_tes(const struct brw_compiler *compiler,
|
||||
void *log_data,
|
||||
void *mem_ctx,
|
||||
const struct brw_tes_prog_key *key,
|
||||
struct brw_tes_prog_data *prog_data,
|
||||
const nir_shader *src_shader,
|
||||
struct gl_shader_program *shader_prog,
|
||||
int shader_time_index,
|
||||
unsigned *final_assembly_size,
|
||||
char **error_str)
|
||||
{
|
||||
const struct brw_device_info *devinfo = compiler->devinfo;
|
||||
struct gl_shader *shader =
|
||||
shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
|
||||
const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL];
|
||||
|
||||
nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
|
||||
nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
|
||||
nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
|
||||
|
||||
brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
|
||||
nir->info.outputs_written,
|
||||
nir->info.separate_shader);
|
||||
|
||||
unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
|
||||
|
||||
assert(output_size_bytes >= 1);
|
||||
if (output_size_bytes > GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES) {
|
||||
if (error_str)
|
||||
*error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* URB entry sizes are stored as a multiple of 64 bytes. */
|
||||
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
||||
|
||||
struct brw_vue_map input_vue_map;
|
||||
brw_compute_tess_vue_map(&input_vue_map,
|
||||
nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
|
||||
nir->info.patch_inputs_read);
|
||||
|
||||
bool need_patch_header = nir->info.system_values_read &
|
||||
(BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_OUTER) |
|
||||
BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_INNER));
|
||||
|
||||
/* The TES will pull most inputs using URB read messages.
|
||||
*
|
||||
* However, we push the patch header for TessLevel factors when required,
|
||||
* as it's a tiny amount of extra data.
|
||||
*/
|
||||
prog_data->base.urb_read_length = need_patch_header ? 1 : 0;
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_TES)) {
|
||||
fprintf(stderr, "TES Input ");
|
||||
brw_print_vue_map(stderr, &input_vue_map);
|
||||
fprintf(stderr, "TES Output ");
|
||||
brw_print_vue_map(stderr, &prog_data->base.vue_map);
|
||||
}
|
||||
|
||||
if (is_scalar) {
|
||||
fs_visitor v(compiler, log_data, mem_ctx, (void *) key,
|
||||
&prog_data->base.base, shader->Program, nir, 8,
|
||||
shader_time_index, &input_vue_map);
|
||||
if (!v.run_tes()) {
|
||||
if (error_str)
|
||||
*error_str = ralloc_strdup(mem_ctx, v.fail_msg);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
|
||||
|
||||
fs_generator g(compiler, log_data, mem_ctx, (void *) key,
|
||||
&prog_data->base.base, v.promoted_constants, false,
|
||||
"TES");
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_TES)) {
|
||||
g.enable_debug(ralloc_asprintf(mem_ctx,
|
||||
"%s tessellation evaluation shader %s",
|
||||
nir->info.label ? nir->info.label
|
||||
: "unnamed",
|
||||
nir->info.name));
|
||||
}
|
||||
|
||||
g.generate_code(v.cfg, 8);
|
||||
|
||||
return g.get_assembly(final_assembly_size);
|
||||
} else {
|
||||
unreachable("XXX: vec4 tessellation evalation shaders not merged yet.");
|
||||
}
|
||||
}
|
||||
|
@@ -273,6 +273,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage,
|
||||
bool brw_vs_precompile(struct gl_context *ctx,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct gl_program *prog);
|
||||
bool brw_tes_precompile(struct gl_context *ctx,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct gl_program *prog);
|
||||
bool brw_gs_precompile(struct gl_context *ctx,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct gl_program *prog);
|
||||
|
@@ -678,6 +678,7 @@ brw_upload_programs(struct brw_context *brw,
|
||||
{
|
||||
if (pipeline == BRW_RENDER_PIPELINE) {
|
||||
brw_upload_vs_prog(brw);
|
||||
brw_upload_tes_prog(brw);
|
||||
|
||||
if (brw->gen < 6)
|
||||
brw_upload_ff_gs_prog(brw);
|
||||
@@ -691,6 +692,8 @@ brw_upload_programs(struct brw_context *brw,
|
||||
bool old_separate = brw->vue_map_geom_out.separate;
|
||||
if (brw->geometry_program)
|
||||
brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
|
||||
else if (brw->tess_eval_program)
|
||||
brw->vue_map_geom_out = brw->tes.prog_data->base.vue_map;
|
||||
else
|
||||
brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
|
||||
|
||||
|
300
src/mesa/drivers/dri/i965/brw_tes.c
Normal file
300
src/mesa/drivers/dri/i965/brw_tes.c
Normal file
@@ -0,0 +1,300 @@
|
||||
/*
|
||||
* Copyright © 2014 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file brw_tes.c
|
||||
*
|
||||
* Tessellation evaluation shader state upload code.
|
||||
*/
|
||||
|
||||
#include "brw_context.h"
|
||||
#include "brw_nir.h"
|
||||
#include "brw_program.h"
|
||||
#include "brw_shader.h"
|
||||
#include "brw_state.h"
|
||||
#include "program/prog_parameter.h"
|
||||
|
||||
static void
|
||||
brw_tes_debug_recompile(struct brw_context *brw,
|
||||
struct gl_shader_program *shader_prog,
|
||||
const struct brw_tes_prog_key *key)
|
||||
{
|
||||
struct brw_cache_item *c = NULL;
|
||||
const struct brw_tes_prog_key *old_key = NULL;
|
||||
bool found = false;
|
||||
|
||||
perf_debug("Recompiling tessellation evaluation shader for program %d\n",
|
||||
shader_prog->Name);
|
||||
|
||||
for (unsigned int i = 0; i < brw->cache.size; i++) {
|
||||
for (c = brw->cache.items[i]; c; c = c->next) {
|
||||
if (c->cache_id == BRW_CACHE_TES_PROG) {
|
||||
old_key = c->key;
|
||||
|
||||
if (old_key->program_string_id == key->program_string_id)
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (c)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!c) {
|
||||
perf_debug(" Didn't find previous compile in the shader cache for "
|
||||
"debug\n");
|
||||
return;
|
||||
}
|
||||
|
||||
found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
|
||||
|
||||
if (!found) {
|
||||
perf_debug(" Something else\n");
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
brw_codegen_tes_prog(struct brw_context *brw,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct brw_tess_eval_program *tep,
|
||||
struct brw_tes_prog_key *key)
|
||||
{
|
||||
const struct brw_compiler *compiler = brw->intelScreen->compiler;
|
||||
const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
|
||||
struct brw_stage_state *stage_state = &brw->tes.base;
|
||||
nir_shader *nir = tep->program.Base.nir;
|
||||
struct brw_tes_prog_data prog_data;
|
||||
bool start_busy = false;
|
||||
double start_time = 0;
|
||||
|
||||
memset(&prog_data, 0, sizeof(prog_data));
|
||||
|
||||
brw_assign_common_binding_table_offsets(MESA_SHADER_TESS_EVAL, devinfo,
|
||||
shader_prog, &tep->program.Base,
|
||||
&prog_data.base.base, 0);
|
||||
|
||||
switch (tep->program.Spacing) {
|
||||
case GL_EQUAL:
|
||||
prog_data.partitioning = BRW_TESS_PARTITIONING_INTEGER;
|
||||
break;
|
||||
case GL_FRACTIONAL_ODD:
|
||||
prog_data.partitioning = BRW_TESS_PARTITIONING_ODD_FRACTIONAL;
|
||||
break;
|
||||
case GL_FRACTIONAL_EVEN:
|
||||
prog_data.partitioning = BRW_TESS_PARTITIONING_EVEN_FRACTIONAL;
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid domain shader spacing");
|
||||
}
|
||||
|
||||
switch (tep->program.PrimitiveMode) {
|
||||
case GL_QUADS:
|
||||
prog_data.domain = BRW_TESS_DOMAIN_QUAD;
|
||||
break;
|
||||
case GL_TRIANGLES:
|
||||
prog_data.domain = BRW_TESS_DOMAIN_TRI;
|
||||
break;
|
||||
case GL_ISOLINES:
|
||||
prog_data.domain = BRW_TESS_DOMAIN_ISOLINE;
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid domain shader primitive mode");
|
||||
}
|
||||
|
||||
if (tep->program.PointMode) {
|
||||
prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT;
|
||||
} else if (tep->program.PrimitiveMode == GL_ISOLINES) {
|
||||
prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE;
|
||||
} else {
|
||||
/* Hardware winding order is backwards from OpenGL */
|
||||
switch (tep->program.VertexOrder) {
|
||||
case GL_CCW:
|
||||
prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW;
|
||||
break;
|
||||
case GL_CW:
|
||||
prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW;
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid domain shader vertex order");
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate the references to the uniforms that will end up in the
|
||||
* prog_data associated with the compiled program, and which will be freed
|
||||
* by the state cache.
|
||||
*
|
||||
* Note: param_count needs to be num_uniform_components * 4, since we add
|
||||
* padding around uniform values below vec4 size, so the worst case is that
|
||||
* every uniform is a float which gets padded to the size of a vec4.
|
||||
*/
|
||||
struct gl_shader *tes = shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
|
||||
int param_count = nir->num_uniforms;
|
||||
if (!compiler->scalar_stage[MESA_SHADER_TESS_EVAL])
|
||||
param_count *= 4;
|
||||
|
||||
prog_data.base.base.param =
|
||||
rzalloc_array(NULL, const gl_constant_value *, param_count);
|
||||
prog_data.base.base.pull_param =
|
||||
rzalloc_array(NULL, const gl_constant_value *, param_count);
|
||||
prog_data.base.base.image_param =
|
||||
rzalloc_array(NULL, struct brw_image_param, tes->NumImages);
|
||||
prog_data.base.base.nr_params = param_count;
|
||||
prog_data.base.base.nr_image_params = tes->NumImages;
|
||||
|
||||
brw_nir_setup_glsl_uniforms(nir, shader_prog, &tep->program.Base,
|
||||
&prog_data.base.base,
|
||||
compiler->scalar_stage[MESA_SHADER_TESS_EVAL]);
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_TES))
|
||||
brw_dump_ir("tessellation evaluation", shader_prog, tes, NULL);
|
||||
|
||||
int st_index = -1;
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME))
|
||||
st_index = brw_get_shader_time_index(brw, shader_prog, NULL, ST_TES);
|
||||
|
||||
if (unlikely(brw->perf_debug)) {
|
||||
start_busy = brw->batch.last_bo && drm_intel_bo_busy(brw->batch.last_bo);
|
||||
start_time = get_time();
|
||||
}
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
unsigned program_size;
|
||||
char *error_str;
|
||||
const unsigned *program =
|
||||
brw_compile_tes(compiler, brw, mem_ctx, key, &prog_data, nir,
|
||||
shader_prog, st_index, &program_size, &error_str);
|
||||
if (program == NULL) {
|
||||
if (shader_prog) {
|
||||
shader_prog->LinkStatus = false;
|
||||
ralloc_strcat(&shader_prog->InfoLog, error_str);
|
||||
}
|
||||
|
||||
_mesa_problem(NULL, "Failed to compile tessellation evaluation shader: "
|
||||
"%s\n", error_str);
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (unlikely(brw->perf_debug)) {
|
||||
struct brw_shader *btes = (struct brw_shader *) tes;
|
||||
if (btes->compiled_once) {
|
||||
brw_tes_debug_recompile(brw, shader_prog, key);
|
||||
}
|
||||
if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
|
||||
perf_debug("TES compile took %.03f ms and stalled the GPU\n",
|
||||
(get_time() - start_time) * 1000);
|
||||
}
|
||||
btes->compiled_once = true;
|
||||
}
|
||||
|
||||
/* Scratch space is used for register spilling */
|
||||
if (prog_data.base.base.total_scratch) {
|
||||
brw_get_scratch_bo(brw, &stage_state->scratch_bo,
|
||||
prog_data.base.base.total_scratch *
|
||||
brw->max_ds_threads);
|
||||
}
|
||||
|
||||
brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG,
|
||||
key, sizeof(*key),
|
||||
program, program_size,
|
||||
&prog_data, sizeof(prog_data),
|
||||
&stage_state->prog_offset, &brw->tes.prog_data);
|
||||
ralloc_free(mem_ctx);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
brw_upload_tes_prog(struct brw_context *brw)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
struct gl_shader_program **current = ctx->_Shader->CurrentProgram;
|
||||
struct brw_stage_state *stage_state = &brw->tes.base;
|
||||
struct brw_tes_prog_key key;
|
||||
/* BRW_NEW_TESS_EVAL_PROGRAM */
|
||||
struct brw_tess_eval_program *tep =
|
||||
(struct brw_tess_eval_program *) brw->tess_eval_program;
|
||||
|
||||
if (!brw_state_dirty(brw,
|
||||
_NEW_TEXTURE,
|
||||
BRW_NEW_TESS_EVAL_PROGRAM))
|
||||
return;
|
||||
|
||||
if (tep == NULL) {
|
||||
/* Other state atoms had better not try to access prog_data, since
|
||||
* there's no TES program.
|
||||
*/
|
||||
brw->tes.prog_data = NULL;
|
||||
brw->tes.base.prog_data = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
struct gl_program *prog = &tep->program.Base;
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
|
||||
key.program_string_id = tep->id;
|
||||
|
||||
/* _NEW_TEXTURE */
|
||||
brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
|
||||
&key.tex);
|
||||
|
||||
if (!brw_search_cache(&brw->cache, BRW_CACHE_TES_PROG,
|
||||
&key, sizeof(key),
|
||||
&stage_state->prog_offset, &brw->tes.prog_data)) {
|
||||
bool success = brw_codegen_tes_prog(brw, current[MESA_SHADER_TESS_EVAL],
|
||||
tep, &key);
|
||||
assert(success);
|
||||
(void)success;
|
||||
}
|
||||
brw->tes.base.prog_data = &brw->tes.prog_data->base.base;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
brw_tes_precompile(struct gl_context *ctx,
|
||||
struct gl_shader_program *shader_prog,
|
||||
struct gl_program *prog)
|
||||
{
|
||||
struct brw_context *brw = brw_context(ctx);
|
||||
struct brw_tes_prog_key key;
|
||||
uint32_t old_prog_offset = brw->tes.base.prog_offset;
|
||||
struct brw_tes_prog_data *old_prog_data = brw->tes.prog_data;
|
||||
bool success;
|
||||
|
||||
struct gl_tess_eval_program *tep = (struct gl_tess_eval_program *)prog;
|
||||
struct brw_tess_eval_program *btep = brw_tess_eval_program(tep);
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
|
||||
key.program_string_id = btep->id;
|
||||
brw_setup_tex_for_precompile(brw, &key.tex, prog);
|
||||
|
||||
success = brw_codegen_tes_prog(brw, shader_prog, btep, &key);
|
||||
|
||||
brw->tes.base.prog_offset = old_prog_offset;
|
||||
brw->tes.prog_data = old_prog_data;
|
||||
|
||||
return success;
|
||||
}
|
Reference in New Issue
Block a user