2011-05-26 09:57:36 -07:00
|
|
|
/*
|
|
|
|
* Copyright © 2010 Intel Corporation
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
* Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "brw_context.h"
|
2014-05-19 10:20:37 -07:00
|
|
|
#include "brw_cfg.h"
|
2015-11-22 18:27:42 -08:00
|
|
|
#include "brw_eu.h"
|
2015-11-10 14:35:27 -08:00
|
|
|
#include "brw_fs.h"
|
2015-04-07 15:15:09 -07:00
|
|
|
#include "brw_nir.h"
|
2013-06-12 16:57:11 -07:00
|
|
|
#include "glsl/glsl_parser_extras.h"
|
2015-11-22 18:27:42 -08:00
|
|
|
#include "main/shaderobj.h"
|
|
|
|
#include "main/uniforms.h"
|
2015-11-18 16:43:31 -05:00
|
|
|
#include "util/debug.h"
|
2011-05-26 09:57:36 -07:00
|
|
|
|
2015-04-16 14:13:52 -07:00
|
|
|
static void
|
|
|
|
shader_debug_log_mesa(void *data, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct brw_context *brw = (struct brw_context *)data;
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
GLuint msg_id = 0;
|
|
|
|
_mesa_gl_vdebug(&brw->ctx, &msg_id,
|
|
|
|
MESA_DEBUG_SOURCE_SHADER_COMPILER,
|
|
|
|
MESA_DEBUG_TYPE_OTHER,
|
|
|
|
MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
2015-06-22 17:01:22 -07:00
|
|
|
static void
|
|
|
|
shader_perf_log_mesa(void *data, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct brw_context *brw = (struct brw_context *)data;
|
|
|
|
|
|
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
|
|
|
|
|
|
|
if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
|
|
|
|
va_list args_copy;
|
|
|
|
va_copy(args_copy, args);
|
|
|
|
vfprintf(stderr, fmt, args_copy);
|
|
|
|
va_end(args_copy);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (brw->perf_debug) {
|
|
|
|
GLuint msg_id = 0;
|
|
|
|
_mesa_gl_vdebug(&brw->ctx, &msg_id,
|
|
|
|
MESA_DEBUG_SOURCE_SHADER_COMPILER,
|
|
|
|
MESA_DEBUG_TYPE_PERFORMANCE,
|
|
|
|
MESA_DEBUG_SEVERITY_MEDIUM, fmt, args);
|
|
|
|
}
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
2015-04-16 12:01:09 -07:00
|
|
|
struct brw_compiler *
|
|
|
|
brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
|
|
|
|
{
|
|
|
|
struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler);
|
|
|
|
|
|
|
|
compiler->devinfo = devinfo;
|
2015-04-16 14:13:52 -07:00
|
|
|
compiler->shader_debug_log = shader_debug_log_mesa;
|
2015-06-22 17:01:22 -07:00
|
|
|
compiler->shader_perf_log = shader_perf_log_mesa;
|
2015-04-16 12:01:09 -07:00
|
|
|
|
|
|
|
brw_fs_alloc_reg_sets(compiler);
|
|
|
|
brw_vec4_alloc_reg_set(compiler);
|
|
|
|
|
2015-11-12 13:32:13 -08:00
|
|
|
compiler->scalar_stage[MESA_SHADER_VERTEX] =
|
|
|
|
devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
|
2015-11-17 01:07:39 -08:00
|
|
|
compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false;
|
2015-11-10 14:35:27 -08:00
|
|
|
compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = true;
|
2015-11-12 13:32:13 -08:00
|
|
|
compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
|
2015-11-18 16:43:31 -05:00
|
|
|
devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false);
|
2015-11-12 13:32:13 -08:00
|
|
|
compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
|
|
|
|
compiler->scalar_stage[MESA_SHADER_COMPUTE] = true;
|
2015-06-26 16:20:21 -07:00
|
|
|
|
2015-04-16 15:28:17 -07:00
|
|
|
nir_shader_compiler_options *nir_options =
|
|
|
|
rzalloc(compiler, nir_shader_compiler_options);
|
|
|
|
nir_options->native_integers = true;
|
|
|
|
/* In order to help allow for better CSE at the NIR level we tell NIR
|
|
|
|
* to split all ffma instructions during opt_algebraic and we then
|
|
|
|
* re-combine them as a later step.
|
|
|
|
*/
|
|
|
|
nir_options->lower_ffma = true;
|
|
|
|
nir_options->lower_sub = true;
|
2015-09-10 11:08:15 -07:00
|
|
|
/* In the vec4 backend, our dpN instruction replicates its result to all
|
|
|
|
* the components of a vec4. We would like NIR to give us replicated fdot
|
|
|
|
* instructions because it can optimize better for us.
|
|
|
|
*
|
|
|
|
* For the FS backend, it should be lowered away by the scalarizing pass so
|
|
|
|
* we should never see fdot anyway.
|
|
|
|
*/
|
|
|
|
nir_options->fdot_replicates = true;
|
2015-04-16 15:28:17 -07:00
|
|
|
|
|
|
|
/* We want the GLSL compiler to emit code that uses condition codes */
|
|
|
|
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
|
|
|
|
compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
|
|
|
|
compiler->glsl_compiler_options[i].MaxIfDepth =
|
|
|
|
devinfo->gen < 6 ? 16 : UINT_MAX;
|
|
|
|
|
|
|
|
compiler->glsl_compiler_options[i].EmitCondCodes = true;
|
|
|
|
compiler->glsl_compiler_options[i].EmitNoNoise = true;
|
|
|
|
compiler->glsl_compiler_options[i].EmitNoMainReturn = true;
|
|
|
|
compiler->glsl_compiler_options[i].EmitNoIndirectInput = true;
|
|
|
|
compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
|
|
|
|
compiler->glsl_compiler_options[i].LowerClipDistance = true;
|
2015-06-24 13:22:43 +03:00
|
|
|
|
2015-11-12 13:32:13 -08:00
|
|
|
bool is_scalar = compiler->scalar_stage[i];
|
2015-09-21 11:18:23 -07:00
|
|
|
|
|
|
|
compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
|
|
|
|
compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
|
|
|
|
compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar;
|
|
|
|
|
2015-06-24 13:22:43 +03:00
|
|
|
/* !ARB_gpu_shader5 */
|
|
|
|
if (devinfo->gen < 7)
|
|
|
|
compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
|
2015-04-16 15:28:17 -07:00
|
|
|
|
2015-09-21 10:42:19 -07:00
|
|
|
compiler->glsl_compiler_options[i].NirOptions = nir_options;
|
2015-11-04 14:55:32 -08:00
|
|
|
|
|
|
|
compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
|
2015-07-01 09:51:25 +02:00
|
|
|
}
|
|
|
|
|
2015-11-17 01:07:39 -08:00
|
|
|
compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectInput = false;
|
2015-11-10 14:35:27 -08:00
|
|
|
compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false;
|
|
|
|
|
2015-11-07 18:58:59 -08:00
|
|
|
if (compiler->scalar_stage[MESA_SHADER_GEOMETRY])
|
|
|
|
compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false;
|
|
|
|
|
2015-07-28 14:55:00 -07:00
|
|
|
compiler->glsl_compiler_options[MESA_SHADER_COMPUTE]
|
|
|
|
.LowerShaderSharedVariables = true;
|
|
|
|
|
2015-04-16 12:01:09 -07:00
|
|
|
return compiler;
|
|
|
|
}
|
|
|
|
|
2015-11-22 21:54:28 -08:00
|
|
|
extern "C" struct gl_shader *
|
2011-05-26 09:57:36 -07:00
|
|
|
brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
|
|
|
|
{
|
|
|
|
struct brw_shader *shader;
|
|
|
|
|
|
|
|
shader = rzalloc(NULL, struct brw_shader);
|
|
|
|
if (shader) {
|
|
|
|
shader->base.Type = type;
|
2014-01-07 10:58:56 -08:00
|
|
|
shader->base.Stage = _mesa_shader_enum_to_shader_stage(type);
|
2011-05-26 09:57:36 -07:00
|
|
|
shader->base.Name = name;
|
|
|
|
_mesa_init_shader(ctx, &shader->base);
|
|
|
|
}
|
|
|
|
|
|
|
|
return &shader->base;
|
|
|
|
}
|
|
|
|
|
2015-11-22 21:54:28 -08:00
|
|
|
extern "C" void
|
2015-10-06 16:11:08 -07:00
|
|
|
brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
|
|
|
|
unsigned surf_index)
|
|
|
|
{
|
|
|
|
assert(surf_index < BRW_MAX_SURFACES);
|
|
|
|
|
|
|
|
prog_data->binding_table.size_bytes =
|
|
|
|
MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4);
|
|
|
|
}
|
|
|
|
|
2014-06-29 16:02:59 -07:00
|
|
|
enum brw_reg_type
|
2011-05-26 10:01:10 -07:00
|
|
|
brw_type_for_base_type(const struct glsl_type *type)
|
|
|
|
{
|
|
|
|
switch (type->base_type) {
|
|
|
|
case GLSL_TYPE_FLOAT:
|
|
|
|
return BRW_REGISTER_TYPE_F;
|
|
|
|
case GLSL_TYPE_INT:
|
2014-10-16 12:16:08 -07:00
|
|
|
case GLSL_TYPE_BOOL:
|
2015-07-21 14:22:11 +10:00
|
|
|
case GLSL_TYPE_SUBROUTINE:
|
2014-12-02 12:30:27 -08:00
|
|
|
return BRW_REGISTER_TYPE_D;
|
2011-05-26 10:01:10 -07:00
|
|
|
case GLSL_TYPE_UINT:
|
|
|
|
return BRW_REGISTER_TYPE_UD;
|
|
|
|
case GLSL_TYPE_ARRAY:
|
2011-11-08 19:26:38 -08:00
|
|
|
return brw_type_for_base_type(type->fields.array);
|
2011-05-26 10:01:10 -07:00
|
|
|
case GLSL_TYPE_STRUCT:
|
|
|
|
case GLSL_TYPE_SAMPLER:
|
2013-10-20 12:35:47 -07:00
|
|
|
case GLSL_TYPE_ATOMIC_UINT:
|
2011-05-26 10:01:10 -07:00
|
|
|
/* These should be overridden with the type of the member when
|
|
|
|
* dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
|
|
|
|
* way to trip up if we don't.
|
|
|
|
*/
|
|
|
|
return BRW_REGISTER_TYPE_UD;
|
2013-11-25 13:50:47 -08:00
|
|
|
case GLSL_TYPE_IMAGE:
|
|
|
|
return BRW_REGISTER_TYPE_UD;
|
2012-12-11 12:56:03 -08:00
|
|
|
case GLSL_TYPE_VOID:
|
|
|
|
case GLSL_TYPE_ERROR:
|
2012-12-11 12:11:16 -08:00
|
|
|
case GLSL_TYPE_INTERFACE:
|
2014-08-14 18:49:20 +10:00
|
|
|
case GLSL_TYPE_DOUBLE:
|
2014-06-29 14:54:01 -07:00
|
|
|
unreachable("not reached");
|
2011-05-26 10:01:10 -07:00
|
|
|
}
|
2012-12-11 12:56:03 -08:00
|
|
|
|
|
|
|
return BRW_REGISTER_TYPE_F;
|
2011-05-26 10:01:10 -07:00
|
|
|
}
|
|
|
|
|
2014-06-29 17:50:20 -07:00
|
|
|
enum brw_conditional_mod
|
2011-05-26 10:01:10 -07:00
|
|
|
brw_conditional_for_comparison(unsigned int op)
|
|
|
|
{
|
|
|
|
switch (op) {
|
|
|
|
case ir_binop_less:
|
|
|
|
return BRW_CONDITIONAL_L;
|
|
|
|
case ir_binop_greater:
|
|
|
|
return BRW_CONDITIONAL_G;
|
|
|
|
case ir_binop_lequal:
|
|
|
|
return BRW_CONDITIONAL_LE;
|
|
|
|
case ir_binop_gequal:
|
|
|
|
return BRW_CONDITIONAL_GE;
|
|
|
|
case ir_binop_equal:
|
|
|
|
case ir_binop_all_equal: /* same as equal for scalars */
|
|
|
|
return BRW_CONDITIONAL_Z;
|
|
|
|
case ir_binop_nequal:
|
|
|
|
case ir_binop_any_nequal: /* same as nequal for scalars */
|
|
|
|
return BRW_CONDITIONAL_NZ;
|
|
|
|
default:
|
2014-06-29 14:54:01 -07:00
|
|
|
unreachable("not reached: bad operation for comparison");
|
2011-05-26 10:01:10 -07:00
|
|
|
}
|
|
|
|
}
|
2011-05-02 09:45:40 -07:00
|
|
|
|
|
|
|
uint32_t
|
|
|
|
brw_math_function(enum opcode op)
|
|
|
|
{
|
|
|
|
switch (op) {
|
|
|
|
case SHADER_OPCODE_RCP:
|
|
|
|
return BRW_MATH_FUNCTION_INV;
|
|
|
|
case SHADER_OPCODE_RSQ:
|
|
|
|
return BRW_MATH_FUNCTION_RSQ;
|
|
|
|
case SHADER_OPCODE_SQRT:
|
|
|
|
return BRW_MATH_FUNCTION_SQRT;
|
|
|
|
case SHADER_OPCODE_EXP2:
|
|
|
|
return BRW_MATH_FUNCTION_EXP;
|
|
|
|
case SHADER_OPCODE_LOG2:
|
|
|
|
return BRW_MATH_FUNCTION_LOG;
|
|
|
|
case SHADER_OPCODE_POW:
|
|
|
|
return BRW_MATH_FUNCTION_POW;
|
|
|
|
case SHADER_OPCODE_SIN:
|
|
|
|
return BRW_MATH_FUNCTION_SIN;
|
|
|
|
case SHADER_OPCODE_COS:
|
|
|
|
return BRW_MATH_FUNCTION_COS;
|
2011-09-28 17:37:54 -07:00
|
|
|
case SHADER_OPCODE_INT_QUOTIENT:
|
|
|
|
return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
|
|
|
|
case SHADER_OPCODE_INT_REMAINDER:
|
|
|
|
return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
|
2011-05-02 09:45:40 -07:00
|
|
|
default:
|
2014-06-29 14:54:01 -07:00
|
|
|
unreachable("not reached: unknown math function");
|
2011-05-02 09:45:40 -07:00
|
|
|
}
|
|
|
|
}
|
2011-10-26 13:51:28 -07:00
|
|
|
|
|
|
|
/**
 * Pack up to three constant texel offsets into a single dword.
 *
 * Each offset contributes its low four bits (i.e. its 4-bit two's
 * complement encoding):
 *
 *    bits 11:8 - U Offset (X component)
 *    bits  7:4 - V Offset (Y component)
 *    bits  3:0 - R Offset (Z component)
 *
 * \param offsets         array of at least \p num_components offsets, or
 *                        NULL for a nonconstant offset.
 * \param num_components  number of entries to pack; at most 3.
 * \return the packed offset bits, or 0 when \p offsets is NULL.
 */
uint32_t
brw_texture_offset(int *offsets, unsigned num_components)
{
   /* nonconstant offset; caller will handle it. */
   if (!offsets)
      return 0;

   /* Only three nibbles exist; a fourth component would make the shift
    * amount below wrap around to an out-of-range value.
    */
   assert(num_components <= 3);

   uint32_t offset_bits = 0;
   for (unsigned i = 0; i < num_components; i++) {
      const unsigned shift = 4 * (2 - i);

      /* Mask in unsigned arithmetic before shifting: left-shifting a
       * negative int is undefined, whereas this yields the same nibble.
       */
      offset_bits |= ((uint32_t)offsets[i] & 0xFu) << shift;
   }
   return offset_bits;
}
|
2013-03-11 17:36:54 -07:00
|
|
|
|
|
|
|
const char *
|
|
|
|
brw_instruction_name(enum opcode op)
|
|
|
|
{
|
|
|
|
switch (op) {
|
2015-06-29 14:03:55 -07:00
|
|
|
case BRW_OPCODE_ILLEGAL ... BRW_OPCODE_NOP:
|
2014-12-06 14:18:21 -08:00
|
|
|
assert(opcode_descs[op].name);
|
|
|
|
return opcode_descs[op].name;
|
2013-03-11 17:36:54 -07:00
|
|
|
case FS_OPCODE_FB_WRITE:
|
|
|
|
return "fb_write";
|
2015-07-27 16:14:36 +03:00
|
|
|
case FS_OPCODE_FB_WRITE_LOGICAL:
|
|
|
|
return "fb_write_logical";
|
2015-10-20 14:29:39 -07:00
|
|
|
case FS_OPCODE_PACK_STENCIL_REF:
|
|
|
|
return "pack_stencil_ref";
|
2013-12-17 14:00:50 +02:00
|
|
|
case FS_OPCODE_BLORP_FB_WRITE:
|
|
|
|
return "blorp_fb_write";
|
2014-12-06 14:16:13 -08:00
|
|
|
case FS_OPCODE_REP_FB_WRITE:
|
|
|
|
return "rep_fb_write";
|
2013-03-11 17:36:54 -07:00
|
|
|
|
|
|
|
case SHADER_OPCODE_RCP:
|
|
|
|
return "rcp";
|
|
|
|
case SHADER_OPCODE_RSQ:
|
|
|
|
return "rsq";
|
|
|
|
case SHADER_OPCODE_SQRT:
|
|
|
|
return "sqrt";
|
|
|
|
case SHADER_OPCODE_EXP2:
|
|
|
|
return "exp2";
|
|
|
|
case SHADER_OPCODE_LOG2:
|
|
|
|
return "log2";
|
|
|
|
case SHADER_OPCODE_POW:
|
|
|
|
return "pow";
|
|
|
|
case SHADER_OPCODE_INT_QUOTIENT:
|
|
|
|
return "int_quot";
|
|
|
|
case SHADER_OPCODE_INT_REMAINDER:
|
|
|
|
return "int_rem";
|
|
|
|
case SHADER_OPCODE_SIN:
|
|
|
|
return "sin";
|
|
|
|
case SHADER_OPCODE_COS:
|
|
|
|
return "cos";
|
|
|
|
|
|
|
|
case SHADER_OPCODE_TEX:
|
|
|
|
return "tex";
|
i965/fs: Define logical texture sampling opcodes.
Each logical variant is largely equivalent to the original opcode but
instead of taking a single payload source it expects the arguments
separately as individual sources, like:
tex_logical dst, coordinates, shadow_c, lod, lod2,
sample_index, mcs, sampler, offset,
num_coordinate_components, num_grad_components
This patch defines the opcodes and usual instruction boilerplate,
including a placeholder lowering function provided mostly as
documentation for their source registers.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2015-07-21 18:42:27 +03:00
|
|
|
case SHADER_OPCODE_TEX_LOGICAL:
|
|
|
|
return "tex_logical";
|
2013-03-11 17:36:54 -07:00
|
|
|
case SHADER_OPCODE_TXD:
|
|
|
|
return "txd";
|
i965/fs: Define logical texture sampling opcodes.
Each logical variant is largely equivalent to the original opcode but
instead of taking a single payload source it expects the arguments
separately as individual sources, like:
tex_logical dst, coordinates, shadow_c, lod, lod2,
sample_index, mcs, sampler, offset,
num_coordinate_components, num_grad_components
This patch defines the opcodes and usual instruction boilerplate,
including a placeholder lowering function provided mostly as
documentation for their source registers.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2015-07-21 18:42:27 +03:00
|
|
|
case SHADER_OPCODE_TXD_LOGICAL:
|
|
|
|
return "txd_logical";
|
2013-03-11 17:36:54 -07:00
|
|
|
case SHADER_OPCODE_TXF:
|
|
|
|
return "txf";
|
i965/fs: Define logical texture sampling opcodes.
Each logical variant is largely equivalent to the original opcode but
instead of taking a single payload source it expects the arguments
separately as individual sources, like:
tex_logical dst, coordinates, shadow_c, lod, lod2,
sample_index, mcs, sampler, offset,
num_coordinate_components, num_grad_components
This patch defines the opcodes and usual instruction boilerplate,
including a placeholder lowering function provided mostly as
documentation for their source registers.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2015-07-21 18:42:27 +03:00
|
|
|
case SHADER_OPCODE_TXF_LOGICAL:
|
|
|
|
return "txf_logical";
|
2013-03-11 17:36:54 -07:00
|
|
|
case SHADER_OPCODE_TXL:
|
|
|
|
return "txl";
|
i965/fs: Define logical texture sampling opcodes.
Each logical variant is largely equivalent to the original opcode but
instead of taking a single payload source it expects the arguments
separately as individual sources, like:
tex_logical dst, coordinates, shadow_c, lod, lod2,
sample_index, mcs, sampler, offset,
num_coordinate_components, num_grad_components
This patch defines the opcodes and usual instruction boilerplate,
including a placeholder lowering function provided mostly as
documentation for their source registers.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2015-07-21 18:42:27 +03:00
|
|
|
case SHADER_OPCODE_TXL_LOGICAL:
|
|
|
|
return "txl_logical";
|
2013-03-11 17:36:54 -07:00
|
|
|
case SHADER_OPCODE_TXS:
|
|
|
|
return "txs";
|
i965/fs: Define logical texture sampling opcodes.
Each logical variant is largely equivalent to the original opcode but
instead of taking a single payload source it expects the arguments
separately as individual sources, like:
tex_logical dst, coordinates, shadow_c, lod, lod2,
sample_index, mcs, sampler, offset,
num_coordinate_components, num_grad_components
This patch defines the opcodes and usual instruction boilerplate,
including a placeholder lowering function provided mostly as
documentation for their source registers.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2015-07-21 18:42:27 +03:00
|
|
|
case SHADER_OPCODE_TXS_LOGICAL:
|
|
|
|
return "txs_logical";
|
2013-03-11 17:36:54 -07:00
|
|
|
case FS_OPCODE_TXB:
|
|
|
|
return "txb";
|
i965/fs: Define logical texture sampling opcodes.
Each logical variant is largely equivalent to the original opcode but
instead of taking a single payload source it expects the arguments
separately as individual sources, like:
tex_logical dst, coordinates, shadow_c, lod, lod2,
sample_index, mcs, sampler, offset,
num_coordinate_components, num_grad_components
This patch defines the opcodes and usual instruction boilerplate,
including a placeholder lowering function provided mostly as
documentation for their source registers.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2015-07-21 18:42:27 +03:00
|
|
|
case FS_OPCODE_TXB_LOGICAL:
|
|
|
|
return "txb_logical";
|
2013-12-10 16:36:31 +02:00
|
|
|
case SHADER_OPCODE_TXF_CMS:
|
|
|
|
return "txf_cms";
|
i965/fs: Define logical texture sampling opcodes.
Each logical variant is largely equivalent to the original opcode but
instead of taking a single payload source it expects the arguments
separately as individual sources, like:
tex_logical dst, coordinates, shadow_c, lod, lod2,
sample_index, mcs, sampler, offset,
num_coordinate_components, num_grad_components
This patch defines the opcodes and usual instruction boilerplate,
including a placeholder lowering function provided mostly as
documentation for their source registers.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2015-07-21 18:42:27 +03:00
|
|
|
case SHADER_OPCODE_TXF_CMS_LOGICAL:
|
|
|
|
return "txf_cms_logical";
|
2015-09-08 15:52:09 +01:00
|
|
|
case SHADER_OPCODE_TXF_CMS_W:
|
|
|
|
return "txf_cms_w";
|
|
|
|
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
|
|
|
|
return "txf_cms_w_logical";
|
2013-12-10 16:38:15 +02:00
|
|
|
case SHADER_OPCODE_TXF_UMS:
|
|
|
|
return "txf_ums";
|
i965/fs: Define logical texture sampling opcodes.
Each logical variant is largely equivalent to the original opcode but
instead of taking a single payload source it expects the arguments
separately as individual sources, like:
tex_logical dst, coordinates, shadow_c, lod, lod2,
sample_index, mcs, sampler, offset,
num_coordinate_components, num_grad_components
This patch defines the opcodes and usual instruction boilerplate,
including a placeholder lowering function provided mostly as
documentation for their source registers.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2015-07-21 18:42:27 +03:00
|
|
|
case SHADER_OPCODE_TXF_UMS_LOGICAL:
|
|
|
|
return "txf_ums_logical";
|
2013-11-30 10:32:16 +13:00
|
|
|
case SHADER_OPCODE_TXF_MCS:
|
|
|
|
return "txf_mcs";
|
i965/fs: Define logical texture sampling opcodes.
Each logical variant is largely equivalent to the original opcode but
instead of taking a single payload source it expects the arguments
separately as individual sources, like:
tex_logical dst, coordinates, shadow_c, lod, lod2,
sample_index, mcs, sampler, offset,
num_coordinate_components, num_grad_components
This patch defines the opcodes and usual instruction boilerplate,
including a placeholder lowering function provided mostly as
documentation for their source registers.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2015-07-21 18:42:27 +03:00
|
|
|
case SHADER_OPCODE_TXF_MCS_LOGICAL:
|
|
|
|
return "txf_mcs_logical";
|
2014-12-06 14:16:13 -08:00
|
|
|
case SHADER_OPCODE_LOD:
|
|
|
|
return "lod";
|
i965/fs: Define logical texture sampling opcodes.
Each logical variant is largely equivalent to the original opcode but
instead of taking a single payload source it expects the arguments
separately as individual sources, like:
tex_logical dst, coordinates, shadow_c, lod, lod2,
sample_index, mcs, sampler, offset,
num_coordinate_components, num_grad_components
This patch defines the opcodes and usual instruction boilerplate,
including a placeholder lowering function provided mostly as
documentation for their source registers.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2015-07-21 18:42:27 +03:00
|
|
|
case SHADER_OPCODE_LOD_LOGICAL:
|
|
|
|
return "lod_logical";
|
2013-10-08 21:34:22 +13:00
|
|
|
case SHADER_OPCODE_TG4:
|
|
|
|
return "tg4";
|
i965/fs: Define logical texture sampling opcodes.
Each logical variant is largely equivalent to the original opcode but
instead of taking a single payload source it expects the arguments
separately as individual sources, like:
tex_logical dst, coordinates, shadow_c, lod, lod2,
sample_index, mcs, sampler, offset,
num_coordinate_components, num_grad_components
This patch defines the opcodes and usual instruction boilerplate,
including a placeholder lowering function provided mostly as
documentation for their source registers.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2015-07-21 18:42:27 +03:00
|
|
|
case SHADER_OPCODE_TG4_LOGICAL:
|
|
|
|
return "tg4_logical";
|
2013-10-08 21:42:10 +13:00
|
|
|
case SHADER_OPCODE_TG4_OFFSET:
|
|
|
|
return "tg4_offset";
|
i965/fs: Define logical texture sampling opcodes.
Each logical variant is largely equivalent to the original opcode but
instead of taking a single payload source it expects the arguments
separately as individual sources, like:
tex_logical dst, coordinates, shadow_c, lod, lod2,
sample_index, mcs, sampler, offset,
num_coordinate_components, num_grad_components
This patch defines the opcodes and usual instruction boilerplate,
including a placeholder lowering function provided mostly as
documentation for their source registers.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2015-07-21 18:42:27 +03:00
|
|
|
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
|
|
|
|
return "tg4_offset_logical";
|
2015-08-11 20:37:32 -04:00
|
|
|
case SHADER_OPCODE_SAMPLEINFO:
|
|
|
|
return "sampleinfo";
|
i965/fs: Define logical texture sampling opcodes.
Each logical variant is largely equivalent to the original opcode but
instead of taking a single payload source it expects the arguments
separately as individual sources, like:
tex_logical dst, coordinates, shadow_c, lod, lod2,
sample_index, mcs, sampler, offset,
num_coordinate_components, num_grad_components
This patch defines the opcodes and usual instruction boilerplate,
including a placeholder lowering function provided mostly as
documentation for their source registers.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2015-07-21 18:42:27 +03:00
|
|
|
|
2014-06-14 03:13:27 -07:00
|
|
|
case SHADER_OPCODE_SHADER_TIME_ADD:
|
|
|
|
return "shader_time_add";
|
2013-03-11 17:36:54 -07:00
|
|
|
|
2014-12-06 14:16:13 -08:00
|
|
|
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
|
|
|
return "untyped_atomic";
|
2015-07-21 18:45:32 +03:00
|
|
|
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
|
|
|
return "untyped_atomic_logical";
|
2014-12-06 14:16:13 -08:00
|
|
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
|
|
|
return "untyped_surface_read";
|
2015-07-21 18:45:32 +03:00
|
|
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
|
|
|
|
return "untyped_surface_read_logical";
|
2015-04-23 14:24:14 +03:00
|
|
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
|
|
|
|
return "untyped_surface_write";
|
2015-07-21 18:45:32 +03:00
|
|
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
|
|
|
|
return "untyped_surface_write_logical";
|
2015-04-23 14:28:25 +03:00
|
|
|
case SHADER_OPCODE_TYPED_ATOMIC:
|
|
|
|
return "typed_atomic";
|
2015-07-21 18:45:32 +03:00
|
|
|
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
|
|
|
return "typed_atomic_logical";
|
2015-04-23 14:28:25 +03:00
|
|
|
case SHADER_OPCODE_TYPED_SURFACE_READ:
|
|
|
|
return "typed_surface_read";
|
2015-07-21 18:45:32 +03:00
|
|
|
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
|
|
|
|
return "typed_surface_read_logical";
|
2015-04-23 14:28:25 +03:00
|
|
|
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
|
|
|
|
return "typed_surface_write";
|
2015-07-21 18:45:32 +03:00
|
|
|
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
|
|
|
|
return "typed_surface_write_logical";
|
2015-04-23 14:30:28 +03:00
|
|
|
case SHADER_OPCODE_MEMORY_FENCE:
|
|
|
|
return "memory_fence";
|
2014-12-06 14:16:13 -08:00
|
|
|
|
2014-05-27 18:47:40 -07:00
|
|
|
case SHADER_OPCODE_LOAD_PAYLOAD:
|
|
|
|
return "load_payload";
|
|
|
|
|
2013-10-16 11:45:06 -07:00
|
|
|
case SHADER_OPCODE_GEN4_SCRATCH_READ:
|
|
|
|
return "gen4_scratch_read";
|
|
|
|
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
|
|
|
|
return "gen4_scratch_write";
|
2013-10-16 11:51:22 -07:00
|
|
|
case SHADER_OPCODE_GEN7_SCRATCH_READ:
|
|
|
|
return "gen7_scratch_read";
|
2014-10-20 23:00:50 -07:00
|
|
|
case SHADER_OPCODE_URB_WRITE_SIMD8:
|
|
|
|
return "gen8_urb_write_simd8";
|
2015-05-06 00:04:10 -07:00
|
|
|
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
|
|
|
|
return "gen8_urb_write_simd8_per_slot";
|
|
|
|
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
|
|
|
|
return "gen8_urb_write_simd8_masked";
|
|
|
|
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
|
|
|
|
return "gen8_urb_write_simd8_masked_per_slot";
|
2015-09-29 14:32:02 -07:00
|
|
|
case SHADER_OPCODE_URB_READ_SIMD8:
|
|
|
|
return "urb_read_simd8";
|
2015-11-07 01:37:33 -08:00
|
|
|
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
|
|
|
|
return "urb_read_simd8_per_slot";
|
2013-10-16 11:45:06 -07:00
|
|
|
|
2015-04-23 14:42:53 +03:00
|
|
|
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
|
|
|
|
return "find_live_channel";
|
2015-02-20 20:14:24 +02:00
|
|
|
case SHADER_OPCODE_BROADCAST:
|
|
|
|
return "broadcast";
|
|
|
|
|
2015-02-12 01:42:43 +00:00
|
|
|
case VEC4_OPCODE_MOV_BYTES:
|
|
|
|
return "mov_bytes";
|
2014-03-10 13:26:30 -07:00
|
|
|
case VEC4_OPCODE_PACK_BYTES:
|
|
|
|
return "pack_bytes";
|
2014-10-23 23:22:09 -07:00
|
|
|
case VEC4_OPCODE_UNPACK_UNIFORM:
|
|
|
|
return "unpack_uniform";
|
2014-03-10 13:26:30 -07:00
|
|
|
|
2014-11-08 01:39:14 -08:00
|
|
|
case FS_OPCODE_DDX_COARSE:
|
|
|
|
return "ddx_coarse";
|
|
|
|
case FS_OPCODE_DDX_FINE:
|
|
|
|
return "ddx_fine";
|
|
|
|
case FS_OPCODE_DDY_COARSE:
|
|
|
|
return "ddy_coarse";
|
|
|
|
case FS_OPCODE_DDY_FINE:
|
|
|
|
return "ddy_fine";
|
2013-03-11 17:36:54 -07:00
|
|
|
|
|
|
|
case FS_OPCODE_CINTERP:
|
|
|
|
return "cinterp";
|
|
|
|
case FS_OPCODE_LINTERP:
|
|
|
|
return "linterp";
|
|
|
|
|
2015-04-24 16:23:46 -07:00
|
|
|
case FS_OPCODE_PIXEL_X:
|
|
|
|
return "pixel_x";
|
|
|
|
case FS_OPCODE_PIXEL_Y:
|
|
|
|
return "pixel_y";
|
|
|
|
|
2015-04-13 16:55:49 +02:00
|
|
|
case FS_OPCODE_GET_BUFFER_SIZE:
|
|
|
|
return "fs_get_buffer_size";
|
|
|
|
|
2013-03-11 17:36:54 -07:00
|
|
|
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
|
|
|
|
return "uniform_pull_const";
|
|
|
|
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
|
|
|
|
return "uniform_pull_const_gen7";
|
|
|
|
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
|
|
|
|
return "varying_pull_const";
|
|
|
|
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
|
|
|
|
return "varying_pull_const_gen7";
|
|
|
|
|
|
|
|
case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
|
|
|
|
return "mov_dispatch_to_flags";
|
|
|
|
case FS_OPCODE_DISCARD_JUMP:
|
|
|
|
return "discard_jump";
|
|
|
|
|
2014-12-06 13:34:13 -08:00
|
|
|
case FS_OPCODE_SET_SAMPLE_ID:
|
|
|
|
return "set_sample_id";
|
2013-03-11 17:36:54 -07:00
|
|
|
case FS_OPCODE_SET_SIMD4X2_OFFSET:
|
|
|
|
return "set_simd4x2_offset";
|
|
|
|
|
|
|
|
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
|
|
|
|
return "pack_half_2x16_split";
|
|
|
|
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
|
|
|
|
return "unpack_half_2x16_split_x";
|
|
|
|
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
|
|
|
|
return "unpack_half_2x16_split_y";
|
|
|
|
|
2013-03-27 23:19:39 -07:00
|
|
|
case FS_OPCODE_PLACEHOLDER_HALT:
|
|
|
|
return "placeholder_halt";
|
|
|
|
|
2014-12-07 10:07:16 +13:00
|
|
|
case FS_OPCODE_INTERPOLATE_AT_CENTROID:
|
|
|
|
return "interp_centroid";
|
|
|
|
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
|
|
|
|
return "interp_sample";
|
|
|
|
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
|
|
|
|
return "interp_shared_offset";
|
|
|
|
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
|
|
|
return "interp_per_slot_offset";
|
|
|
|
|
2013-03-11 17:36:54 -07:00
|
|
|
case VS_OPCODE_URB_WRITE:
|
2013-03-21 09:11:12 -07:00
|
|
|
return "vs_urb_write";
|
2013-03-11 17:36:54 -07:00
|
|
|
case VS_OPCODE_PULL_CONSTANT_LOAD:
|
|
|
|
return "pull_constant_load";
|
2013-04-04 14:10:18 -07:00
|
|
|
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
|
|
|
|
return "pull_constant_load_gen7";
|
2015-03-24 15:52:20 +00:00
|
|
|
|
|
|
|
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
|
|
|
|
return "set_simd4x2_header_gen9";
|
|
|
|
|
2015-08-28 09:39:49 +02:00
|
|
|
case VS_OPCODE_GET_BUFFER_SIZE:
|
|
|
|
return "vs_get_buffer_size";
|
|
|
|
|
2013-08-08 06:31:33 +12:00
|
|
|
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
|
|
|
|
return "unpack_flags_simd4x2";
|
2013-03-11 17:36:54 -07:00
|
|
|
|
2013-03-21 09:11:12 -07:00
|
|
|
case GS_OPCODE_URB_WRITE:
|
|
|
|
return "gs_urb_write";
|
2014-07-09 16:28:30 +02:00
|
|
|
case GS_OPCODE_URB_WRITE_ALLOCATE:
|
|
|
|
return "gs_urb_write_allocate";
|
2013-03-23 07:42:32 -07:00
|
|
|
case GS_OPCODE_THREAD_END:
|
|
|
|
return "gs_thread_end";
|
2013-03-23 07:59:13 -07:00
|
|
|
case GS_OPCODE_SET_WRITE_OFFSET:
|
|
|
|
return "set_write_offset";
|
2013-03-23 08:18:43 -07:00
|
|
|
case GS_OPCODE_SET_VERTEX_COUNT:
|
|
|
|
return "set_vertex_count";
|
2014-07-17 08:54:03 +02:00
|
|
|
case GS_OPCODE_SET_DWORD_2:
|
|
|
|
return "set_dword_2";
|
2013-04-21 08:51:33 -07:00
|
|
|
case GS_OPCODE_PREPARE_CHANNEL_MASKS:
|
|
|
|
return "prepare_channel_masks";
|
|
|
|
case GS_OPCODE_SET_CHANNEL_MASKS:
|
|
|
|
return "set_channel_masks";
|
2014-01-25 12:55:24 -08:00
|
|
|
case GS_OPCODE_GET_INSTANCE_ID:
|
|
|
|
return "get_instance_id";
|
2014-07-09 08:46:17 +02:00
|
|
|
case GS_OPCODE_FF_SYNC:
|
|
|
|
return "ff_sync";
|
2014-07-24 12:14:27 +02:00
|
|
|
case GS_OPCODE_SET_PRIMITIVE_ID:
|
|
|
|
return "set_primitive_id";
|
2014-07-18 10:36:10 +02:00
|
|
|
case GS_OPCODE_SVB_WRITE:
|
|
|
|
return "gs_svb_write";
|
2014-07-18 10:47:15 +02:00
|
|
|
case GS_OPCODE_SVB_SET_DST_INDEX:
|
|
|
|
return "gs_svb_set_dst_index";
|
2014-07-23 12:56:53 +02:00
|
|
|
case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
|
|
|
|
return "gs_ff_sync_set_primitives";
|
2014-08-27 11:32:08 -07:00
|
|
|
case CS_OPCODE_CS_TERMINATE:
|
|
|
|
return "cs_terminate";
|
2014-08-27 11:32:08 -07:00
|
|
|
case SHADER_OPCODE_BARRIER:
|
|
|
|
return "barrier";
|
2015-08-04 19:04:55 +03:00
|
|
|
case SHADER_OPCODE_MULH:
|
|
|
|
return "mulh";
|
2015-11-07 18:58:34 -08:00
|
|
|
case SHADER_OPCODE_MOV_INDIRECT:
|
|
|
|
return "mov_indirect";
|
2015-11-17 01:07:39 -08:00
|
|
|
|
|
|
|
case VEC4_OPCODE_URB_READ:
|
|
|
|
return "urb_read";
|
|
|
|
case TCS_OPCODE_GET_INSTANCE_ID:
|
|
|
|
return "tcs_get_instance_id";
|
|
|
|
case TCS_OPCODE_URB_WRITE:
|
|
|
|
return "tcs_urb_write";
|
|
|
|
case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
|
|
|
|
return "tcs_set_input_urb_offsets";
|
|
|
|
case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
|
|
|
|
return "tcs_set_output_urb_offsets";
|
|
|
|
case TCS_OPCODE_GET_PRIMITIVE_ID:
|
|
|
|
return "tcs_get_primitive_id";
|
|
|
|
case TCS_OPCODE_CREATE_BARRIER_HEADER:
|
|
|
|
return "tcs_create_barrier_header";
|
2013-03-11 17:36:54 -07:00
|
|
|
}
|
2014-12-06 14:18:21 -08:00
|
|
|
|
|
|
|
unreachable("not reached");
|
2013-03-11 17:36:54 -07:00
|
|
|
}
|
2013-04-28 01:35:57 -07:00
|
|
|
|
2014-12-21 06:56:54 -08:00
|
|
|
bool
|
|
|
|
brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
|
|
|
|
{
|
|
|
|
union {
|
|
|
|
unsigned ud;
|
|
|
|
int d;
|
|
|
|
float f;
|
2015-10-22 19:41:30 -07:00
|
|
|
} imm = { reg->ud }, sat_imm = { 0 };
|
2014-12-21 06:56:54 -08:00
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case BRW_REGISTER_TYPE_UD:
|
|
|
|
case BRW_REGISTER_TYPE_D:
|
2015-11-02 11:28:35 -08:00
|
|
|
case BRW_REGISTER_TYPE_UW:
|
|
|
|
case BRW_REGISTER_TYPE_W:
|
2014-12-21 06:56:54 -08:00
|
|
|
case BRW_REGISTER_TYPE_UQ:
|
|
|
|
case BRW_REGISTER_TYPE_Q:
|
|
|
|
/* Nothing to do. */
|
|
|
|
return false;
|
|
|
|
case BRW_REGISTER_TYPE_F:
|
|
|
|
sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f);
|
|
|
|
break;
|
|
|
|
case BRW_REGISTER_TYPE_UB:
|
|
|
|
case BRW_REGISTER_TYPE_B:
|
2015-01-29 11:16:43 -08:00
|
|
|
unreachable("no UB/B immediates");
|
2014-12-21 06:56:54 -08:00
|
|
|
case BRW_REGISTER_TYPE_V:
|
|
|
|
case BRW_REGISTER_TYPE_UV:
|
|
|
|
case BRW_REGISTER_TYPE_VF:
|
2015-02-11 14:53:08 -08:00
|
|
|
unreachable("unimplemented: saturate vector immediate");
|
2014-12-21 06:56:54 -08:00
|
|
|
case BRW_REGISTER_TYPE_DF:
|
|
|
|
case BRW_REGISTER_TYPE_HF:
|
2015-02-11 14:53:08 -08:00
|
|
|
unreachable("unimplemented: saturate DF/HF immediate");
|
2014-12-21 06:56:54 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (imm.ud != sat_imm.ud) {
|
2015-10-22 19:41:30 -07:00
|
|
|
reg->ud = sat_imm.ud;
|
2014-12-21 06:56:54 -08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-01-29 11:15:10 -08:00
|
|
|
bool
|
|
|
|
brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
|
|
|
|
{
|
|
|
|
switch (type) {
|
|
|
|
case BRW_REGISTER_TYPE_D:
|
2015-02-06 14:38:20 +02:00
|
|
|
case BRW_REGISTER_TYPE_UD:
|
2015-10-22 19:41:30 -07:00
|
|
|
reg->d = -reg->d;
|
2015-01-29 11:15:10 -08:00
|
|
|
return true;
|
|
|
|
case BRW_REGISTER_TYPE_W:
|
2015-02-06 14:38:20 +02:00
|
|
|
case BRW_REGISTER_TYPE_UW:
|
2015-10-22 19:41:30 -07:00
|
|
|
reg->d = -(int16_t)reg->ud;
|
2015-01-29 11:15:10 -08:00
|
|
|
return true;
|
|
|
|
case BRW_REGISTER_TYPE_F:
|
2015-10-22 19:41:30 -07:00
|
|
|
reg->f = -reg->f;
|
2015-01-29 11:15:10 -08:00
|
|
|
return true;
|
|
|
|
case BRW_REGISTER_TYPE_VF:
|
2015-10-22 19:41:30 -07:00
|
|
|
reg->ud ^= 0x80808080;
|
2015-01-29 11:15:10 -08:00
|
|
|
return true;
|
|
|
|
case BRW_REGISTER_TYPE_UB:
|
|
|
|
case BRW_REGISTER_TYPE_B:
|
|
|
|
unreachable("no UB/B immediates");
|
|
|
|
case BRW_REGISTER_TYPE_UV:
|
|
|
|
case BRW_REGISTER_TYPE_V:
|
|
|
|
assert(!"unimplemented: negate UV/V immediate");
|
|
|
|
case BRW_REGISTER_TYPE_UQ:
|
|
|
|
case BRW_REGISTER_TYPE_Q:
|
|
|
|
assert(!"unimplemented: negate UQ/Q immediate");
|
|
|
|
case BRW_REGISTER_TYPE_DF:
|
|
|
|
case BRW_REGISTER_TYPE_HF:
|
|
|
|
assert(!"unimplemented: negate DF/HF immediate");
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-01-30 14:14:43 -08:00
|
|
|
bool
|
|
|
|
brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
|
|
|
|
{
|
|
|
|
switch (type) {
|
|
|
|
case BRW_REGISTER_TYPE_D:
|
2015-10-22 19:41:30 -07:00
|
|
|
reg->d = abs(reg->d);
|
2015-01-30 14:14:43 -08:00
|
|
|
return true;
|
|
|
|
case BRW_REGISTER_TYPE_W:
|
2015-10-22 19:41:30 -07:00
|
|
|
reg->d = abs((int16_t)reg->ud);
|
2015-01-30 14:14:43 -08:00
|
|
|
return true;
|
|
|
|
case BRW_REGISTER_TYPE_F:
|
2015-10-22 19:41:30 -07:00
|
|
|
reg->f = fabsf(reg->f);
|
2015-01-30 14:14:43 -08:00
|
|
|
return true;
|
|
|
|
case BRW_REGISTER_TYPE_VF:
|
2015-10-22 19:41:30 -07:00
|
|
|
reg->ud &= ~0x80808080;
|
2015-01-30 14:14:43 -08:00
|
|
|
return true;
|
|
|
|
case BRW_REGISTER_TYPE_UB:
|
|
|
|
case BRW_REGISTER_TYPE_B:
|
|
|
|
unreachable("no UB/B immediates");
|
|
|
|
case BRW_REGISTER_TYPE_UQ:
|
|
|
|
case BRW_REGISTER_TYPE_UD:
|
|
|
|
case BRW_REGISTER_TYPE_UW:
|
|
|
|
case BRW_REGISTER_TYPE_UV:
|
|
|
|
/* Presumably the absolute value modifier on an unsigned source is a
|
|
|
|
* nop, but it would be nice to confirm.
|
|
|
|
*/
|
|
|
|
assert(!"unimplemented: abs unsigned immediate");
|
|
|
|
case BRW_REGISTER_TYPE_V:
|
|
|
|
assert(!"unimplemented: abs V immediate");
|
|
|
|
case BRW_REGISTER_TYPE_Q:
|
|
|
|
assert(!"unimplemented: abs Q immediate");
|
|
|
|
case BRW_REGISTER_TYPE_DF:
|
|
|
|
case BRW_REGISTER_TYPE_HF:
|
|
|
|
assert(!"unimplemented: abs DF/HF immediate");
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-06-22 17:17:56 -07:00
|
|
|
/**
 * Construct the stage-independent part of a backend shader.
 *
 * Stores the supplied compiler, logging cookie, memory context, NIR shader
 * and program data, then derives the debug flag and the printable stage
 * names from the shader's stage.
 */
backend_shader::backend_shader(const struct brw_compiler *compiler,
                               void *log_data,
                               void *mem_ctx,
                               const nir_shader *shader,
                               struct brw_stage_prog_data *stage_prog_data)
   : compiler(compiler),
     log_data(log_data),
     devinfo(compiler->devinfo),
     nir(shader),
     stage_prog_data(stage_prog_data),
     mem_ctx(mem_ctx),
     cfg(NULL),
     stage(shader->stage)
{
   /* Everything below is a function of the stage initialised above. */
   this->debug_enabled =
      INTEL_DEBUG & intel_debug_flag_for_shader_stage(this->stage);
   this->stage_name = _mesa_shader_stage_to_string(this->stage);
   this->stage_abbrev = _mesa_shader_stage_to_abbrev(this->stage);
}
|
|
|
|
|
2015-11-22 13:25:05 -08:00
|
|
|
bool
|
|
|
|
backend_reg::equals(const backend_reg &r) const
|
|
|
|
{
|
|
|
|
return memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0 &&
|
|
|
|
reg_offset == r.reg_offset;
|
|
|
|
}
|
|
|
|
|
2014-06-29 15:35:58 -07:00
|
|
|
bool
|
|
|
|
backend_reg::is_zero() const
|
|
|
|
{
|
|
|
|
if (file != IMM)
|
|
|
|
return false;
|
|
|
|
|
2015-10-24 14:55:57 -07:00
|
|
|
return d == 0;
|
2014-06-29 15:35:58 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
backend_reg::is_one() const
|
|
|
|
{
|
|
|
|
if (file != IMM)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return type == BRW_REGISTER_TYPE_F
|
2015-10-24 14:55:57 -07:00
|
|
|
? f == 1.0
|
|
|
|
: d == 1;
|
2014-06-29 15:35:58 -07:00
|
|
|
}
|
|
|
|
|
2015-02-04 18:08:21 -08:00
|
|
|
bool
|
|
|
|
backend_reg::is_negative_one() const
|
|
|
|
{
|
|
|
|
if (file != IMM)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case BRW_REGISTER_TYPE_F:
|
2015-10-24 14:55:57 -07:00
|
|
|
return f == -1.0;
|
2015-02-04 18:08:21 -08:00
|
|
|
case BRW_REGISTER_TYPE_D:
|
2015-10-24 14:55:57 -07:00
|
|
|
return d == -1;
|
2015-02-04 18:08:21 -08:00
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-06-29 15:35:58 -07:00
|
|
|
bool
|
|
|
|
backend_reg::is_null() const
|
|
|
|
{
|
2015-10-26 17:52:57 -07:00
|
|
|
return file == ARF && nr == BRW_ARF_NULL;
|
2014-06-29 15:35:58 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
backend_reg::is_accumulator() const
|
|
|
|
{
|
2015-10-26 17:52:57 -07:00
|
|
|
return file == ARF && nr == BRW_ARF_ACCUMULATOR;
|
2014-06-29 15:35:58 -07:00
|
|
|
}
|
|
|
|
|
2015-03-18 19:35:31 +02:00
|
|
|
bool
|
|
|
|
backend_reg::in_range(const backend_reg &r, unsigned n) const
|
|
|
|
{
|
|
|
|
return (file == r.file &&
|
2015-10-26 04:35:14 -07:00
|
|
|
nr == r.nr &&
|
2015-03-18 19:35:31 +02:00
|
|
|
reg_offset >= r.reg_offset &&
|
|
|
|
reg_offset < r.reg_offset + n);
|
|
|
|
}
|
|
|
|
|
2015-03-13 14:34:06 -07:00
|
|
|
bool
|
|
|
|
backend_instruction::is_commutative() const
|
|
|
|
{
|
|
|
|
switch (opcode) {
|
|
|
|
case BRW_OPCODE_AND:
|
|
|
|
case BRW_OPCODE_OR:
|
|
|
|
case BRW_OPCODE_XOR:
|
|
|
|
case BRW_OPCODE_ADD:
|
|
|
|
case BRW_OPCODE_MUL:
|
2015-08-04 19:04:55 +03:00
|
|
|
case SHADER_OPCODE_MULH:
|
2015-03-13 14:34:06 -07:00
|
|
|
return true;
|
|
|
|
case BRW_OPCODE_SEL:
|
|
|
|
/* MIN and MAX are commutative. */
|
|
|
|
if (conditional_mod == BRW_CONDITIONAL_GE ||
|
|
|
|
conditional_mod == BRW_CONDITIONAL_L) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
/* fallthrough */
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-12-29 19:29:21 -08:00
|
|
|
bool
|
|
|
|
backend_instruction::is_3src() const
|
|
|
|
{
|
2015-10-08 14:19:10 -07:00
|
|
|
return ::is_3src(opcode);
|
2014-12-29 19:29:21 -08:00
|
|
|
}
|
|
|
|
|
2013-04-28 01:35:57 -07:00
|
|
|
bool
|
2014-02-27 15:44:45 -08:00
|
|
|
backend_instruction::is_tex() const
|
2013-04-28 01:35:57 -07:00
|
|
|
{
|
|
|
|
return (opcode == SHADER_OPCODE_TEX ||
|
|
|
|
opcode == FS_OPCODE_TXB ||
|
|
|
|
opcode == SHADER_OPCODE_TXD ||
|
|
|
|
opcode == SHADER_OPCODE_TXF ||
|
2013-12-10 16:36:31 +02:00
|
|
|
opcode == SHADER_OPCODE_TXF_CMS ||
|
2015-09-08 15:52:09 +01:00
|
|
|
opcode == SHADER_OPCODE_TXF_CMS_W ||
|
2013-12-10 16:38:15 +02:00
|
|
|
opcode == SHADER_OPCODE_TXF_UMS ||
|
2013-11-30 10:32:16 +13:00
|
|
|
opcode == SHADER_OPCODE_TXF_MCS ||
|
2013-04-28 01:35:57 -07:00
|
|
|
opcode == SHADER_OPCODE_TXL ||
|
|
|
|
opcode == SHADER_OPCODE_TXS ||
|
2013-03-31 21:31:12 +13:00
|
|
|
opcode == SHADER_OPCODE_LOD ||
|
2013-10-08 21:42:10 +13:00
|
|
|
opcode == SHADER_OPCODE_TG4 ||
|
|
|
|
opcode == SHADER_OPCODE_TG4_OFFSET);
|
2013-04-28 01:35:57 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
2014-02-27 15:44:45 -08:00
|
|
|
backend_instruction::is_math() const
|
2013-04-28 01:35:57 -07:00
|
|
|
{
|
|
|
|
return (opcode == SHADER_OPCODE_RCP ||
|
|
|
|
opcode == SHADER_OPCODE_RSQ ||
|
|
|
|
opcode == SHADER_OPCODE_SQRT ||
|
|
|
|
opcode == SHADER_OPCODE_EXP2 ||
|
|
|
|
opcode == SHADER_OPCODE_LOG2 ||
|
|
|
|
opcode == SHADER_OPCODE_SIN ||
|
|
|
|
opcode == SHADER_OPCODE_COS ||
|
|
|
|
opcode == SHADER_OPCODE_INT_QUOTIENT ||
|
|
|
|
opcode == SHADER_OPCODE_INT_REMAINDER ||
|
|
|
|
opcode == SHADER_OPCODE_POW);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
2014-02-27 15:44:45 -08:00
|
|
|
backend_instruction::is_control_flow() const
|
2013-04-28 01:35:57 -07:00
|
|
|
{
|
|
|
|
switch (opcode) {
|
|
|
|
case BRW_OPCODE_DO:
|
|
|
|
case BRW_OPCODE_WHILE:
|
|
|
|
case BRW_OPCODE_IF:
|
|
|
|
case BRW_OPCODE_ELSE:
|
|
|
|
case BRW_OPCODE_ENDIF:
|
|
|
|
case BRW_OPCODE_BREAK:
|
|
|
|
case BRW_OPCODE_CONTINUE:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2013-04-29 14:21:14 -07:00
|
|
|
|
2013-09-19 19:48:22 -07:00
|
|
|
bool
|
2014-02-27 15:44:45 -08:00
|
|
|
backend_instruction::can_do_source_mods() const
|
2013-09-19 19:48:22 -07:00
|
|
|
{
|
|
|
|
switch (opcode) {
|
|
|
|
case BRW_OPCODE_ADDC:
|
|
|
|
case BRW_OPCODE_BFE:
|
|
|
|
case BRW_OPCODE_BFI1:
|
|
|
|
case BRW_OPCODE_BFI2:
|
|
|
|
case BRW_OPCODE_BFREV:
|
|
|
|
case BRW_OPCODE_CBIT:
|
|
|
|
case BRW_OPCODE_FBH:
|
|
|
|
case BRW_OPCODE_FBL:
|
|
|
|
case BRW_OPCODE_SUBB:
|
|
|
|
return false;
|
|
|
|
default:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-11 23:07:49 -08:00
|
|
|
bool
|
2014-02-27 15:44:45 -08:00
|
|
|
backend_instruction::can_do_saturate() const
|
2013-12-11 23:07:49 -08:00
|
|
|
{
|
|
|
|
switch (opcode) {
|
|
|
|
case BRW_OPCODE_ADD:
|
|
|
|
case BRW_OPCODE_ASR:
|
|
|
|
case BRW_OPCODE_AVG:
|
|
|
|
case BRW_OPCODE_DP2:
|
|
|
|
case BRW_OPCODE_DP3:
|
|
|
|
case BRW_OPCODE_DP4:
|
|
|
|
case BRW_OPCODE_DPH:
|
|
|
|
case BRW_OPCODE_F16TO32:
|
|
|
|
case BRW_OPCODE_F32TO16:
|
|
|
|
case BRW_OPCODE_LINE:
|
|
|
|
case BRW_OPCODE_LRP:
|
|
|
|
case BRW_OPCODE_MAC:
|
|
|
|
case BRW_OPCODE_MAD:
|
|
|
|
case BRW_OPCODE_MATH:
|
|
|
|
case BRW_OPCODE_MOV:
|
|
|
|
case BRW_OPCODE_MUL:
|
2015-08-04 19:04:55 +03:00
|
|
|
case SHADER_OPCODE_MULH:
|
2013-12-11 23:07:49 -08:00
|
|
|
case BRW_OPCODE_PLN:
|
|
|
|
case BRW_OPCODE_RNDD:
|
|
|
|
case BRW_OPCODE_RNDE:
|
|
|
|
case BRW_OPCODE_RNDU:
|
|
|
|
case BRW_OPCODE_RNDZ:
|
|
|
|
case BRW_OPCODE_SEL:
|
|
|
|
case BRW_OPCODE_SHL:
|
|
|
|
case BRW_OPCODE_SHR:
|
|
|
|
case FS_OPCODE_LINTERP:
|
|
|
|
case SHADER_OPCODE_COS:
|
|
|
|
case SHADER_OPCODE_EXP2:
|
|
|
|
case SHADER_OPCODE_LOG2:
|
|
|
|
case SHADER_OPCODE_POW:
|
|
|
|
case SHADER_OPCODE_RCP:
|
|
|
|
case SHADER_OPCODE_RSQ:
|
|
|
|
case SHADER_OPCODE_SIN:
|
|
|
|
case SHADER_OPCODE_SQRT:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-08-24 14:01:48 -07:00
|
|
|
bool
|
|
|
|
backend_instruction::can_do_cmod() const
|
|
|
|
{
|
|
|
|
switch (opcode) {
|
|
|
|
case BRW_OPCODE_ADD:
|
|
|
|
case BRW_OPCODE_ADDC:
|
|
|
|
case BRW_OPCODE_AND:
|
|
|
|
case BRW_OPCODE_ASR:
|
|
|
|
case BRW_OPCODE_AVG:
|
|
|
|
case BRW_OPCODE_CMP:
|
|
|
|
case BRW_OPCODE_CMPN:
|
|
|
|
case BRW_OPCODE_DP2:
|
|
|
|
case BRW_OPCODE_DP3:
|
|
|
|
case BRW_OPCODE_DP4:
|
|
|
|
case BRW_OPCODE_DPH:
|
|
|
|
case BRW_OPCODE_F16TO32:
|
|
|
|
case BRW_OPCODE_F32TO16:
|
|
|
|
case BRW_OPCODE_FRC:
|
|
|
|
case BRW_OPCODE_LINE:
|
|
|
|
case BRW_OPCODE_LRP:
|
|
|
|
case BRW_OPCODE_LZD:
|
|
|
|
case BRW_OPCODE_MAC:
|
|
|
|
case BRW_OPCODE_MACH:
|
|
|
|
case BRW_OPCODE_MAD:
|
|
|
|
case BRW_OPCODE_MOV:
|
|
|
|
case BRW_OPCODE_MUL:
|
|
|
|
case BRW_OPCODE_NOT:
|
|
|
|
case BRW_OPCODE_OR:
|
|
|
|
case BRW_OPCODE_PLN:
|
|
|
|
case BRW_OPCODE_RNDD:
|
|
|
|
case BRW_OPCODE_RNDE:
|
|
|
|
case BRW_OPCODE_RNDU:
|
|
|
|
case BRW_OPCODE_RNDZ:
|
|
|
|
case BRW_OPCODE_SAD2:
|
|
|
|
case BRW_OPCODE_SADA2:
|
|
|
|
case BRW_OPCODE_SHL:
|
|
|
|
case BRW_OPCODE_SHR:
|
|
|
|
case BRW_OPCODE_SUBB:
|
|
|
|
case BRW_OPCODE_XOR:
|
2015-01-23 21:58:51 -08:00
|
|
|
case FS_OPCODE_CINTERP:
|
|
|
|
case FS_OPCODE_LINTERP:
|
2014-08-24 14:01:48 -07:00
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-04-09 12:01:49 -07:00
|
|
|
bool
|
|
|
|
backend_instruction::reads_accumulator_implicitly() const
|
|
|
|
{
|
|
|
|
switch (opcode) {
|
|
|
|
case BRW_OPCODE_MAC:
|
|
|
|
case BRW_OPCODE_MACH:
|
|
|
|
case BRW_OPCODE_SADA2:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-05-07 09:58:43 +02:00
|
|
|
bool
|
2015-04-17 12:15:58 -07:00
|
|
|
backend_instruction::writes_accumulator_implicitly(const struct brw_device_info *devinfo) const
|
2014-05-07 09:58:43 +02:00
|
|
|
{
|
|
|
|
return writes_accumulator ||
|
2015-04-17 12:15:58 -07:00
|
|
|
(devinfo->gen < 6 &&
|
2014-05-07 09:58:43 +02:00
|
|
|
((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) ||
|
2014-11-08 01:39:14 -08:00
|
|
|
(opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP &&
|
2014-05-07 09:58:43 +02:00
|
|
|
opcode != FS_OPCODE_CINTERP)));
|
|
|
|
}
|
|
|
|
|
2013-10-20 14:02:08 -07:00
|
|
|
bool
|
|
|
|
backend_instruction::has_side_effects() const
|
|
|
|
{
|
|
|
|
switch (opcode) {
|
|
|
|
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
2015-07-21 18:45:32 +03:00
|
|
|
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
2015-02-28 13:36:21 -08:00
|
|
|
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
|
2015-04-23 14:24:14 +03:00
|
|
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
|
2015-07-21 18:45:32 +03:00
|
|
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
|
2015-04-23 14:28:25 +03:00
|
|
|
case SHADER_OPCODE_TYPED_ATOMIC:
|
2015-07-21 18:45:32 +03:00
|
|
|
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
2015-04-23 14:28:25 +03:00
|
|
|
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
|
2015-07-21 18:45:32 +03:00
|
|
|
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
|
2015-04-23 14:30:28 +03:00
|
|
|
case SHADER_OPCODE_MEMORY_FENCE:
|
2014-10-20 23:00:50 -07:00
|
|
|
case SHADER_OPCODE_URB_WRITE_SIMD8:
|
2015-05-06 00:04:10 -07:00
|
|
|
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
|
|
|
|
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
|
|
|
|
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
|
2014-09-12 16:17:37 -07:00
|
|
|
case FS_OPCODE_FB_WRITE:
|
2014-08-27 11:32:08 -07:00
|
|
|
case SHADER_OPCODE_BARRIER:
|
2013-10-20 14:02:08 -07:00
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-10-19 23:13:09 -07:00
|
|
|
bool
|
|
|
|
backend_instruction::is_volatile() const
|
|
|
|
{
|
|
|
|
switch (opcode) {
|
|
|
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
|
|
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
|
|
|
|
case SHADER_OPCODE_TYPED_SURFACE_READ:
|
|
|
|
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-07-12 21:16:34 -07:00
|
|
|
#ifndef NDEBUG
|
|
|
|
static bool
|
|
|
|
inst_is_in_block(const bblock_t *block, const backend_instruction *inst)
|
|
|
|
{
|
|
|
|
bool found = false;
|
|
|
|
foreach_inst_in_block (backend_instruction, i, block) {
|
|
|
|
if (inst == i) {
|
|
|
|
found = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return found;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static void
|
|
|
|
adjust_later_block_ips(bblock_t *start_block, int ip_adjustment)
|
|
|
|
{
|
2014-09-02 21:07:51 -07:00
|
|
|
for (bblock_t *block_iter = start_block->next();
|
2014-07-12 21:16:34 -07:00
|
|
|
!block_iter->link.is_tail_sentinel();
|
2014-09-02 21:07:51 -07:00
|
|
|
block_iter = block_iter->next()) {
|
2014-07-12 21:16:34 -07:00
|
|
|
block_iter->start_ip += ip_adjustment;
|
|
|
|
block_iter->end_ip += ip_adjustment;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-07-12 21:18:08 -07:00
|
|
|
void
|
|
|
|
backend_instruction::insert_after(bblock_t *block, backend_instruction *inst)
|
|
|
|
{
|
2015-02-17 18:01:41 -08:00
|
|
|
if (!this->is_head_sentinel())
|
|
|
|
assert(inst_is_in_block(block, this) || !"Instruction not in block");
|
2014-07-12 21:18:08 -07:00
|
|
|
|
|
|
|
block->end_ip++;
|
|
|
|
|
|
|
|
adjust_later_block_ips(block, 1);
|
|
|
|
|
|
|
|
exec_node::insert_after(inst);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
backend_instruction::insert_before(bblock_t *block, backend_instruction *inst)
|
|
|
|
{
|
2015-02-17 18:01:41 -08:00
|
|
|
if (!this->is_tail_sentinel())
|
|
|
|
assert(inst_is_in_block(block, this) || !"Instruction not in block");
|
2014-07-12 21:18:08 -07:00
|
|
|
|
|
|
|
block->end_ip++;
|
|
|
|
|
|
|
|
adjust_later_block_ips(block, 1);
|
|
|
|
|
|
|
|
exec_node::insert_before(inst);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
backend_instruction::insert_before(bblock_t *block, exec_list *list)
|
|
|
|
{
|
|
|
|
assert(inst_is_in_block(block, this) || !"Instruction not in block");
|
|
|
|
|
|
|
|
unsigned num_inst = list->length();
|
|
|
|
|
|
|
|
block->end_ip += num_inst;
|
|
|
|
|
|
|
|
adjust_later_block_ips(block, num_inst);
|
|
|
|
|
|
|
|
exec_node::insert_before(list);
|
|
|
|
}
|
|
|
|
|
2014-07-12 21:16:34 -07:00
|
|
|
void
|
|
|
|
backend_instruction::remove(bblock_t *block)
|
|
|
|
{
|
|
|
|
assert(inst_is_in_block(block, this) || !"Instruction not in block");
|
|
|
|
|
|
|
|
adjust_later_block_ips(block, -1);
|
|
|
|
|
|
|
|
if (block->start_ip == block->end_ip) {
|
|
|
|
block->cfg->remove_block(block);
|
|
|
|
} else {
|
|
|
|
block->end_ip--;
|
|
|
|
}
|
|
|
|
|
|
|
|
exec_node::remove();
|
|
|
|
}
|
|
|
|
|
2013-04-29 14:21:14 -07:00
|
|
|
void
|
2015-05-20 09:44:01 -07:00
|
|
|
backend_shader::dump_instructions()
|
2013-04-29 14:21:14 -07:00
|
|
|
{
|
2014-05-29 13:08:59 -07:00
|
|
|
dump_instructions(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2015-05-20 09:44:01 -07:00
|
|
|
backend_shader::dump_instructions(const char *name)
|
2014-05-29 13:08:59 -07:00
|
|
|
{
|
|
|
|
FILE *file = stderr;
|
|
|
|
if (name && geteuid() != 0) {
|
|
|
|
file = fopen(name, "w");
|
|
|
|
if (!file)
|
|
|
|
file = stderr;
|
|
|
|
}
|
|
|
|
|
2015-02-13 10:46:32 -08:00
|
|
|
if (cfg) {
|
|
|
|
int ip = 0;
|
|
|
|
foreach_block_and_inst(block, backend_instruction, inst, cfg) {
|
2015-09-26 14:40:09 -07:00
|
|
|
if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER))
|
|
|
|
fprintf(file, "%4d: ", ip++);
|
2015-02-13 10:46:32 -08:00
|
|
|
dump_instruction(inst, file);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
int ip = 0;
|
|
|
|
foreach_in_list(backend_instruction, inst, &instructions) {
|
2015-09-26 14:40:09 -07:00
|
|
|
if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER))
|
|
|
|
fprintf(file, "%4d: ", ip++);
|
2015-02-13 10:46:32 -08:00
|
|
|
dump_instruction(inst, file);
|
|
|
|
}
|
2014-05-29 13:08:59 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
if (file != stderr) {
|
|
|
|
fclose(file);
|
2013-04-29 14:21:14 -07:00
|
|
|
}
|
|
|
|
}
|
2013-10-03 09:58:43 -07:00
|
|
|
|
2014-07-11 20:54:52 -07:00
|
|
|
void
|
2015-05-20 09:44:01 -07:00
|
|
|
backend_shader::calculate_cfg()
|
2014-07-11 20:54:52 -07:00
|
|
|
{
|
|
|
|
if (this->cfg)
|
|
|
|
return;
|
|
|
|
cfg = new(mem_ctx) cfg_t(&this->instructions);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2015-05-20 09:44:01 -07:00
|
|
|
backend_shader::invalidate_cfg()
|
2014-07-11 20:54:52 -07:00
|
|
|
{
|
|
|
|
ralloc_free(this->cfg);
|
|
|
|
this->cfg = NULL;
|
|
|
|
}
|
2013-10-03 09:58:43 -07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Sets up the starting offsets for the groups of binding table entries
|
|
|
|
* commong to all pipeline stages.
|
|
|
|
*
|
|
|
|
* Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
|
|
|
|
* unused but also make sure that addition of small offsets to them will
|
|
|
|
* trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
|
|
|
|
*/
|
|
|
|
void
|
2015-10-01 08:30:56 -07:00
|
|
|
brw_assign_common_binding_table_offsets(gl_shader_stage stage,
|
|
|
|
const struct brw_device_info *devinfo,
|
|
|
|
const struct gl_shader_program *shader_prog,
|
|
|
|
const struct gl_program *prog,
|
|
|
|
struct brw_stage_prog_data *stage_prog_data,
|
|
|
|
uint32_t next_binding_table_offset)
|
2013-10-03 09:58:43 -07:00
|
|
|
{
|
2015-10-01 08:30:56 -07:00
|
|
|
const struct gl_shader *shader = NULL;
|
2013-10-03 09:58:43 -07:00
|
|
|
int num_textures = _mesa_fls(prog->SamplersUsed);
|
|
|
|
|
2015-10-01 08:30:56 -07:00
|
|
|
if (shader_prog)
|
|
|
|
shader = shader_prog->_LinkedShaders[stage];
|
|
|
|
|
2013-10-03 09:58:43 -07:00
|
|
|
stage_prog_data->binding_table.texture_start = next_binding_table_offset;
|
|
|
|
next_binding_table_offset += num_textures;
|
|
|
|
|
|
|
|
if (shader) {
|
2015-10-09 14:41:21 +02:00
|
|
|
assert(shader->NumUniformBlocks <= BRW_MAX_UBO);
|
2013-10-03 09:58:43 -07:00
|
|
|
stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
|
2015-10-09 14:41:21 +02:00
|
|
|
next_binding_table_offset += shader->NumUniformBlocks;
|
|
|
|
|
|
|
|
assert(shader->NumShaderStorageBlocks <= BRW_MAX_SSBO);
|
|
|
|
stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
|
|
|
|
next_binding_table_offset += shader->NumShaderStorageBlocks;
|
2013-10-03 09:58:43 -07:00
|
|
|
} else {
|
|
|
|
stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
|
2015-10-09 14:41:21 +02:00
|
|
|
stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
|
2013-10-03 09:58:43 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
|
|
|
|
stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
|
|
|
|
next_binding_table_offset++;
|
|
|
|
} else {
|
|
|
|
stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (prog->UsesGather) {
|
2015-04-15 18:00:05 -07:00
|
|
|
if (devinfo->gen >= 8) {
|
2014-05-29 00:06:08 -07:00
|
|
|
stage_prog_data->binding_table.gather_texture_start =
|
|
|
|
stage_prog_data->binding_table.texture_start;
|
|
|
|
} else {
|
|
|
|
stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
|
|
|
|
next_binding_table_offset += num_textures;
|
|
|
|
}
|
2013-10-03 09:58:43 -07:00
|
|
|
} else {
|
|
|
|
stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
|
|
|
|
}
|
|
|
|
|
2015-10-27 06:58:15 +11:00
|
|
|
if (shader && shader->NumAtomicBuffers) {
|
2013-10-20 13:09:57 -07:00
|
|
|
stage_prog_data->binding_table.abo_start = next_binding_table_offset;
|
2015-10-27 06:58:15 +11:00
|
|
|
next_binding_table_offset += shader->NumAtomicBuffers;
|
2013-10-20 13:09:57 -07:00
|
|
|
} else {
|
|
|
|
stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
|
|
|
|
}
|
|
|
|
|
2015-10-01 08:30:56 -07:00
|
|
|
if (shader && shader->NumImages) {
|
2013-11-22 16:08:12 -08:00
|
|
|
stage_prog_data->binding_table.image_start = next_binding_table_offset;
|
2015-10-01 08:30:56 -07:00
|
|
|
next_binding_table_offset += shader->NumImages;
|
2013-11-22 16:08:12 -08:00
|
|
|
} else {
|
|
|
|
stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
|
|
|
|
}
|
|
|
|
|
2013-10-03 09:58:43 -07:00
|
|
|
/* This may or may not be used depending on how the compile goes. */
|
|
|
|
stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
|
|
|
|
next_binding_table_offset++;
|
|
|
|
|
|
|
|
assert(next_binding_table_offset <= BRW_MAX_SURFACES);
|
|
|
|
|
2013-11-26 19:56:07 -08:00
|
|
|
/* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
|
2013-10-03 09:58:43 -07:00
|
|
|
}
|
2015-05-05 21:07:15 +03:00
|
|
|
|
2015-09-30 11:46:36 -07:00
|
|
|
static void
|
|
|
|
setup_vec4_uniform_value(const gl_constant_value **params,
|
|
|
|
const gl_constant_value *values,
|
|
|
|
unsigned n)
|
|
|
|
{
|
|
|
|
static const gl_constant_value zero = { 0 };
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < n; ++i)
|
|
|
|
params[i] = &values[i];
|
|
|
|
|
|
|
|
for (unsigned i = n; i < 4; ++i)
|
|
|
|
params[i] = &zero;
|
|
|
|
}
|
|
|
|
|
2015-05-05 21:07:15 +03:00
|
|
|
void
|
2015-09-30 11:46:36 -07:00
|
|
|
brw_setup_image_uniform_values(gl_shader_stage stage,
|
|
|
|
struct brw_stage_prog_data *stage_prog_data,
|
|
|
|
unsigned param_start_index,
|
|
|
|
const gl_uniform_storage *storage)
|
2015-05-05 21:07:15 +03:00
|
|
|
{
|
2015-09-30 11:46:36 -07:00
|
|
|
const gl_constant_value **param =
|
|
|
|
&stage_prog_data->param[param_start_index];
|
2015-05-05 21:07:15 +03:00
|
|
|
|
|
|
|
for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) {
|
2015-09-30 11:00:02 +10:00
|
|
|
const unsigned image_idx = storage->opaque[stage].index + i;
|
2015-09-30 11:46:36 -07:00
|
|
|
const brw_image_param *image_param =
|
|
|
|
&stage_prog_data->image_param[image_idx];
|
2015-05-05 21:07:15 +03:00
|
|
|
|
|
|
|
/* Upload the brw_image_param structure. The order is expected to match
|
|
|
|
* the BRW_IMAGE_PARAM_*_OFFSET defines.
|
|
|
|
*/
|
2015-09-30 11:46:36 -07:00
|
|
|
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
|
|
|
|
(const gl_constant_value *)&image_param->surface_idx, 1);
|
|
|
|
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
|
|
|
|
(const gl_constant_value *)image_param->offset, 2);
|
|
|
|
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
|
|
|
|
(const gl_constant_value *)image_param->size, 3);
|
|
|
|
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
|
|
|
|
(const gl_constant_value *)image_param->stride, 4);
|
|
|
|
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
|
|
|
|
(const gl_constant_value *)image_param->tiling, 3);
|
|
|
|
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
|
|
|
|
(const gl_constant_value *)image_param->swizzling, 2);
|
|
|
|
param += BRW_IMAGE_PARAM_SIZE;
|
2015-05-05 21:07:15 +03:00
|
|
|
|
|
|
|
brw_mark_surface_used(
|
|
|
|
stage_prog_data,
|
|
|
|
stage_prog_data->binding_table.image_start + image_idx);
|
|
|
|
}
|
|
|
|
}
|
2015-10-07 04:19:39 -07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Decide which set of clip planes should be used when clipping via
|
|
|
|
* gl_Position or gl_ClipVertex.
|
|
|
|
*/
|
|
|
|
gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx)
|
|
|
|
{
|
|
|
|
if (ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]) {
|
|
|
|
/* There is currently a GLSL vertex shader, so clip according to GLSL
|
|
|
|
* rules, which means compare gl_ClipVertex (or gl_Position, if
|
|
|
|
* gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes
|
|
|
|
* that were stored in EyeUserPlane at the time the clip planes were
|
|
|
|
* specified.
|
|
|
|
*/
|
|
|
|
return ctx->Transform.EyeUserPlane;
|
|
|
|
} else {
|
|
|
|
/* Either we are using fixed function or an ARB vertex program. In
|
|
|
|
* either case the clip planes are going to be compared against
|
|
|
|
* gl_Position (which is in clip coordinates) so we have to clip using
|
|
|
|
* _ClipUserPlane, which was transformed into clip coordinates by Mesa
|
|
|
|
* core.
|
|
|
|
*/
|
|
|
|
return ctx->Transform._ClipUserPlane;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-11-10 14:35:27 -08:00
|
|
|
extern "C" const unsigned *
|
|
|
|
brw_compile_tes(const struct brw_compiler *compiler,
|
|
|
|
void *log_data,
|
|
|
|
void *mem_ctx,
|
|
|
|
const struct brw_tes_prog_key *key,
|
|
|
|
struct brw_tes_prog_data *prog_data,
|
|
|
|
const nir_shader *src_shader,
|
|
|
|
struct gl_shader_program *shader_prog,
|
|
|
|
int shader_time_index,
|
|
|
|
unsigned *final_assembly_size,
|
|
|
|
char **error_str)
|
|
|
|
{
|
|
|
|
const struct brw_device_info *devinfo = compiler->devinfo;
|
|
|
|
struct gl_shader *shader =
|
|
|
|
shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
|
|
|
|
const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL];
|
|
|
|
|
|
|
|
nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
|
|
|
|
nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
|
i965: Handle mix-and-match TCS/TES with separate shader objects.
GL_ARB_separate_shader_objects allows the application to mix-and-match
TCS and TES programs separately. This means that the interface between
the two stages isn't known until the final SSO pipeline is in place.
This isn't a great match for our hardware: the TCS and TES have to agree
on the Patch URB entry layout. Since we store data as per-patch slots
followed by per-vertex slots, changing the number of per-patch slots can
significantly alter the layout. This can easily happen with SSO.
To handle this, we store the [Patch]OutputsWritten and [Patch]InputsRead
bitfields in the TCS/TES program keys, introducing program recompiles.
brw_upload_programs() decides the layout for both TCS and TES, and
passes it to brw_upload_tcs/tes(), which store it in the key.
When creating the NIR for a shader specialization, we override
nir->info.inputs_read (and friends) to the program key's values.
Since everything uses those, no further compiler changes are needed.
This also replaces the hack in brw_create_nir().
To avoid recompiles, brw_precompile_tes() looks to see if there's a
TCS in the linked shader. If so, it accounts for the TCS outputs,
just as brw_upload_programs() would. This eliminates all recompiles
in the non-SSO case. In the SSO case, there should only be recompiles
when using a TCS and TES that have different input/output interfaces.
Fixes Piglit's mix-and-match-tcs-tes test.
v2: Pull the brw_upload_programs code into a brw_upload_tess_programs()
helper function (requested by Jordan Justen).
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
2015-12-07 20:18:42 -08:00
|
|
|
nir->info.inputs_read = key->inputs_read;
|
|
|
|
nir->info.patch_inputs_read = key->patch_inputs_read;
|
i965: Defer input lowering for tessellation stages until specialization.
With tessellation shaders and SSO, we won't be able to always decide on
VUE map layouts at LinkProgram time. Unfortunately, we have to delay it
until shader specialization time.
However, uniform lowering cannot be deferred - brw_codegen_*_prog()
reads nir->num_uniforms. Fortunately, we don't need to defer it -
uniform, system value, atomic, and sampler lowering can safely stay
where it is. This patch moves those to brw_lower_nir()'s only caller,
renames brw_lower_nir() to brw_nir_lower_io(), and introduces calls
to that.
For non-tessellation stages, I chose to call brw_nir_lower_io() from
brw_create_nir(), so it's still done at the same time. There's no
need to defer it, and doing it at LinkProgram time is nice.
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
2015-12-07 17:58:35 -08:00
|
|
|
nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar);
|
2015-11-10 14:35:27 -08:00
|
|
|
nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
|
|
|
|
|
|
|
|
brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
|
|
|
|
nir->info.outputs_written,
|
|
|
|
nir->info.separate_shader);
|
|
|
|
|
|
|
|
unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
|
|
|
|
|
|
|
|
assert(output_size_bytes >= 1);
|
|
|
|
if (output_size_bytes > GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES) {
|
|
|
|
if (error_str)
|
|
|
|
*error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* URB entry sizes are stored as a multiple of 64 bytes. */
|
|
|
|
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
|
|
|
|
|
|
|
struct brw_vue_map input_vue_map;
|
|
|
|
brw_compute_tess_vue_map(&input_vue_map,
|
|
|
|
nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
|
|
|
|
nir->info.patch_inputs_read);
|
|
|
|
|
|
|
|
bool need_patch_header = nir->info.system_values_read &
|
|
|
|
(BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_OUTER) |
|
|
|
|
BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_INNER));
|
|
|
|
|
|
|
|
/* The TES will pull most inputs using URB read messages.
|
|
|
|
*
|
|
|
|
* However, we push the patch header for TessLevel factors when required,
|
|
|
|
* as it's a tiny amount of extra data.
|
|
|
|
*/
|
|
|
|
prog_data->base.urb_read_length = need_patch_header ? 1 : 0;
|
|
|
|
|
|
|
|
if (unlikely(INTEL_DEBUG & DEBUG_TES)) {
|
|
|
|
fprintf(stderr, "TES Input ");
|
|
|
|
brw_print_vue_map(stderr, &input_vue_map);
|
|
|
|
fprintf(stderr, "TES Output ");
|
|
|
|
brw_print_vue_map(stderr, &prog_data->base.vue_map);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is_scalar) {
|
|
|
|
fs_visitor v(compiler, log_data, mem_ctx, (void *) key,
|
|
|
|
&prog_data->base.base, shader->Program, nir, 8,
|
|
|
|
shader_time_index, &input_vue_map);
|
|
|
|
if (!v.run_tes()) {
|
|
|
|
if (error_str)
|
|
|
|
*error_str = ralloc_strdup(mem_ctx, v.fail_msg);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
|
|
|
|
|
|
|
|
fs_generator g(compiler, log_data, mem_ctx, (void *) key,
|
|
|
|
&prog_data->base.base, v.promoted_constants, false,
|
|
|
|
"TES");
|
|
|
|
if (unlikely(INTEL_DEBUG & DEBUG_TES)) {
|
|
|
|
g.enable_debug(ralloc_asprintf(mem_ctx,
|
|
|
|
"%s tessellation evaluation shader %s",
|
|
|
|
nir->info.label ? nir->info.label
|
|
|
|
: "unnamed",
|
|
|
|
nir->info.name));
|
|
|
|
}
|
|
|
|
|
|
|
|
g.generate_code(v.cfg, 8);
|
|
|
|
|
|
|
|
return g.get_assembly(final_assembly_size);
|
|
|
|
} else {
|
|
|
|
unreachable("XXX: vec4 tessellation evalation shaders not merged yet.");
|
|
|
|
}
|
|
|
|
}
|