/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_VEC4_H
#define BRW_VEC4_H

#include "brw_shader.h"

#ifdef __cplusplus
#include "brw_ir_vec4.h"
#include "brw_ir_performance.h"
#include "brw_vec4_builder.h"
#include "brw_vec4_live_variables.h"
#endif

#include "compiler/glsl/ir.h"
#include "compiler/nir/nir.h"

#ifdef __cplusplus
extern "C" {
#endif
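
/* Generator entry point: emits the final native code for a VUE-stage program
 * from its optimized vec4 IR (cfg), allocating the assembly out of mem_ctx
 * and returning a pointer to it; compile statistics are reported through
 * stats when it is non-NULL. */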
const unsigned *
brw_vec4_generate_assembly(const struct brw_compiler *compiler,
                           void *log_data,
                           void *mem_ctx,
                           const nir_shader *nir,
                           struct brw_vue_prog_data *prog_data,
                           const struct cfg_t *cfg,
                           const brw::performance &perf,
                           struct brw_compile_stats *stats);

#ifdef __cplusplus
} /* extern "C" */

namespace brw {
/**
 * The vertex shader front-end.
 *
 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
 * fixed-function) into VS IR.
 */
class vec4_visitor : public backend_shader
{
public:
   vec4_visitor(const struct brw_compiler *compiler,
                void *log_data,
                const struct brw_sampler_prog_key_data *key,
                struct brw_vue_prog_data *prog_data,
                const nir_shader *shader,
                void *mem_ctx,
                bool no_spills,
                int shader_time_index);
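
   /* Null destinations of various types, for instructions emitted only for
    * their side effects -- typically a CMP whose result is consumed through
    * the flag register, e.g. (illustrative):
    *
    *    emit(CMP(dst_null_d(), src0, src1, BRW_CONDITIONAL_NZ));
    */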
   dst_reg dst_null_f()
   {
      return dst_reg(brw_null_reg());
   }

   dst_reg dst_null_df()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
   }

   dst_reg dst_null_d()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   dst_reg dst_null_ud()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
   }

   const struct brw_sampler_prog_key_data * const key_tex;
   struct brw_vue_prog_data * const prog_data;
   char *fail_msg;
   bool failed;

   /**
    * GLSL IR currently being processed, which is associated with our
    * driver IR instructions for debugging purposes.
    */
   const void *base_ir;
   const char *current_annotation;

   int first_non_payload_grf;
   unsigned int max_grf;
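
   /* Cached analyses of the IR; invalidate_analysis() below should be called
    * whenever a pass changes the program so that they are recomputed when
    * next required. */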
   BRW_ANALYSIS(live_analysis, brw::vec4_live_variables,
                backend_shader *) live_analysis;
   BRW_ANALYSIS(performance_analysis, brw::performance,
                vec4_visitor *) performance_analysis;

   bool need_all_constants_in_pull_buffer;

   /* Regs for vertex results.  Generated at ir_variable visiting time
    * for the ir->locations used.
    */
   dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
   unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
   const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
   int uniforms;

   src_reg shader_start_time;
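
   /* Main entry point: builds vec4 IR for the shader, runs the optimization
    * and lowering passes, and allocates registers.  Returns false on failure,
    * in which case fail_msg describes what went wrong. */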
   bool run();
   void fail(const char *msg, ...);

   int setup_uniforms(int payload_reg);

   bool reg_allocate_trivial();
   bool reg_allocate();
   void evaluate_spill_costs(float *spill_costs, bool *no_spill);
   int choose_spill_reg(struct ra_graph *g);
   void spill_reg(unsigned spill_reg);
   void move_grf_array_access_to_scratch();
   void move_uniform_array_access_to_pull_constants();
   void move_push_constants_to_pull_constants();
   void split_uniform_registers();
   void pack_uniform_registers();
   virtual void invalidate_analysis(brw::analysis_dependency_class c);
   void split_virtual_grfs();
   bool opt_vector_float();
   bool opt_reduce_swizzle();
   bool dead_code_eliminate();
   bool opt_cmod_propagation();
   bool opt_copy_propagation(bool do_constant_prop = true);
   bool opt_cse_local(bblock_t *block, const vec4_live_variables &live);
   bool opt_cse();
   bool opt_algebraic();
   bool opt_register_coalesce();
   bool eliminate_find_live_channel();
   bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
   void opt_set_dependency_control();
   void opt_schedule_instructions();
   void convert_to_hw_regs();
   void fixup_3src_null_dest();

   bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
   bool lower_simd_width();
   bool scalarize_df();
   bool lower_64bit_mad_to_mul_add();
   void apply_logical_swizzle(struct brw_reg *hw_reg,
                              vec4_instruction *inst, int arg);
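
   /* Append an instruction to the end of the current instruction stream and
    * return it; the overloads below construct a vec4_instruction from an
    * opcode, a destination and up to three sources.  Illustrative use:
    *
    *    vec4_instruction *inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
    *    inst->predicate = BRW_PREDICATE_NORMAL;
    */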
   vec4_instruction *emit(vec4_instruction *inst);

   vec4_instruction *emit(enum opcode opcode);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1,
                          const src_reg &src2);

   vec4_instruction *emit_before(bblock_t *block,
                                 vec4_instruction *inst,
                                 vec4_instruction *new_inst);

#define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
#define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
#define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
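   /* Each EMITn(op) invocation below declares an emit helper named after the
    * hardware opcode, taking a destination and n sources; for example,
    * EMIT2(ADD) declares:
    *
    *    vec4_instruction *ADD(const dst_reg &, const src_reg &, const src_reg &);
    */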
   EMIT1(MOV)
   EMIT1(NOT)
   EMIT1(RNDD)
   EMIT1(RNDE)
   EMIT1(RNDZ)
   EMIT1(FRC)
   EMIT1(F32TO16)
   EMIT1(F16TO32)
   EMIT2(ADD)
   EMIT2(MUL)
   EMIT2(MACH)
   EMIT2(MAC)
   EMIT2(AND)
   EMIT2(OR)
   EMIT2(XOR)
   EMIT2(DP3)
   EMIT2(DP4)
   EMIT2(DPH)
   EMIT2(SHL)
   EMIT2(SHR)
   EMIT2(ASR)
   vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
                         enum brw_conditional_mod condition);
   vec4_instruction *IF(src_reg src0, src_reg src1,
                        enum brw_conditional_mod condition);
   vec4_instruction *IF(enum brw_predicate predicate);
   EMIT1(SCRATCH_READ)
   EMIT2(SCRATCH_WRITE)
   EMIT3(LRP)
   EMIT1(BFREV)
   EMIT3(BFE)
   EMIT2(BFI1)
   EMIT3(BFI2)
   EMIT1(FBH)
   EMIT1(FBL)
   EMIT1(CBIT)
   EMIT3(MAD)
   EMIT2(ADDC)
   EMIT2(SUBB)
   EMIT1(DIM)

#undef EMIT1
#undef EMIT2
#undef EMIT3

   vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                                 src_reg src0, src_reg src1);

   /**
    * Copy any live channel from \p src to the first channel of the
    * result.
    */
   src_reg emit_uniformize(const src_reg &src);

   /** Fix all float operands of a 3-source instruction. */
   void fix_float_operands(src_reg op[3], nir_alu_instr *instr);

   src_reg fix_3src_operand(const src_reg &src);

   vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
                               const src_reg &src1 = src_reg());

   src_reg fix_math_operand(const src_reg &src);

   void emit_pack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
   void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);

   void emit_texture(ir_texture_opcode op,
                     dst_reg dest,
                     const glsl_type *dest_type,
                     src_reg coordinate,
                     int coord_components,
                     src_reg shadow_comparator,
                     src_reg lod, src_reg lod2,
                     src_reg sample_index,
                     uint32_t constant_offset,
                     src_reg offset_value,
                     src_reg mcs,
                     uint32_t surface, src_reg surface_reg,
                     src_reg sampler_reg);

   src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
                          src_reg surface);
   void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);

   void emit_ndc_computation();
   void emit_psiz_and_flags(dst_reg reg);
   vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
   virtual void emit_urb_slot(dst_reg reg, int varying);

   void emit_shader_time_begin();
   void emit_shader_time_end();
   void emit_shader_time_write(int shader_time_subindex, src_reg value);

   src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
                              src_reg *reladdr, int reg_offset);
   void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
                          dst_reg dst,
                          src_reg orig_src,
                          int base_offset);
   void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
                           int base_offset);
   void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
                                dst_reg dst,
                                src_reg orig_src,
                                int base_offset,
                                src_reg indirect);
   void emit_pull_constant_load_reg(dst_reg dst,
                                    src_reg surf_index,
                                    src_reg offset,
                                    bblock_t *before_block,
                                    vec4_instruction *before_inst);
   src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
                                vec4_instruction *inst, src_reg src);

   void resolve_ud_negate(src_reg *reg);

   bool lower_minmax();

   src_reg get_timestamp();

   void dump_instruction(const backend_instruction *inst) const;
   void dump_instruction(const backend_instruction *inst, FILE *file) const;

   bool is_high_sampler(src_reg sampler);
   bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);

   void emit_conversion_from_double(dst_reg dst, src_reg src);
   void emit_conversion_to_double(dst_reg dst, src_reg src);

   vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
                                        bool for_write,
                                        bblock_t *block = NULL,
                                        vec4_instruction *ref = NULL);
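
   /* NIR -> vec4 IR translation.  These are virtual so that the stage-specific
    * visitors can override the handling of individual NIR constructs. */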
   virtual void emit_nir_code();
   virtual void nir_setup_uniforms();
   virtual void nir_emit_impl(nir_function_impl *impl);
   virtual void nir_emit_cf_list(exec_list *list);
   virtual void nir_emit_if(nir_if *if_stmt);
   virtual void nir_emit_loop(nir_loop *loop);
   virtual void nir_emit_block(nir_block *block);
   virtual void nir_emit_instr(nir_instr *instr);
   virtual void nir_emit_load_const(nir_load_const_instr *instr);
   src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr);
   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
   virtual void nir_emit_alu(nir_alu_instr *instr);
   virtual void nir_emit_jump(nir_jump_instr *instr);
   virtual void nir_emit_texture(nir_tex_instr *instr);
   virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
   virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);

   dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
   dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
   dst_reg get_nir_dest(const nir_dest &dest);
   src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src, nir_alu_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src,
                       unsigned num_components = 4);
   src_reg get_nir_src_imm(const nir_src &src);
   src_reg get_indirect_offset(nir_intrinsic_instr *instr);

   dst_reg *nir_locals;
   dst_reg *nir_ssa_values;

protected:
   void emit_vertex();
   void setup_payload_interference(struct ra_graph *g, int first_payload_node,
                                   int reg_node_count);
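
   /* Hooks implemented by the stage-specific visitors; the pure-virtual ones
    * must be provided by every subclass. */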
   virtual void setup_payload() = 0;
   virtual void emit_prolog() = 0;
   virtual void emit_thread_end() = 0;
   virtual void emit_urb_write_header(int mrf) = 0;
   virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
   virtual void gs_emit_vertex(int stream_id);
   virtual void gs_end_primitive();

private:
   /**
    * If true, then register allocation should fail instead of spilling.
    */
   const bool no_spills;

   int shader_time_index;

   unsigned last_scratch; /**< measured in 32-byte (register size) units */
};

} /* namespace brw */

#endif /* __cplusplus */

#endif /* BRW_VEC4_H */