i965: Start adding the VS visitor and codegen.
The low-level IR is a mashup of brw_fs.cpp and ir_to_mesa.cpp. It's currently controlled by the INTEL_NEW_VS=1 environment variable, and only tested for the trivial "gl_Position = gl_Vertex;" shader so far.
This commit is contained in:
@@ -124,7 +124,10 @@ CXX_SOURCES = \
|
||||
brw_fs_reg_allocate.cpp \
|
||||
brw_fs_schedule_instructions.cpp \
|
||||
brw_fs_vector_splitting.cpp \
|
||||
brw_shader.cpp
|
||||
brw_shader.cpp \
|
||||
brw_vec4_emit.cpp \
|
||||
brw_vec4_reg_allocate.cpp \
|
||||
brw_vec4_visitor.cpp
|
||||
|
||||
ASM_SOURCES =
|
||||
|
||||
|
@@ -529,7 +529,7 @@ struct brw_context
|
||||
* the CURBE, the depth buffer, and a query BO.
|
||||
*/
|
||||
drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
|
||||
int validated_bo_count;
|
||||
unsigned int validated_bo_count;
|
||||
} state;
|
||||
|
||||
struct brw_cache cache;
|
||||
|
@@ -638,6 +638,8 @@ enum opcode {
|
||||
FS_OPCODE_SPILL,
|
||||
FS_OPCODE_UNSPILL,
|
||||
FS_OPCODE_PULL_CONSTANT_LOAD,
|
||||
|
||||
VS_OPCODE_URB_WRITE,
|
||||
};
|
||||
|
||||
#define BRW_PREDICATE_NONE 0
|
||||
|
@@ -44,6 +44,9 @@
|
||||
#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
|
||||
#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
|
||||
#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
|
||||
#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
|
||||
#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
|
||||
#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
|
||||
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
|
||||
|
||||
|
||||
|
@@ -146,38 +146,7 @@ void
|
||||
fs_visitor::generate_math(fs_inst *inst,
|
||||
struct brw_reg dst, struct brw_reg *src)
|
||||
{
|
||||
int op;
|
||||
|
||||
switch (inst->opcode) {
|
||||
case SHADER_OPCODE_RCP:
|
||||
op = BRW_MATH_FUNCTION_INV;
|
||||
break;
|
||||
case SHADER_OPCODE_RSQ:
|
||||
op = BRW_MATH_FUNCTION_RSQ;
|
||||
break;
|
||||
case SHADER_OPCODE_SQRT:
|
||||
op = BRW_MATH_FUNCTION_SQRT;
|
||||
break;
|
||||
case SHADER_OPCODE_EXP2:
|
||||
op = BRW_MATH_FUNCTION_EXP;
|
||||
break;
|
||||
case SHADER_OPCODE_LOG2:
|
||||
op = BRW_MATH_FUNCTION_LOG;
|
||||
break;
|
||||
case SHADER_OPCODE_POW:
|
||||
op = BRW_MATH_FUNCTION_POW;
|
||||
break;
|
||||
case SHADER_OPCODE_SIN:
|
||||
op = BRW_MATH_FUNCTION_SIN;
|
||||
break;
|
||||
case SHADER_OPCODE_COS:
|
||||
op = BRW_MATH_FUNCTION_COS;
|
||||
break;
|
||||
default:
|
||||
assert(!"not reached: unknown math function");
|
||||
op = 0;
|
||||
break;
|
||||
}
|
||||
int op = brw_math_function(inst->opcode);
|
||||
|
||||
if (intel->gen >= 6) {
|
||||
assert(inst->mlen == 0);
|
||||
|
@@ -199,3 +199,29 @@ brw_conditional_for_comparison(unsigned int op)
|
||||
return BRW_CONDITIONAL_NZ;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t
|
||||
brw_math_function(enum opcode op)
|
||||
{
|
||||
switch (op) {
|
||||
case SHADER_OPCODE_RCP:
|
||||
return BRW_MATH_FUNCTION_INV;
|
||||
case SHADER_OPCODE_RSQ:
|
||||
return BRW_MATH_FUNCTION_RSQ;
|
||||
case SHADER_OPCODE_SQRT:
|
||||
return BRW_MATH_FUNCTION_SQRT;
|
||||
case SHADER_OPCODE_EXP2:
|
||||
return BRW_MATH_FUNCTION_EXP;
|
||||
case SHADER_OPCODE_LOG2:
|
||||
return BRW_MATH_FUNCTION_LOG;
|
||||
case SHADER_OPCODE_POW:
|
||||
return BRW_MATH_FUNCTION_POW;
|
||||
case SHADER_OPCODE_SIN:
|
||||
return BRW_MATH_FUNCTION_SIN;
|
||||
case SHADER_OPCODE_COS:
|
||||
return BRW_MATH_FUNCTION_COS;
|
||||
default:
|
||||
assert(!"not reached: unknown math function");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
@@ -22,8 +22,10 @@
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include "brw_defines.h"
|
||||
|
||||
#pragma once
|
||||
|
||||
int brw_type_for_base_type(const struct glsl_type *type);
|
||||
uint32_t brw_conditional_for_comparison(unsigned int op);
|
||||
uint32_t brw_math_function(enum opcode op);
|
||||
|
434
src/mesa/drivers/dri/i965/brw_vec4.h
Normal file
434
src/mesa/drivers/dri/i965/brw_vec4.h
Normal file
@@ -0,0 +1,434 @@
|
||||
/*
|
||||
* Copyright © 2011 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BRW_VEC4_H
|
||||
#define BRW_VEC4_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "brw_shader.h"
|
||||
#include "main/compiler.h"
|
||||
#include "program/hash_table.h"
|
||||
|
||||
extern "C" {
|
||||
#include "brw_vs.h"
|
||||
#include "brw_context.h"
|
||||
#include "brw_eu.h"
|
||||
};
|
||||
|
||||
#include "../glsl/ir.h"
|
||||
|
||||
namespace brw {
|
||||
|
||||
class dst_reg;
|
||||
|
||||
/**
|
||||
* Common helper for constructing swizzles. When only a subset of
|
||||
* channels of a vec4 are used, we don't want to reference the other
|
||||
* channels, as that will tell optimization passes that those other
|
||||
* channels are used.
|
||||
*/
|
||||
static int
|
||||
swizzle_for_size(int size)
|
||||
{
|
||||
int size_swizzles[4] = {
|
||||
BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
|
||||
BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
|
||||
BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
|
||||
BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
|
||||
};
|
||||
|
||||
assert((size >= 1) && (size <= 4));
|
||||
return size_swizzles[size - 1];
|
||||
}
|
||||
|
||||
enum register_file {
|
||||
ARF = BRW_ARCHITECTURE_REGISTER_FILE,
|
||||
GRF = BRW_GENERAL_REGISTER_FILE,
|
||||
MRF = BRW_MESSAGE_REGISTER_FILE,
|
||||
IMM = BRW_IMMEDIATE_VALUE,
|
||||
HW_REG, /* a struct brw_reg */
|
||||
ATTR,
|
||||
UNIFORM, /* prog_data->params[hw_reg] */
|
||||
BAD_FILE
|
||||
};
|
||||
|
||||
class reg
|
||||
{
|
||||
public:
|
||||
/** Register file: ARF, GRF, MRF, IMM. */
|
||||
enum register_file file;
|
||||
/** virtual register number. 0 = fixed hw reg */
|
||||
int reg;
|
||||
/** Offset within the virtual register. */
|
||||
int reg_offset;
|
||||
/** Register type. BRW_REGISTER_TYPE_* */
|
||||
int type;
|
||||
bool sechalf;
|
||||
struct brw_reg fixed_hw_reg;
|
||||
int smear; /* -1, or a channel of the reg to smear to all channels. */
|
||||
|
||||
/** Value for file == IMM (BRW_IMMEDIATE_VALUE) */
|
||||
union {
|
||||
int32_t i;
|
||||
uint32_t u;
|
||||
float f;
|
||||
} imm;
|
||||
};
|
||||
|
||||
class src_reg : public reg
|
||||
{
|
||||
public:
|
||||
/* Callers of this ralloc-based new need not call delete. It's
|
||||
* easier to just ralloc_free 'ctx' (or any of its ancestors). */
|
||||
static void* operator new(size_t size, void *ctx)
|
||||
{
|
||||
void *node;
|
||||
|
||||
node = ralloc_size(ctx, size);
|
||||
assert(node != NULL);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
void init()
|
||||
{
|
||||
memset(this, 0, sizeof(*this));
|
||||
|
||||
this->file = BAD_FILE;
|
||||
}
|
||||
|
||||
src_reg(register_file file, int reg, const glsl_type *type)
|
||||
{
|
||||
init();
|
||||
|
||||
this->file = file;
|
||||
this->reg = reg;
|
||||
if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
|
||||
this->swizzle = swizzle_for_size(type->vector_elements);
|
||||
else
|
||||
this->swizzle = SWIZZLE_XYZW;
|
||||
}
|
||||
|
||||
/** Generic unset register constructor. */
|
||||
src_reg()
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
src_reg(float f)
|
||||
{
|
||||
init();
|
||||
|
||||
this->file = IMM;
|
||||
this->type = BRW_REGISTER_TYPE_F;
|
||||
this->imm.f = f;
|
||||
}
|
||||
|
||||
src_reg(uint32_t u)
|
||||
{
|
||||
init();
|
||||
|
||||
this->file = IMM;
|
||||
this->type = BRW_REGISTER_TYPE_UD;
|
||||
this->imm.f = u;
|
||||
}
|
||||
|
||||
src_reg(int32_t i)
|
||||
{
|
||||
init();
|
||||
|
||||
this->file = IMM;
|
||||
this->type = BRW_REGISTER_TYPE_D;
|
||||
this->imm.i = i;
|
||||
}
|
||||
|
||||
src_reg(class vec4_visitor *v, const struct glsl_type *type);
|
||||
|
||||
explicit src_reg(dst_reg reg);
|
||||
|
||||
GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */
|
||||
bool negate;
|
||||
bool abs;
|
||||
};
|
||||
|
||||
class dst_reg : public reg
|
||||
{
|
||||
public:
|
||||
/* Callers of this ralloc-based new need not call delete. It's
|
||||
* easier to just ralloc_free 'ctx' (or any of its ancestors). */
|
||||
static void* operator new(size_t size, void *ctx)
|
||||
{
|
||||
void *node;
|
||||
|
||||
node = ralloc_size(ctx, size);
|
||||
assert(node != NULL);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
void init()
|
||||
{
|
||||
memset(this, 0, sizeof(*this));
|
||||
this->file = BAD_FILE;
|
||||
this->writemask = WRITEMASK_XYZW;
|
||||
}
|
||||
|
||||
dst_reg()
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
dst_reg(register_file file, int reg)
|
||||
{
|
||||
init();
|
||||
|
||||
this->file = file;
|
||||
this->reg = reg;
|
||||
}
|
||||
|
||||
dst_reg(struct brw_reg reg)
|
||||
{
|
||||
init();
|
||||
|
||||
this->file = HW_REG;
|
||||
this->fixed_hw_reg = reg;
|
||||
}
|
||||
|
||||
dst_reg(class vec4_visitor *v, const struct glsl_type *type);
|
||||
|
||||
explicit dst_reg(src_reg reg);
|
||||
|
||||
int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
|
||||
};
|
||||
|
||||
class vec4_instruction : public exec_node {
|
||||
public:
|
||||
/* Callers of this ralloc-based new need not call delete. It's
|
||||
* easier to just ralloc_free 'ctx' (or any of its ancestors). */
|
||||
static void* operator new(size_t size, void *ctx)
|
||||
{
|
||||
void *node;
|
||||
|
||||
node = rzalloc_size(ctx, size);
|
||||
assert(node != NULL);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
struct brw_reg get_dst(void);
|
||||
struct brw_reg get_src(int i);
|
||||
|
||||
enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
|
||||
dst_reg dst;
|
||||
src_reg src[3];
|
||||
|
||||
bool saturate;
|
||||
bool predicate_inverse;
|
||||
uint32_t predicate;
|
||||
|
||||
int conditional_mod; /**< BRW_CONDITIONAL_* */
|
||||
|
||||
int sampler;
|
||||
int target; /**< MRT target. */
|
||||
bool shadow_compare;
|
||||
|
||||
bool eot;
|
||||
bool header_present;
|
||||
int mlen; /**< SEND message length */
|
||||
int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
|
||||
|
||||
uint32_t offset; /* spill/unspill offset */
|
||||
/** @{
|
||||
* Annotation for the generated IR. One of the two can be set.
|
||||
*/
|
||||
ir_instruction *ir;
|
||||
const char *annotation;
|
||||
};
|
||||
|
||||
class vec4_visitor : public ir_visitor
|
||||
{
|
||||
public:
|
||||
vec4_visitor(struct brw_vs_compile *c,
|
||||
struct gl_shader_program *prog, struct brw_shader *shader);
|
||||
~vec4_visitor();
|
||||
|
||||
dst_reg dst_null_f()
|
||||
{
|
||||
return dst_reg(brw_null_reg());
|
||||
}
|
||||
|
||||
dst_reg dst_null_d()
|
||||
{
|
||||
return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
|
||||
}
|
||||
|
||||
dst_reg dst_null_cmp()
|
||||
{
|
||||
if (intel->gen > 4)
|
||||
return dst_null_d();
|
||||
else
|
||||
return dst_null_f();
|
||||
}
|
||||
|
||||
struct brw_context *brw;
|
||||
const struct gl_vertex_program *vp;
|
||||
struct intel_context *intel;
|
||||
struct gl_context *ctx;
|
||||
struct brw_vs_compile *c;
|
||||
struct brw_vs_prog_data *prog_data;
|
||||
struct brw_compile *p;
|
||||
struct brw_shader *shader;
|
||||
struct gl_shader_program *prog;
|
||||
void *mem_ctx;
|
||||
exec_list instructions;
|
||||
|
||||
char *fail_msg;
|
||||
bool failed;
|
||||
|
||||
/**
|
||||
* GLSL IR currently being processed, which is associated with our
|
||||
* driver IR instructions for debugging purposes.
|
||||
*/
|
||||
ir_instruction *base_ir;
|
||||
const char *current_annotation;
|
||||
|
||||
int *virtual_grf_sizes;
|
||||
int virtual_grf_count;
|
||||
int virtual_grf_array_size;
|
||||
int first_non_payload_grf;
|
||||
|
||||
dst_reg *variable_storage(ir_variable *var);
|
||||
|
||||
void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
|
||||
|
||||
src_reg src_reg_for_float(float val);
|
||||
|
||||
/**
|
||||
* \name Visit methods
|
||||
*
|
||||
* As typical for the visitor pattern, there must be one \c visit method for
|
||||
* each concrete subclass of \c ir_instruction. Virtual base classes within
|
||||
* the hierarchy should not have \c visit methods.
|
||||
*/
|
||||
/*@{*/
|
||||
virtual void visit(ir_variable *);
|
||||
virtual void visit(ir_loop *);
|
||||
virtual void visit(ir_loop_jump *);
|
||||
virtual void visit(ir_function_signature *);
|
||||
virtual void visit(ir_function *);
|
||||
virtual void visit(ir_expression *);
|
||||
virtual void visit(ir_swizzle *);
|
||||
virtual void visit(ir_dereference_variable *);
|
||||
virtual void visit(ir_dereference_array *);
|
||||
virtual void visit(ir_dereference_record *);
|
||||
virtual void visit(ir_assignment *);
|
||||
virtual void visit(ir_constant *);
|
||||
virtual void visit(ir_call *);
|
||||
virtual void visit(ir_return *);
|
||||
virtual void visit(ir_discard *);
|
||||
virtual void visit(ir_texture *);
|
||||
virtual void visit(ir_if *);
|
||||
/*@}*/
|
||||
|
||||
src_reg result;
|
||||
|
||||
/* Regs for vertex results. Generated at ir_variable visiting time
|
||||
* for the ir->location's used.
|
||||
*/
|
||||
dst_reg output_reg[VERT_RESULT_MAX];
|
||||
|
||||
struct hash_table *variable_ht;
|
||||
|
||||
bool run(void);
|
||||
void fail(const char *msg, ...);
|
||||
|
||||
int virtual_grf_alloc(int size);
|
||||
int setup_attributes(int payload_reg);
|
||||
void setup_payload();
|
||||
void reg_allocate_trivial();
|
||||
void reg_allocate();
|
||||
|
||||
vec4_instruction *emit(enum opcode opcode);
|
||||
|
||||
vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);
|
||||
|
||||
vec4_instruction *emit(enum opcode opcode, dst_reg dst,
|
||||
src_reg src0, src_reg src1);
|
||||
|
||||
vec4_instruction *emit(enum opcode opcode, dst_reg dst,
|
||||
src_reg src0, src_reg src1, src_reg src2);
|
||||
|
||||
/** Walks an exec_list of ir_instruction and sends it through this visitor. */
|
||||
void visit_instructions(const exec_list *list);
|
||||
|
||||
void emit_bool_to_cond_code(ir_rvalue *ir);
|
||||
void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1);
|
||||
void emit_if_gen6(ir_if *ir);
|
||||
|
||||
void emit_block_move(ir_assignment *ir);
|
||||
|
||||
/**
|
||||
* Emit the correct dot-product instruction for the type of arguments
|
||||
*/
|
||||
void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
|
||||
|
||||
void emit_scalar(ir_instruction *ir, enum prog_opcode op,
|
||||
dst_reg dst, src_reg src0);
|
||||
|
||||
void emit_scalar(ir_instruction *ir, enum prog_opcode op,
|
||||
dst_reg dst, src_reg src0, src_reg src1);
|
||||
|
||||
void emit_scs(ir_instruction *ir, enum prog_opcode op,
|
||||
dst_reg dst, const src_reg &src);
|
||||
|
||||
void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src);
|
||||
void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src);
|
||||
void emit_math(enum opcode opcode, dst_reg dst, src_reg src);
|
||||
void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
|
||||
void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
|
||||
void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
|
||||
|
||||
int emit_vue_header_gen6(int header_mrf);
|
||||
int emit_vue_header_gen4(int header_mrf);
|
||||
void emit_urb_writes(void);
|
||||
|
||||
GLboolean try_emit_sat(ir_expression *ir);
|
||||
|
||||
bool process_move_condition(ir_rvalue *ir);
|
||||
|
||||
void generate_code();
|
||||
void generate_vs_instruction(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg *src);
|
||||
void generate_math1_gen4(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src);
|
||||
void generate_math1_gen6(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src);
|
||||
void generate_urb_write(vec4_instruction *inst);
|
||||
};
|
||||
|
||||
} /* namespace brw */
|
||||
|
||||
#endif /* BRW_VEC4_H */
|
568
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
Normal file
568
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
Normal file
@@ -0,0 +1,568 @@
|
||||
/*
|
||||
* Copyright © 2011 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_vec4.h"
|
||||
#include "../glsl/ir_print_visitor.h"
|
||||
|
||||
extern "C" {
|
||||
#include "brw_eu.h"
|
||||
};
|
||||
|
||||
using namespace brw;
|
||||
|
||||
namespace brw {
|
||||
|
||||
int
|
||||
vec4_visitor::setup_attributes(int payload_reg)
|
||||
{
|
||||
int nr_attributes;
|
||||
int attribute_map[VERT_ATTRIB_MAX];
|
||||
|
||||
nr_attributes = 0;
|
||||
for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
|
||||
if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
|
||||
attribute_map[i] = payload_reg + nr_attributes;
|
||||
nr_attributes++;
|
||||
}
|
||||
}
|
||||
|
||||
foreach_iter(exec_list_iterator, iter, this->instructions) {
|
||||
vec4_instruction *inst = (vec4_instruction *)iter.get();
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (inst->src[i].file != ATTR)
|
||||
continue;
|
||||
|
||||
inst->src[i].file = HW_REG;
|
||||
inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0);
|
||||
inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle;
|
||||
}
|
||||
}
|
||||
|
||||
/* The BSpec says we always have to read at least one thing from
|
||||
* the VF, and it appears that the hardware wedges otherwise.
|
||||
*/
|
||||
if (nr_attributes == 0)
|
||||
nr_attributes = 1;
|
||||
|
||||
prog_data->urb_read_length = (nr_attributes + 1) / 2;
|
||||
|
||||
return nr_attributes;
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::setup_payload(void)
|
||||
{
|
||||
int reg = 0;
|
||||
|
||||
/* r0 is always reserved, as it contains the payload with the URB
|
||||
* handles that are passed on to the URB write at the end of the
|
||||
* thread.
|
||||
*/
|
||||
reg++;
|
||||
|
||||
/* User clip planes from curbe:
|
||||
*/
|
||||
if (c->key.nr_userclip) {
|
||||
if (intel->gen >= 6) {
|
||||
for (int i = 0; i < c->key.nr_userclip; i++) {
|
||||
c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
|
||||
(i % 2) * 4), 0, 4, 1);
|
||||
}
|
||||
reg += ALIGN(c->key.nr_userclip, 2) / 2;
|
||||
} else {
|
||||
for (int i = 0; i < c->key.nr_userclip; i++) {
|
||||
c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
|
||||
(i % 2) * 4), 0, 4, 1);
|
||||
}
|
||||
reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
|
||||
}
|
||||
}
|
||||
|
||||
/* FINISHME: push constants */
|
||||
c->prog_data.curb_read_length = reg - 1;
|
||||
c->prog_data.nr_params = 0;
|
||||
/* XXX 0 causes a bug elsewhere... */
|
||||
if (intel->gen < 6 && c->prog_data.nr_params == 0)
|
||||
c->prog_data.nr_params = 4;
|
||||
|
||||
reg += setup_attributes(reg);
|
||||
|
||||
this->first_non_payload_grf = reg;
|
||||
}
|
||||
|
||||
struct brw_reg
|
||||
vec4_instruction::get_dst(void)
|
||||
{
|
||||
struct brw_reg brw_reg;
|
||||
|
||||
switch (dst.file) {
|
||||
case GRF:
|
||||
assert(dst.reg_offset == 0);
|
||||
brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
|
||||
brw_reg = retype(brw_reg, dst.type);
|
||||
brw_reg.dw1.bits.writemask = dst.writemask;
|
||||
break;
|
||||
|
||||
case HW_REG:
|
||||
brw_reg = dst.fixed_hw_reg;
|
||||
break;
|
||||
|
||||
case BAD_FILE:
|
||||
brw_reg = brw_null_reg();
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(!"not reached");
|
||||
brw_reg = brw_null_reg();
|
||||
break;
|
||||
}
|
||||
return brw_reg;
|
||||
}
|
||||
|
||||
struct brw_reg
|
||||
vec4_instruction::get_src(int i)
|
||||
{
|
||||
struct brw_reg brw_reg;
|
||||
|
||||
switch (src[i].file) {
|
||||
case GRF:
|
||||
brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
|
||||
brw_reg = retype(brw_reg, src[i].type);
|
||||
brw_reg.dw1.bits.swizzle = src[i].swizzle;
|
||||
if (src[i].abs)
|
||||
brw_reg = brw_abs(brw_reg);
|
||||
if (src[i].negate)
|
||||
brw_reg = negate(brw_reg);
|
||||
break;
|
||||
|
||||
case IMM:
|
||||
switch (src[i].type) {
|
||||
case BRW_REGISTER_TYPE_F:
|
||||
brw_reg = brw_imm_f(src[i].imm.f);
|
||||
break;
|
||||
case BRW_REGISTER_TYPE_D:
|
||||
brw_reg = brw_imm_d(src[i].imm.i);
|
||||
break;
|
||||
case BRW_REGISTER_TYPE_UD:
|
||||
brw_reg = brw_imm_ud(src[i].imm.u);
|
||||
break;
|
||||
default:
|
||||
assert(!"not reached");
|
||||
brw_reg = brw_null_reg();
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case HW_REG:
|
||||
brw_reg = src[i].fixed_hw_reg;
|
||||
break;
|
||||
|
||||
case BAD_FILE:
|
||||
/* Probably unused. */
|
||||
brw_reg = brw_null_reg();
|
||||
break;
|
||||
case ATTR:
|
||||
default:
|
||||
assert(!"not reached");
|
||||
brw_reg = brw_null_reg();
|
||||
break;
|
||||
}
|
||||
|
||||
return brw_reg;
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src)
|
||||
{
|
||||
brw_math(p,
|
||||
dst,
|
||||
brw_math_function(inst->opcode),
|
||||
BRW_MATH_SATURATE_NONE,
|
||||
inst->base_mrf,
|
||||
src,
|
||||
BRW_MATH_DATA_SCALAR,
|
||||
BRW_MATH_PRECISION_FULL);
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src)
|
||||
{
|
||||
brw_math(p,
|
||||
dst,
|
||||
brw_math_function(inst->opcode),
|
||||
BRW_MATH_SATURATE_NONE,
|
||||
inst->base_mrf,
|
||||
src,
|
||||
BRW_MATH_DATA_SCALAR,
|
||||
BRW_MATH_PRECISION_FULL);
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::generate_urb_write(vec4_instruction *inst)
|
||||
{
|
||||
brw_urb_WRITE(p,
|
||||
brw_null_reg(), /* dest */
|
||||
inst->base_mrf, /* starting mrf reg nr */
|
||||
brw_vec8_grf(0, 0), /* src */
|
||||
false, /* allocate */
|
||||
true, /* used */
|
||||
inst->mlen,
|
||||
0, /* response len */
|
||||
inst->eot, /* eot */
|
||||
inst->eot, /* writes complete */
|
||||
inst->offset, /* urb destination offset */
|
||||
BRW_URB_SWIZZLE_INTERLEAVE);
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg *src)
|
||||
{
|
||||
vec4_instruction *inst = (vec4_instruction *)instruction;
|
||||
|
||||
switch (inst->opcode) {
|
||||
case SHADER_OPCODE_RCP:
|
||||
case SHADER_OPCODE_RSQ:
|
||||
case SHADER_OPCODE_SQRT:
|
||||
case SHADER_OPCODE_EXP2:
|
||||
case SHADER_OPCODE_LOG2:
|
||||
case SHADER_OPCODE_SIN:
|
||||
case SHADER_OPCODE_COS:
|
||||
if (intel->gen >= 6) {
|
||||
generate_math1_gen6(inst, dst, src[0]);
|
||||
} else {
|
||||
generate_math1_gen4(inst, dst, src[0]);
|
||||
}
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_POW:
|
||||
assert(!"finishme");
|
||||
break;
|
||||
|
||||
case VS_OPCODE_URB_WRITE:
|
||||
generate_urb_write(inst);
|
||||
break;
|
||||
|
||||
default:
|
||||
if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
|
||||
fail("unsupported opcode in `%s' in VS\n",
|
||||
brw_opcodes[inst->opcode].name);
|
||||
} else {
|
||||
fail("Unsupported opcode %d in VS", inst->opcode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
vec4_visitor::run()
|
||||
{
|
||||
/* Generate FS IR for main(). (the visitor only descends into
|
||||
* functions called "main").
|
||||
*/
|
||||
foreach_iter(exec_list_iterator, iter, *shader->ir) {
|
||||
ir_instruction *ir = (ir_instruction *)iter.get();
|
||||
base_ir = ir;
|
||||
ir->accept(this);
|
||||
}
|
||||
|
||||
emit_urb_writes();
|
||||
|
||||
if (failed)
|
||||
return false;
|
||||
|
||||
setup_payload();
|
||||
reg_allocate();
|
||||
|
||||
brw_set_access_mode(p, BRW_ALIGN_16);
|
||||
|
||||
generate_code();
|
||||
|
||||
return !failed;
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::generate_code()
|
||||
{
|
||||
int last_native_inst = p->nr_insn;
|
||||
const char *last_annotation_string = NULL;
|
||||
ir_instruction *last_annotation_ir = NULL;
|
||||
|
||||
int loop_stack_array_size = 16;
|
||||
int loop_stack_depth = 0;
|
||||
brw_instruction **loop_stack =
|
||||
rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
|
||||
int *if_depth_in_loop =
|
||||
rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
|
||||
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
|
||||
printf("Native code for vertex shader %d:\n", prog->Name);
|
||||
}
|
||||
|
||||
foreach_list(node, &this->instructions) {
|
||||
vec4_instruction *inst = (vec4_instruction *)node;
|
||||
struct brw_reg src[3], dst;
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
|
||||
if (last_annotation_ir != inst->ir) {
|
||||
last_annotation_ir = inst->ir;
|
||||
if (last_annotation_ir) {
|
||||
printf(" ");
|
||||
last_annotation_ir->print();
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
if (last_annotation_string != inst->annotation) {
|
||||
last_annotation_string = inst->annotation;
|
||||
if (last_annotation_string)
|
||||
printf(" %s\n", last_annotation_string);
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < 3; i++) {
|
||||
src[i] = inst->get_src(i);
|
||||
}
|
||||
dst = inst->get_dst();
|
||||
|
||||
brw_set_conditionalmod(p, inst->conditional_mod);
|
||||
brw_set_predicate_control(p, inst->predicate);
|
||||
brw_set_predicate_inverse(p, inst->predicate_inverse);
|
||||
brw_set_saturate(p, inst->saturate);
|
||||
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_MOV:
|
||||
brw_MOV(p, dst, src[0]);
|
||||
break;
|
||||
case BRW_OPCODE_ADD:
|
||||
brw_ADD(p, dst, src[0], src[1]);
|
||||
break;
|
||||
case BRW_OPCODE_MUL:
|
||||
brw_MUL(p, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_FRC:
|
||||
brw_FRC(p, dst, src[0]);
|
||||
break;
|
||||
case BRW_OPCODE_RNDD:
|
||||
brw_RNDD(p, dst, src[0]);
|
||||
break;
|
||||
case BRW_OPCODE_RNDE:
|
||||
brw_RNDE(p, dst, src[0]);
|
||||
break;
|
||||
case BRW_OPCODE_RNDZ:
|
||||
brw_RNDZ(p, dst, src[0]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_AND:
|
||||
brw_AND(p, dst, src[0], src[1]);
|
||||
break;
|
||||
case BRW_OPCODE_OR:
|
||||
brw_OR(p, dst, src[0], src[1]);
|
||||
break;
|
||||
case BRW_OPCODE_XOR:
|
||||
brw_XOR(p, dst, src[0], src[1]);
|
||||
break;
|
||||
case BRW_OPCODE_NOT:
|
||||
brw_NOT(p, dst, src[0]);
|
||||
break;
|
||||
case BRW_OPCODE_ASR:
|
||||
brw_ASR(p, dst, src[0], src[1]);
|
||||
break;
|
||||
case BRW_OPCODE_SHR:
|
||||
brw_SHR(p, dst, src[0], src[1]);
|
||||
break;
|
||||
case BRW_OPCODE_SHL:
|
||||
brw_SHL(p, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_CMP:
|
||||
brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
|
||||
break;
|
||||
case BRW_OPCODE_SEL:
|
||||
brw_SEL(p, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_IF:
|
||||
if (inst->src[0].file != BAD_FILE) {
|
||||
/* The instruction has an embedded compare (only allowed on gen6) */
|
||||
assert(intel->gen == 6);
|
||||
gen6_IF(p, inst->conditional_mod, src[0], src[1]);
|
||||
} else {
|
||||
brw_IF(p, BRW_EXECUTE_8);
|
||||
}
|
||||
if_depth_in_loop[loop_stack_depth]++;
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ELSE:
|
||||
brw_ELSE(p);
|
||||
break;
|
||||
case BRW_OPCODE_ENDIF:
|
||||
brw_ENDIF(p);
|
||||
if_depth_in_loop[loop_stack_depth]--;
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_DO:
|
||||
loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
|
||||
if (loop_stack_array_size <= loop_stack_depth) {
|
||||
loop_stack_array_size *= 2;
|
||||
loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
|
||||
loop_stack_array_size);
|
||||
if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
|
||||
loop_stack_array_size);
|
||||
}
|
||||
if_depth_in_loop[loop_stack_depth] = 0;
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_BREAK:
|
||||
brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
break;
|
||||
case BRW_OPCODE_CONTINUE:
|
||||
/* FINISHME: We need to write the loop instruction support still. */
|
||||
if (intel->gen >= 6)
|
||||
gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
|
||||
else
|
||||
brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_WHILE: {
|
||||
struct brw_instruction *inst0, *inst1;
|
||||
GLuint br = 1;
|
||||
|
||||
if (intel->gen >= 5)
|
||||
br = 2;
|
||||
|
||||
assert(loop_stack_depth > 0);
|
||||
loop_stack_depth--;
|
||||
inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
|
||||
if (intel->gen < 6) {
|
||||
/* patch all the BREAK/CONT instructions from last BGNLOOP */
|
||||
while (inst0 > loop_stack[loop_stack_depth]) {
|
||||
inst0--;
|
||||
if (inst0->header.opcode == BRW_OPCODE_BREAK &&
|
||||
inst0->bits3.if_else.jump_count == 0) {
|
||||
inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
|
||||
}
|
||||
else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
|
||||
inst0->bits3.if_else.jump_count == 0) {
|
||||
inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
generate_vs_instruction(inst, dst, src);
|
||||
break;
|
||||
}
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
|
||||
for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
|
||||
if (0) {
|
||||
printf("0x%08x 0x%08x 0x%08x 0x%08x ",
|
||||
((uint32_t *)&p->store[i])[3],
|
||||
((uint32_t *)&p->store[i])[2],
|
||||
((uint32_t *)&p->store[i])[1],
|
||||
((uint32_t *)&p->store[i])[0]);
|
||||
}
|
||||
brw_disasm(stdout, &p->store[i], intel->gen);
|
||||
}
|
||||
}
|
||||
|
||||
last_native_inst = p->nr_insn;
|
||||
}
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
ralloc_free(loop_stack);
|
||||
ralloc_free(if_depth_in_loop);
|
||||
|
||||
brw_set_uip_jip(p);
|
||||
|
||||
/* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
|
||||
* emit issues, it doesn't get the jump distances into the output,
|
||||
* which is often something we want to debug. So this is here in
|
||||
* case you're doing that.
|
||||
*/
|
||||
if (0) {
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
|
||||
for (unsigned int i = 0; i < p->nr_insn; i++) {
|
||||
printf("0x%08x 0x%08x 0x%08x 0x%08x ",
|
||||
((uint32_t *)&p->store[i])[3],
|
||||
((uint32_t *)&p->store[i])[2],
|
||||
((uint32_t *)&p->store[i])[1],
|
||||
((uint32_t *)&p->store[i])[0]);
|
||||
brw_disasm(stdout, &p->store[i], intel->gen);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
bool
|
||||
brw_vs_emit(struct brw_vs_compile *c)
|
||||
{
|
||||
struct brw_compile *p = &c->func;
|
||||
struct brw_context *brw = p->brw;
|
||||
struct intel_context *intel = &brw->intel;
|
||||
struct gl_context *ctx = &intel->ctx;
|
||||
struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram;
|
||||
|
||||
if (!prog)
|
||||
return false;
|
||||
|
||||
struct brw_shader *shader =
|
||||
(brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
|
||||
if (!shader)
|
||||
return false;
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
|
||||
printf("GLSL IR for native vertex shader %d:\n", prog->Name);
|
||||
_mesa_print_ir(shader->ir, NULL);
|
||||
printf("\n\n");
|
||||
}
|
||||
|
||||
vec4_visitor v(c, prog, shader);
|
||||
if (!v.run()) {
|
||||
/* FINISHME: Cleanly fail, test at link time, etc. */
|
||||
assert(!"not reached");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} /* extern "C" */
|
||||
|
||||
} /* namespace brw */
|
77
src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
Normal file
77
src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
Normal file
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright © 2011 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_vec4.h"
|
||||
#include "../glsl/ir_print_visitor.h"
|
||||
|
||||
using namespace brw;
|
||||
|
||||
namespace brw {
|
||||
|
||||
static void
|
||||
assign(int *reg_hw_locations, reg *reg)
|
||||
{
|
||||
if (reg->file == GRF) {
|
||||
reg->reg = reg_hw_locations[reg->reg];
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::reg_allocate_trivial()
|
||||
{
|
||||
int last_grf = 0;
|
||||
int hw_reg_mapping[this->virtual_grf_count];
|
||||
int i;
|
||||
int next;
|
||||
|
||||
/* Note that compressed instructions require alignment to 2 registers. */
|
||||
hw_reg_mapping[0] = this->first_non_payload_grf;
|
||||
next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
|
||||
for (i = 1; i < this->virtual_grf_count; i++) {
|
||||
hw_reg_mapping[i] = next;
|
||||
next += this->virtual_grf_sizes[i];
|
||||
}
|
||||
prog_data->total_grf = next;
|
||||
|
||||
foreach_iter(exec_list_iterator, iter, this->instructions) {
|
||||
vec4_instruction *inst = (vec4_instruction *)iter.get();
|
||||
|
||||
assign(hw_reg_mapping, &inst->dst);
|
||||
assign(hw_reg_mapping, &inst->src[0]);
|
||||
assign(hw_reg_mapping, &inst->src[1]);
|
||||
assign(hw_reg_mapping, &inst->src[2]);
|
||||
}
|
||||
|
||||
if (last_grf >= BRW_MAX_GRF) {
|
||||
fail("Ran out of regs on trivial allocator (%d/%d)\n",
|
||||
last_grf, BRW_MAX_GRF);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::reg_allocate()
|
||||
{
|
||||
reg_allocate_trivial();
|
||||
}
|
||||
|
||||
} /* namespace brw */
|
1649
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
Normal file
1649
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -30,6 +30,7 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "main/compiler.h"
|
||||
#include "brw_context.h"
|
||||
#include "brw_vs.h"
|
||||
#include "brw_util.h"
|
||||
@@ -50,6 +51,7 @@ static void do_vs_prog( struct brw_context *brw,
|
||||
void *mem_ctx;
|
||||
int aux_size;
|
||||
int i;
|
||||
static int new_vs = -1;
|
||||
|
||||
memset(&c, 0, sizeof(c));
|
||||
memcpy(&c.key, key, sizeof(*key));
|
||||
@@ -85,7 +87,15 @@ static void do_vs_prog( struct brw_context *brw,
|
||||
|
||||
/* Emit GEN4 code.
|
||||
*/
|
||||
brw_vs_emit(&c);
|
||||
if (new_vs == -1)
|
||||
new_vs = getenv("INTEL_NEW_VS") != NULL;
|
||||
|
||||
if (new_vs) {
|
||||
if (!brw_vs_emit(&c))
|
||||
brw_old_vs_emit(&c);
|
||||
} else {
|
||||
brw_old_vs_emit(&c);
|
||||
}
|
||||
|
||||
/* get the program
|
||||
*/
|
||||
|
@@ -92,6 +92,7 @@ struct brw_vs_compile {
|
||||
GLboolean needs_stack;
|
||||
};
|
||||
|
||||
void brw_vs_emit( struct brw_vs_compile *c );
|
||||
bool brw_vs_emit(struct brw_vs_compile *c);
|
||||
void brw_old_vs_emit(struct brw_vs_compile *c);
|
||||
|
||||
#endif
|
||||
|
@@ -1903,7 +1903,7 @@ brw_vs_rescale_gl_fixed(struct brw_vs_compile *c)
|
||||
|
||||
/* Emit the vertex program instructions here.
|
||||
*/
|
||||
void brw_vs_emit(struct brw_vs_compile *c )
|
||||
void brw_old_vs_emit(struct brw_vs_compile *c )
|
||||
{
|
||||
#define MAX_IF_DEPTH 32
|
||||
#define MAX_LOOP_DEPTH 32
|
||||
|
Reference in New Issue
Block a user