i965: Start adding the VS visitor and codegen.

The low-level IR is a mashup of brw_fs.cpp and ir_to_mesa.cpp.  It's
currently controlled by the INTEL_NEW_VS=1 environment variable, and
only tested for the trivial "gl_Position = gl_Vertex;" shader so far.
This commit is contained in:
Eric Anholt
2011-05-02 09:45:40 -07:00
parent 65b5cbbcf7
commit af3c9803d8
14 changed files with 2781 additions and 37 deletions

View File

@@ -124,7 +124,10 @@ CXX_SOURCES = \
brw_fs_reg_allocate.cpp \
brw_fs_schedule_instructions.cpp \
brw_fs_vector_splitting.cpp \
brw_shader.cpp
brw_shader.cpp \
brw_vec4_emit.cpp \
brw_vec4_reg_allocate.cpp \
brw_vec4_visitor.cpp
ASM_SOURCES =

View File

@@ -529,7 +529,7 @@ struct brw_context
* the CURBE, the depth buffer, and a query BO.
*/
drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
int validated_bo_count;
unsigned int validated_bo_count;
} state;
struct brw_cache cache;

View File

@@ -638,6 +638,8 @@ enum opcode {
FS_OPCODE_SPILL,
FS_OPCODE_UNSPILL,
FS_OPCODE_PULL_CONSTANT_LOAD,
VS_OPCODE_URB_WRITE,
};
#define BRW_PREDICATE_NONE 0

View File

@@ -44,6 +44,9 @@
#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)

View File

@@ -146,38 +146,7 @@ void
fs_visitor::generate_math(fs_inst *inst,
struct brw_reg dst, struct brw_reg *src)
{
int op;
switch (inst->opcode) {
case SHADER_OPCODE_RCP:
op = BRW_MATH_FUNCTION_INV;
break;
case SHADER_OPCODE_RSQ:
op = BRW_MATH_FUNCTION_RSQ;
break;
case SHADER_OPCODE_SQRT:
op = BRW_MATH_FUNCTION_SQRT;
break;
case SHADER_OPCODE_EXP2:
op = BRW_MATH_FUNCTION_EXP;
break;
case SHADER_OPCODE_LOG2:
op = BRW_MATH_FUNCTION_LOG;
break;
case SHADER_OPCODE_POW:
op = BRW_MATH_FUNCTION_POW;
break;
case SHADER_OPCODE_SIN:
op = BRW_MATH_FUNCTION_SIN;
break;
case SHADER_OPCODE_COS:
op = BRW_MATH_FUNCTION_COS;
break;
default:
assert(!"not reached: unknown math function");
op = 0;
break;
}
int op = brw_math_function(inst->opcode);
if (intel->gen >= 6) {
assert(inst->mlen == 0);

View File

@@ -199,3 +199,29 @@ brw_conditional_for_comparison(unsigned int op)
return BRW_CONDITIONAL_NZ;
}
}
/**
 * Maps a SHADER_OPCODE_* math opcode onto the matching
 * BRW_MATH_FUNCTION_* value.
 *
 * Opcodes that are not math opcodes trip an assertion; when assertions
 * are compiled out, 0 is returned for them.
 */
uint32_t
brw_math_function(enum opcode op)
{
   uint32_t function = 0;

   switch (op) {
   case SHADER_OPCODE_RCP:
      function = BRW_MATH_FUNCTION_INV;
      break;
   case SHADER_OPCODE_RSQ:
      function = BRW_MATH_FUNCTION_RSQ;
      break;
   case SHADER_OPCODE_SQRT:
      function = BRW_MATH_FUNCTION_SQRT;
      break;
   case SHADER_OPCODE_EXP2:
      function = BRW_MATH_FUNCTION_EXP;
      break;
   case SHADER_OPCODE_LOG2:
      function = BRW_MATH_FUNCTION_LOG;
      break;
   case SHADER_OPCODE_POW:
      function = BRW_MATH_FUNCTION_POW;
      break;
   case SHADER_OPCODE_SIN:
      function = BRW_MATH_FUNCTION_SIN;
      break;
   case SHADER_OPCODE_COS:
      function = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      break;
   }

   return function;
}

View File

@@ -22,8 +22,10 @@
*/
#include <stdint.h>
#include "brw_defines.h"
#pragma once
int brw_type_for_base_type(const struct glsl_type *type);
uint32_t brw_conditional_for_comparison(unsigned int op);
uint32_t brw_math_function(enum opcode op);

View File

@@ -0,0 +1,434 @@
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef BRW_VEC4_H
#define BRW_VEC4_H
#include <stdint.h>
#include "brw_shader.h"
#include "main/compiler.h"
#include "program/hash_table.h"
extern "C" {
#include "brw_vs.h"
#include "brw_context.h"
#include "brw_eu.h"
};
#include "../glsl/ir.h"
namespace brw {
class dst_reg;
/**
 * Returns the swizzle to use for a value that has \p size components
 * (1 to 4).
 *
 * The channels past the last valid component simply repeat that last
 * component, so that a partially-used vec4 never references channels
 * it does not own; referencing them would tell optimization passes
 * that those other channels are used.
 */
static int
swizzle_for_size(int size)
{
   /* Indexed by (size - 1). */
   static const int swizzle_by_size[4] = {
      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
   };

   assert((size >= 1) && (size <= 4));

   const int index = size - 1;
   return swizzle_by_size[index];
}
/**
 * Kinds of storage a vec4 IR register can refer to.
 *
 * The first four values alias the hardware register-file encodings; the
 * remaining ones exist only in the IR (ATTR sources, for instance, are
 * rewritten to HW_REG by vec4_visitor::setup_attributes()).
 */
enum register_file {
ARF = BRW_ARCHITECTURE_REGISTER_FILE,
GRF = BRW_GENERAL_REGISTER_FILE,
MRF = BRW_MESSAGE_REGISTER_FILE,
IMM = BRW_IMMEDIATE_VALUE,
HW_REG, /* a struct brw_reg */
ATTR,
UNIFORM, /* prog_data->params[hw_reg] */
BAD_FILE
};
/**
 * State shared by source and destination registers of the vec4 IR.
 *
 * src_reg and dst_reg derive from this class and add the operand-specific
 * fields (swizzle/negate/abs and writemask respectively).
 */
class reg
{
public:
/** Register file; one of enum register_file. */
enum register_file file;
/** virtual register number. 0 = fixed hw reg */
int reg;
/** Offset within the virtual register. */
int reg_offset;
/** Register type. BRW_REGISTER_TYPE_* */
int type;
/* NOTE(review): not read by any code in this file; presumably mirrors the
 * "second half" flag of the FS backend -- confirm before relying on it.
 */
bool sechalf;
/** Hardware register description, used when file == HW_REG. */
struct brw_reg fixed_hw_reg;
/* NOTE(review): the init() helpers of src_reg and dst_reg memset the whole
 * object to 0, so this starts out as 0 rather than -1 -- confirm intent.
 */
int smear; /* -1, or a channel of the reg to smear to all channels. */
/** Value for file == IMM; the member to read is selected by 'type'. */
union {
int32_t i;
uint32_t u;
float f;
} imm;
};
/**
 * Source operand of a vec4_instruction.
 *
 * On top of the common register description it carries a per-channel
 * swizzle and the negate/abs source modifiers.  A default-constructed
 * src_reg has file == BAD_FILE, which the code generator treats as
 * "operand unused".
 */
class src_reg : public reg
{
public:
   /* Callers of this ralloc-based new need not call delete. It's
    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = ralloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /** Zeroes every field and marks the register as unset (BAD_FILE). */
   void init()
   {
      memset(this, 0, sizeof(*this));

      this->file = BAD_FILE;
   }

   /**
    * Register \p reg of \p file.
    *
    * For scalar, vector and matrix types the swizzle only references the
    * components the type actually has; for everything else (or when no
    * type is given) all four channels are selected.
    */
   src_reg(register_file file, int reg, const glsl_type *type)
   {
      init();

      this->file = file;
      this->reg = reg;
      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
         this->swizzle = swizzle_for_size(type->vector_elements);
      else {
         /* Identity swizzle in the same BRW_SWIZZLE4 packing that
          * swizzle_for_size() uses everywhere else.  The swizzle is copied
          * verbatim into brw_reg.dw1.bits.swizzle at code generation time,
          * so a differently packed constant would select the wrong
          * channels.
          */
         this->swizzle = swizzle_for_size(4);
      }
   }

   /** Generic unset register constructor. */
   src_reg()
   {
      init();
   }

   /** Float immediate. */
   src_reg(float f)
   {
      init();

      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_F;
      this->imm.f = f;
   }

   /** Unsigned integer immediate. */
   src_reg(uint32_t u)
   {
      init();

      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_UD;
      /* Must go through the 'u' member: UD immediates are read back via
       * imm.u, and storing through imm.f would convert the value to a
       * float bit pattern instead.
       */
      this->imm.u = u;
   }

   /** Signed integer immediate. */
   src_reg(int32_t i)
   {
      init();

      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_D;
      this->imm.i = i;
   }

   src_reg(class vec4_visitor *v, const struct glsl_type *type);

   explicit src_reg(dst_reg reg);

   /** Channel selects, packed with BRW_SWIZZLE4 (see swizzle_for_size()). */
   GLuint swizzle;
   bool negate;
   bool abs;
};
/**
 * Destination operand of a vec4_instruction.
 *
 * Extends the common register description with a channel writemask,
 * which defaults to all four channels.
 */
class dst_reg : public reg
{
public:
   /* Allocation goes through ralloc: there is no matching delete, the
    * storage goes away when 'ctx' (or one of its ancestors) is freed
    * with ralloc_free. */
   static void* operator new(size_t size, void *ctx)
   {
      void *const storage = ralloc_size(ctx, size);

      assert(storage != NULL);
      return storage;
   }

   /** Clears the object, then marks it unset with a full writemask. */
   void init()
   {
      memset(this, 0, sizeof(*this));

      this->writemask = WRITEMASK_XYZW;
      this->file = BAD_FILE;
   }

   /** Unset destination (file == BAD_FILE). */
   dst_reg()
   {
      init();
   }

   /** Register number \p reg within \p file, writing all channels. */
   dst_reg(register_file file, int reg)
   {
      init();

      this->reg = reg;
      this->file = file;
   }

   /** Wraps an already fully specified hardware register. */
   dst_reg(struct brw_reg reg)
   {
      init();

      this->fixed_hw_reg = reg;
      this->file = HW_REG;
   }

   dst_reg(class vec4_visitor *v, const struct glsl_type *type);

   explicit dst_reg(src_reg reg);

   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
};
/**
 * One instruction of the vec4 (vertex shader) low-level IR.
 *
 * Instructions are linked into vec4_visitor::instructions and are turned
 * into native code by vec4_visitor::generate_code().
 */
class vec4_instruction : public exec_node {
public:
/* Callers of this ralloc-based new need not call delete. It's
* easier to just ralloc_free 'ctx' (or any of its ancestors). */
static void* operator new(size_t size, void *ctx)
{
void *node;
node = rzalloc_size(ctx, size);
assert(node != NULL);
return node;
}
/** Destination operand translated into a hardware register description. */
struct brw_reg get_dst(void);
/** Source operand \c i (0..2) translated into a hardware register description. */
struct brw_reg get_src(int i);
/* The code generator handles BRW_OPCODE_*, SHADER_OPCODE_* and
 * VS_OPCODE_* values here.
 */
enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
dst_reg dst;
src_reg src[3];
bool saturate;
bool predicate_inverse;
uint32_t predicate;
int conditional_mod; /**< BRW_CONDITIONAL_* */
int sampler;
int target; /**< MRT target. */
bool shadow_compare;
bool eot;
bool header_present;
int mlen; /**< SEND message length */
int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
/* Also passed as the URB destination offset by generate_urb_write(). */
uint32_t offset; /* spill/unspill offset */
/** @{
* Annotation for the generated IR. One of the two can be set.
*/
ir_instruction *ir;
const char *annotation;
/** @} */
};
/**
 * GLSL IR visitor that compiles a vertex shader to native code.
 *
 * run() drives the whole process: it visits the shader's GLSL IR to build
 * the list of vec4_instruction in 'instructions', emits the URB writes,
 * lays out the thread payload, allocates registers and finally generates
 * the native instructions.
 */
class vec4_visitor : public ir_visitor
{
public:
vec4_visitor(struct brw_vs_compile *c,
struct gl_shader_program *prog, struct brw_shader *shader);
~vec4_visitor();
/** Null destination register with its default type. */
dst_reg dst_null_f()
{
return dst_reg(brw_null_reg());
}
/** Null destination register retyped to BRW_REGISTER_TYPE_D. */
dst_reg dst_null_d()
{
return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
}
/** Null destination for comparisons: D-typed after gen4, default type on gen4. */
dst_reg dst_null_cmp()
{
if (intel->gen > 4)
return dst_null_d();
else
return dst_null_f();
}
struct brw_context *brw;
const struct gl_vertex_program *vp;
struct intel_context *intel;
struct gl_context *ctx;
struct brw_vs_compile *c;
struct brw_vs_prog_data *prog_data;
struct brw_compile *p;
struct brw_shader *shader;
struct gl_shader_program *prog;
void *mem_ctx;
/** The vec4_instruction list built by the visit methods. */
exec_list instructions;
char *fail_msg;
/** Checked by run() to decide whether compilation succeeded. */
bool failed;
/**
* GLSL IR currently being processed, which is associated with our
* driver IR instructions for debugging purposes.
*/
ir_instruction *base_ir;
const char *current_annotation;
/** Size of each virtual GRF, indexed by virtual register number. */
int *virtual_grf_sizes;
int virtual_grf_count;
int virtual_grf_array_size;
/** First GRF after the payload; set by setup_payload(). */
int first_non_payload_grf;
dst_reg *variable_storage(ir_variable *var);
void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
src_reg src_reg_for_float(float val);
/**
* \name Visit methods
*
* As typical for the visitor pattern, there must be one \c visit method for
* each concrete subclass of \c ir_instruction. Virtual base classes within
* the hierarchy should not have \c visit methods.
*/
/*@{*/
virtual void visit(ir_variable *);
virtual void visit(ir_loop *);
virtual void visit(ir_loop_jump *);
virtual void visit(ir_function_signature *);
virtual void visit(ir_function *);
virtual void visit(ir_expression *);
virtual void visit(ir_swizzle *);
virtual void visit(ir_dereference_variable *);
virtual void visit(ir_dereference_array *);
virtual void visit(ir_dereference_record *);
virtual void visit(ir_assignment *);
virtual void visit(ir_constant *);
virtual void visit(ir_call *);
virtual void visit(ir_return *);
virtual void visit(ir_discard *);
virtual void visit(ir_texture *);
virtual void visit(ir_if *);
/*@}*/
src_reg result;
/* Regs for vertex results. Generated at ir_variable visiting time
* for the ir->location's used.
*/
dst_reg output_reg[VERT_RESULT_MAX];
struct hash_table *variable_ht;
/** Compiles the shader; returns false if compilation failed. */
bool run(void);
void fail(const char *msg, ...);
int virtual_grf_alloc(int size);
/** Maps ATTR sources to payload registers; returns the attribute count. */
int setup_attributes(int payload_reg);
void setup_payload();
void reg_allocate_trivial();
void reg_allocate();
vec4_instruction *emit(enum opcode opcode);
vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);
vec4_instruction *emit(enum opcode opcode, dst_reg dst,
src_reg src0, src_reg src1);
vec4_instruction *emit(enum opcode opcode, dst_reg dst,
src_reg src0, src_reg src1, src_reg src2);
/** Walks an exec_list of ir_instruction and sends it through this visitor. */
void visit_instructions(const exec_list *list);
void emit_bool_to_cond_code(ir_rvalue *ir);
void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1);
void emit_if_gen6(ir_if *ir);
void emit_block_move(ir_assignment *ir);
/**
* Emit the correct dot-product instruction for the type of arguments
*/
void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
void emit_scalar(ir_instruction *ir, enum prog_opcode op,
dst_reg dst, src_reg src0);
void emit_scalar(ir_instruction *ir, enum prog_opcode op,
dst_reg dst, src_reg src0, src_reg src1);
void emit_scs(ir_instruction *ir, enum prog_opcode op,
dst_reg dst, const src_reg &src);
void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src);
void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src);
void emit_math(enum opcode opcode, dst_reg dst, src_reg src);
void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
int emit_vue_header_gen6(int header_mrf);
int emit_vue_header_gen4(int header_mrf);
void emit_urb_writes(void);
GLboolean try_emit_sat(ir_expression *ir);
bool process_move_condition(ir_rvalue *ir);
/** Translates 'instructions' into native code in 'p'. */
void generate_code();
/** Handles the opcodes that generate_code() does not emit directly. */
void generate_vs_instruction(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg *src);
void generate_math1_gen4(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src);
void generate_math1_gen6(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src);
void generate_urb_write(vec4_instruction *inst);
};
} /* namespace brw */
#endif /* BRW_VEC4_H */

View File

@@ -0,0 +1,568 @@
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_vec4.h"
#include "../glsl/ir_print_visitor.h"
extern "C" {
#include "brw_eu.h"
};
using namespace brw;
namespace brw {
/**
 * Assigns a payload register to every vertex attribute the program reads
 * and rewrites all ATTR sources to reference those registers.
 *
 * \param payload_reg  first register available for attributes
 * \return the number of attribute registers (at least 1)
 */
int
vec4_visitor::setup_attributes(int payload_reg)
{
   int attribute_map[VERT_ATTRIB_MAX];
   int nr_attributes = 0;

   /* Attributes that are read get consecutive registers, in attribute
    * order, starting at payload_reg.
    */
   for (int attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
      if (prog_data->inputs_read & BITFIELD64_BIT(attr))
         attribute_map[attr] = payload_reg + nr_attributes++;
   }

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)iter.get();

      for (int s = 0; s < 3; s++) {
         src_reg &operand = inst->src[s];

         if (operand.file == ATTR) {
            /* Turn the attribute reference into a fixed hardware register
             * that keeps the operand's swizzle.
             */
            operand.file = HW_REG;
            operand.fixed_hw_reg = brw_vec8_grf(attribute_map[operand.reg], 0);
            operand.fixed_hw_reg.dw1.bits.swizzle = operand.swizzle;
         }
      }
   }

   /* The BSpec says we always have to read at least one thing from
    * the VF, and it appears that the hardware wedges otherwise.
    */
   if (nr_attributes == 0)
      nr_attributes = 1;

   /* The read length counts pairs of attributes, rounded up. */
   prog_data->urb_read_length = (nr_attributes + 1) / 2;

   return nr_attributes;
}
/**
 * Lays out the thread payload: r0, the user clip planes read from the
 * CURBE, then the vertex attributes.  Records the first register that is
 * free for general allocation in first_non_payload_grf.
 */
void
vec4_visitor::setup_payload(void)
{
   /* r0 is always reserved, as it contains the payload with the URB
    * handles that are passed on to the URB write at the end of the
    * thread.
    */
   int reg = 1;

   /* User clip planes from curbe: */
   if (c->key.nr_userclip) {
      /* Two planes are packed per register.  Before gen6 the user planes
       * start six plane slots into the CURBE data.
       */
      const int first_slot = (intel->gen >= 6) ? 0 : 6;

      for (int i = 0; i < c->key.nr_userclip; i++) {
         c->userplane[i] = stride(brw_vec4_grf(reg + (first_slot + i) / 2,
                                               (i % 2) * 4), 0, 4, 1);
      }

      if (intel->gen >= 6)
         reg += ALIGN(c->key.nr_userclip, 2) / 2;
      else
         reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
   }

   /* FINISHME: push constants */
   c->prog_data.curb_read_length = reg - 1;
   c->prog_data.nr_params = 0;
   /* XXX 0 causes a bug elsewhere... */
   if (intel->gen < 6 && c->prog_data.nr_params == 0)
      c->prog_data.nr_params = 4;

   /* Attributes follow the constants. */
   reg += setup_attributes(reg);

   this->first_non_payload_grf = reg;
}
struct brw_reg
vec4_instruction::get_dst(void)
{
struct brw_reg brw_reg;
switch (dst.file) {
case GRF:
assert(dst.reg_offset == 0);
brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
brw_reg = retype(brw_reg, dst.type);
brw_reg.dw1.bits.writemask = dst.writemask;
break;
case HW_REG:
brw_reg = dst.fixed_hw_reg;
break;
case BAD_FILE:
brw_reg = brw_null_reg();
break;
default:
assert(!"not reached");
brw_reg = brw_null_reg();
break;
}
return brw_reg;
}
struct brw_reg
vec4_instruction::get_src(int i)
{
struct brw_reg brw_reg;
switch (src[i].file) {
case GRF:
brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
brw_reg = retype(brw_reg, src[i].type);
brw_reg.dw1.bits.swizzle = src[i].swizzle;
if (src[i].abs)
brw_reg = brw_abs(brw_reg);
if (src[i].negate)
brw_reg = negate(brw_reg);
break;
case IMM:
switch (src[i].type) {
case BRW_REGISTER_TYPE_F:
brw_reg = brw_imm_f(src[i].imm.f);
break;
case BRW_REGISTER_TYPE_D:
brw_reg = brw_imm_d(src[i].imm.i);
break;
case BRW_REGISTER_TYPE_UD:
brw_reg = brw_imm_ud(src[i].imm.u);
break;
default:
assert(!"not reached");
brw_reg = brw_null_reg();
break;
}
break;
case HW_REG:
brw_reg = src[i].fixed_hw_reg;
break;
case BAD_FILE:
/* Probably unused. */
brw_reg = brw_null_reg();
break;
case ATTR:
default:
assert(!"not reached");
brw_reg = brw_null_reg();
break;
}
return brw_reg;
}
/**
 * Emits a one-operand math instruction on the pre-gen6 path.
 *
 * The math function is derived from the instruction's opcode; the
 * instruction's base_mrf is passed as the message register.
 */
void
vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src)
{
   const uint32_t function = brw_math_function(inst->opcode);

   brw_math(p, dst, function, BRW_MATH_SATURATE_NONE, inst->base_mrf, src,
            BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL);
}
/**
 * Emits a one-operand math instruction on the gen6+ path.
 *
 * Currently issues exactly the same brw_math() call as the gen4 variant.
 */
void
vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src)
{
   const uint32_t function = brw_math_function(inst->opcode);

   brw_math(p, dst, function, BRW_MATH_SATURATE_NONE, inst->base_mrf, src,
            BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL);
}
/**
 * Emits the URB write described by \p inst.
 *
 * The message starts at inst->base_mrf, is inst->mlen registers long and
 * targets URB offset inst->offset.  The end-of-thread flag doubles as the
 * "writes complete" flag.
 */
void
vec4_visitor::generate_urb_write(vec4_instruction *inst)
{
   const bool allocate = false;
   const bool used = true;
   const int response_length = 0;

   brw_urb_WRITE(p,
                 brw_null_reg(),      /* dest */
                 inst->base_mrf,      /* starting mrf reg nr */
                 brw_vec8_grf(0, 0),  /* src */
                 allocate,
                 used,
                 inst->mlen,
                 response_length,
                 inst->eot,           /* eot */
                 inst->eot,           /* writes complete */
                 inst->offset,        /* urb destination offset */
                 BRW_URB_SWIZZLE_INTERLEAVE);
}
/**
 * Generates code for the opcodes that generate_code() does not handle
 * itself: the one-operand math opcodes and the URB write.
 *
 * Any other opcode marks the compile as failed, naming the opcode when it
 * falls inside the brw_opcodes table.
 */
void
vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
                                      struct brw_reg dst,
                                      struct brw_reg *src)
{
   vec4_instruction *const inst = instruction;

   switch (inst->opcode) {
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SQRT:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      if (intel->gen < 6)
         generate_math1_gen4(inst, dst, src[0]);
      else
         generate_math1_gen6(inst, dst, src[0]);
      break;

   case SHADER_OPCODE_POW:
      /* Two-operand math is not implemented yet. */
      assert(!"finishme");
      break;

   case VS_OPCODE_URB_WRITE:
      generate_urb_write(inst);
      break;

   default:
      if (inst->opcode >= (int)ARRAY_SIZE(brw_opcodes)) {
         fail("Unsupported opcode %d in VS", inst->opcode);
      } else {
         fail("unsupported opcode in `%s' in VS\n",
              brw_opcodes[inst->opcode].name);
      }
      break;
   }
}
/**
 * Compiles the shader: builds the vec4 IR, then lays out the payload,
 * allocates registers and generates native code.
 *
 * \return true on success, false if any stage flagged a failure.
 */
bool
vec4_visitor::run()
{
   /* Generate the vec4 IR by sending every top-level GLSL IR instruction
    * through this visitor.
    */
   foreach_iter(exec_list_iterator, iter, *shader->ir) {
      ir_instruction *const top_level = (ir_instruction *)iter.get();

      base_ir = top_level;
      top_level->accept(this);
   }

   emit_urb_writes();

   /* Only go on to the back end if IR generation succeeded. */
   if (!failed) {
      setup_payload();
      reg_allocate();

      brw_set_access_mode(p, BRW_ALIGN_16);

      generate_code();
   }

   return !failed;
}
/**
 * Translates the vec4 IR in this->instructions into native instructions
 * in 'p'.
 *
 * Each IR instruction has its operands converted with get_src()/get_dst(),
 * its conditional mod, predicate and saturate state applied, and is then
 * emitted either directly (ALU and flow-control opcodes) or through
 * generate_vs_instruction().  With INTEL_DEBUG & DEBUG_VS set, the
 * annotations and a disassembly of the emitted code are printed as well.
 */
void
vec4_visitor::generate_code()
{
int last_native_inst = p->nr_insn;
const char *last_annotation_string = NULL;
ir_instruction *last_annotation_ir = NULL;
/* loop_stack holds the DO instruction of every loop currently open;
 * if_depth_in_loop counts the IFs open at each loop nesting level (index 0
 * is code outside of any loop).  Both arrays are grown together.
 */
int loop_stack_array_size = 16;
int loop_stack_depth = 0;
brw_instruction **loop_stack =
rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
int *if_depth_in_loop =
rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
printf("Native code for vertex shader %d:\n", prog->Name);
}
foreach_list(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
struct brw_reg src[3], dst;
/* Debug output: print an annotation only when it differs from the one of
 * the previous instruction.
 */
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
if (last_annotation_ir != inst->ir) {
last_annotation_ir = inst->ir;
if (last_annotation_ir) {
printf(" ");
last_annotation_ir->print();
printf("\n");
}
}
if (last_annotation_string != inst->annotation) {
last_annotation_string = inst->annotation;
if (last_annotation_string)
printf(" %s\n", last_annotation_string);
}
}
for (unsigned int i = 0; i < 3; i++) {
src[i] = inst->get_src(i);
}
dst = inst->get_dst();
/* Per-instruction default state for whatever gets emitted below. */
brw_set_conditionalmod(p, inst->conditional_mod);
brw_set_predicate_control(p, inst->predicate);
brw_set_predicate_inverse(p, inst->predicate_inverse);
brw_set_saturate(p, inst->saturate);
switch (inst->opcode) {
case BRW_OPCODE_MOV:
brw_MOV(p, dst, src[0]);
break;
case BRW_OPCODE_ADD:
brw_ADD(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_MUL:
brw_MUL(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_FRC:
brw_FRC(p, dst, src[0]);
break;
case BRW_OPCODE_RNDD:
brw_RNDD(p, dst, src[0]);
break;
case BRW_OPCODE_RNDE:
brw_RNDE(p, dst, src[0]);
break;
case BRW_OPCODE_RNDZ:
brw_RNDZ(p, dst, src[0]);
break;
case BRW_OPCODE_AND:
brw_AND(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_OR:
brw_OR(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_XOR:
brw_XOR(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_NOT:
brw_NOT(p, dst, src[0]);
break;
case BRW_OPCODE_ASR:
brw_ASR(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_SHR:
brw_SHR(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_SHL:
brw_SHL(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_CMP:
brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
break;
case BRW_OPCODE_SEL:
brw_SEL(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_IF:
if (inst->src[0].file != BAD_FILE) {
/* The instruction has an embedded compare (only allowed on gen6) */
assert(intel->gen == 6);
gen6_IF(p, inst->conditional_mod, src[0], src[1]);
} else {
brw_IF(p, BRW_EXECUTE_8);
}
if_depth_in_loop[loop_stack_depth]++;
break;
case BRW_OPCODE_ELSE:
brw_ELSE(p);
break;
case BRW_OPCODE_ENDIF:
brw_ENDIF(p);
if_depth_in_loop[loop_stack_depth]--;
break;
case BRW_OPCODE_DO:
loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
/* Grow both arrays before if_depth_in_loop[loop_stack_depth] is written
 * just below.
 */
if (loop_stack_array_size <= loop_stack_depth) {
loop_stack_array_size *= 2;
loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
loop_stack_array_size);
if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
loop_stack_array_size);
}
if_depth_in_loop[loop_stack_depth] = 0;
break;
case BRW_OPCODE_BREAK:
brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case BRW_OPCODE_CONTINUE:
/* FINISHME: We need to write the loop instruction support still. */
if (intel->gen >= 6)
gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
else
brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case BRW_OPCODE_WHILE: {
struct brw_instruction *inst0, *inst1;
/* Multiplier applied to instruction distances when patching jump
 * counts: 1 before gen5, 2 from gen5 on.
 */
GLuint br = 1;
if (intel->gen >= 5)
br = 2;
assert(loop_stack_depth > 0);
loop_stack_depth--;
inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
if (intel->gen < 6) {
/* patch all the BREAK/CONT instructions from last BGNLOOP */
while (inst0 > loop_stack[loop_stack_depth]) {
inst0--;
if (inst0->header.opcode == BRW_OPCODE_BREAK &&
inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
}
else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
}
}
}
}
break;
default:
/* Math, URB writes and anything unsupported. */
generate_vs_instruction(inst, dst, src);
break;
}
/* Debug output: disassemble what was emitted for this IR instruction. */
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
/* Raw instruction dwords; compiled in but disabled. */
if (0) {
printf("0x%08x 0x%08x 0x%08x 0x%08x ",
((uint32_t *)&p->store[i])[3],
((uint32_t *)&p->store[i])[2],
((uint32_t *)&p->store[i])[1],
((uint32_t *)&p->store[i])[0]);
}
brw_disasm(stdout, &p->store[i], intel->gen);
}
}
last_native_inst = p->nr_insn;
}
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
printf("\n");
}
ralloc_free(loop_stack);
ralloc_free(if_depth_in_loop);
brw_set_uip_jip(p);
/* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
* emit issues, it doesn't get the jump distances into the output,
* which is often something we want to debug. So this is here in
* case you're doing that.
*/
if (0) {
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
for (unsigned int i = 0; i < p->nr_insn; i++) {
printf("0x%08x 0x%08x 0x%08x 0x%08x ",
((uint32_t *)&p->store[i])[3],
((uint32_t *)&p->store[i])[2],
((uint32_t *)&p->store[i])[1],
((uint32_t *)&p->store[i])[0]);
brw_disasm(stdout, &p->store[i], intel->gen);
}
}
}
}
extern "C" {
/**
 * C entry point of the new vertex shader backend.
 *
 * Compiles the currently bound GLSL vertex program for \p c.
 *
 * \return true if native code was generated; false when there is no GLSL
 *         vertex shader to compile or when compilation failed.
 */
bool
brw_vs_emit(struct brw_vs_compile *c)
{
   struct brw_compile *p = &c->func;
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram;

   /* Without a program there is no linked shader to look up either. */
   struct brw_shader *shader = prog
      ? (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]
      : NULL;
   if (!shader)
      return false;

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      printf("GLSL IR for native vertex shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n\n");
   }

   vec4_visitor v(c, prog, shader);
   if (v.run())
      return true;

   /* FINISHME: Cleanly fail, test at link time, etc. */
   assert(!"not reached");
   return false;
}
} /* extern "C" */
} /* namespace brw */

View File

@@ -0,0 +1,77 @@
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_vec4.h"
#include "../glsl/ir_print_visitor.h"
using namespace brw;
namespace brw {
/**
 * Replaces the virtual GRF number of \p reg with the hardware register
 * recorded for it in \p reg_hw_locations.  Registers in any other file
 * are left alone.
 */
static void
assign(int *reg_hw_locations, reg *reg)
{
   if (reg->file != GRF)
      return;

   const int virtual_nr = reg->reg;
   reg->reg = reg_hw_locations[virtual_nr];
}
void
vec4_visitor::reg_allocate_trivial()
{
int last_grf = 0;
int hw_reg_mapping[this->virtual_grf_count];
int i;
int next;
/* Note that compressed instructions require alignment to 2 registers. */
hw_reg_mapping[0] = this->first_non_payload_grf;
next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
for (i = 1; i < this->virtual_grf_count; i++) {
hw_reg_mapping[i] = next;
next += this->virtual_grf_sizes[i];
}
prog_data->total_grf = next;
foreach_iter(exec_list_iterator, iter, this->instructions) {
vec4_instruction *inst = (vec4_instruction *)iter.get();
assign(hw_reg_mapping, &inst->dst);
assign(hw_reg_mapping, &inst->src[0]);
assign(hw_reg_mapping, &inst->src[1]);
assign(hw_reg_mapping, &inst->src[2]);
}
if (last_grf >= BRW_MAX_GRF) {
fail("Ran out of regs on trivial allocator (%d/%d)\n",
last_grf, BRW_MAX_GRF);
}
}
/**
 * Register allocation entry point.  Only the trivial allocator exists so
 * far, so this simply forwards to it.
 */
void
vec4_visitor::reg_allocate()
{
reg_allocate_trivial();
}
} /* namespace brw */

File diff suppressed because it is too large Load Diff

View File

@@ -30,6 +30,7 @@
*/
#include "main/compiler.h"
#include "brw_context.h"
#include "brw_vs.h"
#include "brw_util.h"
@@ -50,6 +51,7 @@ static void do_vs_prog( struct brw_context *brw,
void *mem_ctx;
int aux_size;
int i;
static int new_vs = -1;
memset(&c, 0, sizeof(c));
memcpy(&c.key, key, sizeof(*key));
@@ -85,7 +87,15 @@ static void do_vs_prog( struct brw_context *brw,
/* Emit GEN4 code.
*/
brw_vs_emit(&c);
if (new_vs == -1)
new_vs = getenv("INTEL_NEW_VS") != NULL;
if (new_vs) {
if (!brw_vs_emit(&c))
brw_old_vs_emit(&c);
} else {
brw_old_vs_emit(&c);
}
/* get the program
*/

View File

@@ -92,6 +92,7 @@ struct brw_vs_compile {
GLboolean needs_stack;
};
void brw_vs_emit( struct brw_vs_compile *c );
bool brw_vs_emit(struct brw_vs_compile *c);
void brw_old_vs_emit(struct brw_vs_compile *c);
#endif

View File

@@ -1903,7 +1903,7 @@ brw_vs_rescale_gl_fixed(struct brw_vs_compile *c)
/* Emit the vertex program instructions here.
*/
void brw_vs_emit(struct brw_vs_compile *c )
void brw_old_vs_emit(struct brw_vs_compile *c )
{
#define MAX_IF_DEPTH 32
#define MAX_LOOP_DEPTH 32