glsl_to_tgsi: allocate arrays separately v2
Instead of allocating everything as temporaries, use the new array allocation functions. v2: fix bug in simplify_cmp, declare arrays on demand Signed-off-by: Christian König <christian.koenig@amd.com>
This commit is contained in:
@@ -1796,6 +1796,7 @@ struct gl_transform_feedback_state
|
|||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
PROGRAM_TEMPORARY, /**< machine->Temporary[] */
|
PROGRAM_TEMPORARY, /**< machine->Temporary[] */
|
||||||
|
PROGRAM_ARRAY, /**< Arrays & Matrixes */
|
||||||
PROGRAM_INPUT, /**< machine->Inputs[] */
|
PROGRAM_INPUT, /**< machine->Inputs[] */
|
||||||
PROGRAM_OUTPUT, /**< machine->Outputs[] */
|
PROGRAM_OUTPUT, /**< machine->Outputs[] */
|
||||||
PROGRAM_LOCAL_PARAM, /**< gl_program->LocalParams[] */
|
PROGRAM_LOCAL_PARAM, /**< gl_program->LocalParams[] */
|
||||||
|
@@ -85,6 +85,11 @@ extern "C" {
|
|||||||
*/
|
*/
|
||||||
#define MAX_TEMPS 4096
|
#define MAX_TEMPS 4096
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maximum number of arrays
|
||||||
|
*/
|
||||||
|
#define MAX_ARRAYS 256
|
||||||
|
|
||||||
/* will be 4 for GLSL 4.00 */
|
/* will be 4 for GLSL 4.00 */
|
||||||
#define MAX_GLSL_TEXTURE_OFFSET 1
|
#define MAX_GLSL_TEXTURE_OFFSET 1
|
||||||
|
|
||||||
@@ -315,6 +320,9 @@ public:
|
|||||||
|
|
||||||
int next_temp;
|
int next_temp;
|
||||||
|
|
||||||
|
unsigned array_sizes[MAX_ARRAYS];
|
||||||
|
unsigned next_array;
|
||||||
|
|
||||||
int num_address_regs;
|
int num_address_regs;
|
||||||
int samplers_used;
|
int samplers_used;
|
||||||
bool indirect_addr_temps;
|
bool indirect_addr_temps;
|
||||||
@@ -550,6 +558,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
|
|||||||
if (dst.reladdr) {
|
if (dst.reladdr) {
|
||||||
switch(dst.file) {
|
switch(dst.file) {
|
||||||
case PROGRAM_TEMPORARY:
|
case PROGRAM_TEMPORARY:
|
||||||
|
case PROGRAM_ARRAY:
|
||||||
this->indirect_addr_temps = true;
|
this->indirect_addr_temps = true;
|
||||||
break;
|
break;
|
||||||
case PROGRAM_LOCAL_PARAM:
|
case PROGRAM_LOCAL_PARAM:
|
||||||
@@ -571,6 +580,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
|
|||||||
if(inst->src[i].reladdr) {
|
if(inst->src[i].reladdr) {
|
||||||
switch(inst->src[i].file) {
|
switch(inst->src[i].file) {
|
||||||
case PROGRAM_TEMPORARY:
|
case PROGRAM_TEMPORARY:
|
||||||
|
case PROGRAM_ARRAY:
|
||||||
this->indirect_addr_temps = true;
|
this->indirect_addr_temps = true;
|
||||||
break;
|
break;
|
||||||
case PROGRAM_LOCAL_PARAM:
|
case PROGRAM_LOCAL_PARAM:
|
||||||
@@ -1005,17 +1015,26 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
|
|||||||
st_src_reg src;
|
st_src_reg src;
|
||||||
|
|
||||||
src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
|
src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
|
||||||
src.file = PROGRAM_TEMPORARY;
|
|
||||||
src.index = next_temp;
|
|
||||||
src.reladdr = NULL;
|
src.reladdr = NULL;
|
||||||
next_temp += type_size(type);
|
src.negate = 0;
|
||||||
|
|
||||||
|
if (type->is_array() || type->is_matrix()) {
|
||||||
|
src.file = PROGRAM_ARRAY;
|
||||||
|
src.index = next_array << 16 | 0x8000;
|
||||||
|
array_sizes[next_array] = type_size(type);
|
||||||
|
++next_array;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
src.file = PROGRAM_TEMPORARY;
|
||||||
|
src.index = next_temp;
|
||||||
|
next_temp += type_size(type);
|
||||||
|
}
|
||||||
|
|
||||||
if (type->is_array() || type->is_record()) {
|
if (type->is_array() || type->is_record()) {
|
||||||
src.swizzle = SWIZZLE_NOOP;
|
src.swizzle = SWIZZLE_NOOP;
|
||||||
} else {
|
} else {
|
||||||
src.swizzle = swizzle_for_size(type->vector_elements);
|
src.swizzle = swizzle_for_size(type->vector_elements);
|
||||||
}
|
}
|
||||||
src.negate = 0;
|
|
||||||
|
|
||||||
return src;
|
return src;
|
||||||
}
|
}
|
||||||
@@ -2975,6 +2994,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
|
|||||||
{
|
{
|
||||||
result.file = PROGRAM_UNDEFINED;
|
result.file = PROGRAM_UNDEFINED;
|
||||||
next_temp = 1;
|
next_temp = 1;
|
||||||
|
next_array = 0;
|
||||||
next_signature_id = 1;
|
next_signature_id = 1;
|
||||||
num_immediates = 0;
|
num_immediates = 0;
|
||||||
current_function = NULL;
|
current_function = NULL;
|
||||||
@@ -3180,7 +3200,8 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
|
|||||||
assert(inst->dst.index < MAX_TEMPS);
|
assert(inst->dst.index < MAX_TEMPS);
|
||||||
prevWriteMask = tempWrites[inst->dst.index];
|
prevWriteMask = tempWrites[inst->dst.index];
|
||||||
tempWrites[inst->dst.index] |= inst->dst.writemask;
|
tempWrites[inst->dst.index] |= inst->dst.writemask;
|
||||||
}
|
} else
|
||||||
|
break;
|
||||||
|
|
||||||
/* For a CMP to be considered a conditional write, the destination
|
/* For a CMP to be considered a conditional write, the destination
|
||||||
* register and source register two must be the same. */
|
* register and source register two must be the same. */
|
||||||
@@ -4011,6 +4032,7 @@ struct st_translate {
|
|||||||
struct ureg_program *ureg;
|
struct ureg_program *ureg;
|
||||||
|
|
||||||
struct ureg_dst temps[MAX_TEMPS];
|
struct ureg_dst temps[MAX_TEMPS];
|
||||||
|
struct ureg_dst arrays[MAX_ARRAYS];
|
||||||
struct ureg_src *constants;
|
struct ureg_src *constants;
|
||||||
struct ureg_src *immediates;
|
struct ureg_src *immediates;
|
||||||
struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
|
struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
|
||||||
@@ -4019,6 +4041,8 @@ struct st_translate {
|
|||||||
struct ureg_src samplers[PIPE_MAX_SAMPLERS];
|
struct ureg_src samplers[PIPE_MAX_SAMPLERS];
|
||||||
struct ureg_src systemValues[SYSTEM_VALUE_MAX];
|
struct ureg_src systemValues[SYSTEM_VALUE_MAX];
|
||||||
|
|
||||||
|
unsigned array_sizes[MAX_ARRAYS];
|
||||||
|
|
||||||
const GLuint *inputMapping;
|
const GLuint *inputMapping;
|
||||||
const GLuint *outputMapping;
|
const GLuint *outputMapping;
|
||||||
|
|
||||||
@@ -4129,16 +4153,34 @@ dst_register(struct st_translate *t,
|
|||||||
gl_register_file file,
|
gl_register_file file,
|
||||||
GLuint index)
|
GLuint index)
|
||||||
{
|
{
|
||||||
|
unsigned array;
|
||||||
|
|
||||||
switch(file) {
|
switch(file) {
|
||||||
case PROGRAM_UNDEFINED:
|
case PROGRAM_UNDEFINED:
|
||||||
return ureg_dst_undef();
|
return ureg_dst_undef();
|
||||||
|
|
||||||
case PROGRAM_TEMPORARY:
|
case PROGRAM_TEMPORARY:
|
||||||
|
assert(index >= 0);
|
||||||
|
assert(index < (int) Elements(t->temps));
|
||||||
|
|
||||||
if (ureg_dst_is_undef(t->temps[index]))
|
if (ureg_dst_is_undef(t->temps[index]))
|
||||||
t->temps[index] = ureg_DECL_local_temporary(t->ureg);
|
t->temps[index] = ureg_DECL_local_temporary(t->ureg);
|
||||||
|
|
||||||
return t->temps[index];
|
return t->temps[index];
|
||||||
|
|
||||||
|
case PROGRAM_ARRAY:
|
||||||
|
array = index >> 16;
|
||||||
|
|
||||||
|
assert(array >= 0);
|
||||||
|
assert(array < (int) Elements(t->arrays));
|
||||||
|
|
||||||
|
if (ureg_dst_is_undef(t->arrays[array]))
|
||||||
|
t->arrays[array] = ureg_DECL_array_temporary(
|
||||||
|
t->ureg, t->array_sizes[array], TRUE);
|
||||||
|
|
||||||
|
return ureg_dst_array_offset(t->arrays[array],
|
||||||
|
(int)(index & 0xFFFF) - 0x8000);
|
||||||
|
|
||||||
case PROGRAM_OUTPUT:
|
case PROGRAM_OUTPUT:
|
||||||
if (t->procType == TGSI_PROCESSOR_VERTEX)
|
if (t->procType == TGSI_PROCESSOR_VERTEX)
|
||||||
assert(index < VARYING_SLOT_MAX);
|
assert(index < VARYING_SLOT_MAX);
|
||||||
@@ -4173,11 +4215,8 @@ src_register(struct st_translate *t,
|
|||||||
return ureg_src_undef();
|
return ureg_src_undef();
|
||||||
|
|
||||||
case PROGRAM_TEMPORARY:
|
case PROGRAM_TEMPORARY:
|
||||||
assert(index >= 0);
|
case PROGRAM_ARRAY:
|
||||||
assert(index < (int) Elements(t->temps));
|
return ureg_src(dst_register(t, file, index));
|
||||||
if (ureg_dst_is_undef(t->temps[index]))
|
|
||||||
t->temps[index] = ureg_DECL_local_temporary(t->ureg);
|
|
||||||
return ureg_src(t->temps[index]);
|
|
||||||
|
|
||||||
case PROGRAM_ENV_PARAM:
|
case PROGRAM_ENV_PARAM:
|
||||||
case PROGRAM_LOCAL_PARAM:
|
case PROGRAM_LOCAL_PARAM:
|
||||||
@@ -4259,8 +4298,10 @@ translate_dst(struct st_translate *t,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dst_reg->reladdr != NULL)
|
if (dst_reg->reladdr != NULL) {
|
||||||
|
assert(dst_reg->file != PROGRAM_TEMPORARY);
|
||||||
dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
|
dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
|
||||||
|
}
|
||||||
|
|
||||||
return dst;
|
return dst;
|
||||||
}
|
}
|
||||||
@@ -4283,26 +4324,8 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg)
|
|||||||
src = ureg_negate(src);
|
src = ureg_negate(src);
|
||||||
|
|
||||||
if (src_reg->reladdr != NULL) {
|
if (src_reg->reladdr != NULL) {
|
||||||
/* Normally ureg_src_indirect() would be used here, but a stupid compiler
|
assert(src_reg->file != PROGRAM_TEMPORARY);
|
||||||
* bug in g++ makes ureg_src_indirect (an inline C function) erroneously
|
src = ureg_src_indirect(src, ureg_src(t->address[0]));
|
||||||
* set the bit for src.Negate. So we have to do the operation manually
|
|
||||||
* here to work around the compiler's problems. */
|
|
||||||
/*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
|
|
||||||
struct ureg_src addr = ureg_src(t->address[0]);
|
|
||||||
src.Indirect = 1;
|
|
||||||
src.IndirectFile = addr.File;
|
|
||||||
src.IndirectIndex = addr.Index;
|
|
||||||
src.IndirectSwizzle = addr.SwizzleX;
|
|
||||||
|
|
||||||
if (src_reg->file != PROGRAM_INPUT &&
|
|
||||||
src_reg->file != PROGRAM_OUTPUT) {
|
|
||||||
/* If src_reg->index was negative, it was set to zero in
|
|
||||||
* src_register(). Reassign it now. But don't do this
|
|
||||||
* for input/output regs since they get remapped while
|
|
||||||
* const buffers don't.
|
|
||||||
*/
|
|
||||||
src.Index = src_reg->index;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return src;
|
return src;
|
||||||
@@ -4828,6 +4851,10 @@ st_translate_program(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Copy over array sizes
|
||||||
|
*/
|
||||||
|
memcpy(t->array_sizes, program->array_sizes, sizeof(unsigned) * program->next_array);
|
||||||
|
|
||||||
/* Emit constants and uniforms. TGSI uses a single index space for these,
|
/* Emit constants and uniforms. TGSI uses a single index space for these,
|
||||||
* so we put all the translated regs in t->constants.
|
* so we put all the translated regs in t->constants.
|
||||||
*/
|
*/
|
||||||
|
Reference in New Issue
Block a user