r300/compiler: Use hardware flow control instructions for loops on r500.
This commit is contained in:

committed by
Marek Olšák

parent
3c3b7e02eb
commit
f381c52081
@@ -246,13 +246,14 @@ static void r300_emit_fs_code_to_buffer(
|
||||
if (r300->screen->caps.is_r500) {
|
||||
struct r500_fragment_program_code *code = &generic_code->code.r500;
|
||||
|
||||
shader->cb_code_size = 17 +
|
||||
shader->cb_code_size = 19 +
|
||||
((code->inst_end + 1) * 6) +
|
||||
imm_count * 7;
|
||||
|
||||
NEW_CB(shader->cb_code, shader->cb_code_size);
|
||||
OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
|
||||
OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx);
|
||||
OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl);
|
||||
OUT_CB_REG(R500_US_CODE_RANGE,
|
||||
R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end));
|
||||
OUT_CB_REG(R500_US_CODE_OFFSET, 0);
|
||||
|
@@ -103,15 +103,14 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
|
||||
|
||||
debug_program_log(c, "before compilation");
|
||||
|
||||
/* XXX Ideally this should be done only for r3xx, but since
|
||||
* we don't have branching support for r5xx, we use the emulation
|
||||
* on all chipsets. */
|
||||
|
||||
rc_transform_unroll_loops(&c->Base, &loop_state);
|
||||
|
||||
debug_program_log(c, "after transform loops");
|
||||
|
||||
if (!c->Base.is_r500){
|
||||
if (c->Base.is_r500){
|
||||
r500_transform_unroll_loops(&c->Base, &loop_state);
|
||||
debug_program_log(c, "after r500 transform loops");
|
||||
}
|
||||
else{
|
||||
rc_transform_unroll_loops(&c->Base, &loop_state);
|
||||
debug_program_log(c, "after transform loops");
|
||||
|
||||
rc_emulate_branches(&c->Base);
|
||||
debug_program_log(c, "after emulate branches");
|
||||
}
|
||||
@@ -161,14 +160,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
|
||||
|
||||
debug_program_log(c, "after deadcode");
|
||||
|
||||
if(c->Base.is_r500){
|
||||
rc_emulate_loops(&loop_state, R500_PFS_MAX_INST);
|
||||
}
|
||||
else{
|
||||
if(!c->Base.is_r500){
|
||||
rc_emulate_loops(&loop_state, R300_PFS_MAX_ALU_INST);
|
||||
debug_program_log(c, "after emulate loops");
|
||||
}
|
||||
|
||||
debug_program_log(c, "after emulate looops");
|
||||
|
||||
rc_optimize(&c->Base);
|
||||
|
||||
|
@@ -30,6 +30,7 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#include "../r300_reg.h"
|
||||
#include "radeon_emulate_loops.h"
|
||||
|
||||
/**
|
||||
* Rewrite IF instructions to use the ALU result special register.
|
||||
@@ -59,6 +60,31 @@ int r500_transform_IF(
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Rewrite loops to make them easier to emit. This is not a local
|
||||
* transformation, because it modifies and reorders an entire block of code.
|
||||
*
* The generic rc_transform_unroll_loops() pass is run first; afterwards a
* CONTINUE instruction is added to every recorded loop that has an EndLoop
* instruction. The r500 emitter turns CONTINUE into a jump back to the
* recorded start of the loop.
*
* \param c  compiler whose program is rewritten
* \param s  loop state; filled in by rc_transform_unroll_loops() and then
*           read here (LoopCount, Loops[i].EndLoop, Loops[i].EndIf)
*/
|
||||
void r500_transform_unroll_loops(struct radeon_compiler * c,
|
||||
struct emulate_loop_state *s)
|
||||
{
|
||||
int i;
|
||||
|
||||
rc_transform_unroll_loops(c, s);
|
||||
|
||||
/* Walk the recorded loops from the last entry down to the first. */
for( i = s->LoopCount - 1; i >= 0; i-- ){
|
||||
struct rc_instruction * inst_continue;
|
||||
/* Skip entries that have no EndLoop instruction recorded. */
if(!s->Loops[i].EndLoop){
|
||||
continue;
|
||||
}
|
||||
/* Insert a continue instruction at the end of the loop. This
|
||||
* is required in order to emit loops correctly.
* NOTE(review): the new instruction is placed relative to
* EndIf->Prev; presumably rc_insert_new_instruction() inserts
* after the instruction it is given - confirm. */
|
||||
inst_continue = rc_insert_new_instruction(c,
|
||||
s->Loops[i].EndIf->Prev);
|
||||
inst_continue->U.I.Opcode = RC_OPCODE_CONTINUE;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
|
||||
{
|
||||
unsigned int relevant;
|
||||
@@ -322,6 +348,11 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
|
||||
case R500_INST_TYPE_FC:
|
||||
fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2);
|
||||
inst = code->inst[n].inst2;
|
||||
/* JUMP_FUNC JUMP_ANY*/
|
||||
fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff,
|
||||
(inst & R500_FC_JUMP_ANY) >> 5);
|
||||
|
||||
/* OP */
|
||||
switch(inst & 0x7){
|
||||
case R500_FC_OP_JUMP:
|
||||
fprintf(stderr, "JUMP");
|
||||
@@ -348,9 +379,8 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
|
||||
fprintf(stderr, "CONTINUE");
|
||||
break;
|
||||
}
|
||||
fprintf(stderr, " B_ELSE: %1x, JUMP_ANY: %1x", (inst & R500_FC_B_ELSE) >> 4,
|
||||
(inst & R500_FC_JUMP_ANY) >> 5);
|
||||
fprintf(stderr, ", A_OP: ");
|
||||
fprintf(stderr," ");
|
||||
/* A_OP */
|
||||
switch(inst & (0x3 << 6)){
|
||||
case R500_FC_A_OP_NONE:
|
||||
fprintf(stderr, "NONE");
|
||||
@@ -362,11 +392,9 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
|
||||
fprintf(stderr, "PUSH");
|
||||
break;
|
||||
}
|
||||
fprintf(stderr, "\n\tJUMP_FUNC 0x%02x, B_POP_CNT: %d",
|
||||
(inst >> 8) & 0xff,
|
||||
(inst >> 16) & 0x1f);
|
||||
/* B_OP0 B_OP1 */
|
||||
for(i=0; i<2; i++){
|
||||
fprintf(stderr, ", B_OP%d: ", i);
|
||||
fprintf(stderr, " ");
|
||||
switch(inst & (0x3 << (24 + (i * 2)))){
|
||||
/* R500_FC_B_OP0_NONE
|
||||
* R500_FC_B_OP1_NONE */
|
||||
@@ -383,9 +411,17 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
|
||||
break;
|
||||
}
|
||||
}
|
||||
fprintf(stderr, ", IGN_UNC: %1x\n", inst & R500_FC_IGNORE_UNCOVERED);
|
||||
/*POP_CNT B_ELSE */
|
||||
fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4);
|
||||
inst = code->inst[n].inst3;
|
||||
fprintf(stderr, "\t3:FC_ADDR 0x%08x:", inst);
|
||||
/* JUMP_ADDR */
|
||||
fprintf(stderr, " %d", inst >> 16);
|
||||
|
||||
if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){
|
||||
fprintf(stderr, " IGN_UNC");
|
||||
}
|
||||
inst = code->inst[n].inst3;
|
||||
fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst);
|
||||
fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n",
|
||||
inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31);
|
||||
break;
|
||||
|
@@ -36,6 +36,8 @@
|
||||
#include "radeon_compiler.h"
|
||||
#include "radeon_swizzle.h"
|
||||
|
||||
struct emulate_loop_state;
|
||||
|
||||
extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
|
||||
|
||||
extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c);
|
||||
@@ -47,4 +49,6 @@ extern int r500_transform_IF(
|
||||
struct rc_instruction * inst,
|
||||
void* data);
|
||||
|
||||
void r500_transform_unroll_loops(struct radeon_compiler * c,
|
||||
struct emulate_loop_state * s);
|
||||
#endif
|
||||
|
@@ -45,6 +45,8 @@
|
||||
|
||||
#include "radeon_program_pair.h"
|
||||
|
||||
#define MAX_BRANCH_DEPTH_FULL 32
|
||||
#define MAX_BRANCH_DEPTH_PARTIAL 4
|
||||
|
||||
#define PROG_CODE \
|
||||
struct r500_fragment_program_code *code = &c->code->code.r500
|
||||
@@ -61,6 +63,10 @@ struct branch_info {
|
||||
int Endif;
|
||||
};
|
||||
|
||||
/* Per-loop bookkeeping kept by the flow control emitter (one entry per
 * loop that is currently open while instructions are emitted). */
struct loop_info {
|
||||
int LoopStart; /* inst_end value captured at BGNLOOP; used as the jump address for CONTINUE */
|
||||
};
|
||||
|
||||
struct emit_state {
|
||||
struct radeon_compiler * C;
|
||||
struct r500_fragment_program_code * Code;
|
||||
@@ -69,7 +75,12 @@ struct emit_state {
|
||||
unsigned int CurrentBranchDepth;
|
||||
unsigned int BranchesReserved;
|
||||
|
||||
struct loop_info * Loops;
|
||||
unsigned int CurrentLoopDepth;
|
||||
unsigned int LoopsReserved;
|
||||
|
||||
unsigned int MaxBranchDepth;
|
||||
|
||||
};
|
||||
|
||||
static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
|
||||
@@ -359,16 +370,49 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
|
||||
|
||||
s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
|
||||
|
||||
if (inst->U.I.Opcode == RC_OPCODE_IF) {
|
||||
if (s->CurrentBranchDepth >= 32) {
|
||||
switch(inst->U.I.Opcode){
|
||||
struct branch_info * branch;
|
||||
struct loop_info * loop;
|
||||
case RC_OPCODE_BGNLOOP:
|
||||
memory_pool_array_reserve(&s->C->Pool, struct loop_info,
|
||||
s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
|
||||
|
||||
loop = &s->Loops[s->CurrentLoopDepth++];
|
||||
|
||||
/* We don't emit an instruction for BGNLOOP, so we need to
|
||||
* decrement the instruction counter, but first we need to
|
||||
* set LoopStart to the current value of inst_end, which
|
||||
* will end up being the first real instruction in the loop.*/
|
||||
loop->LoopStart = s->Code->inst_end--;
|
||||
break;
|
||||
|
||||
case RC_OPCODE_BRK:
|
||||
/* Don't emit an instruction for BRK */
|
||||
s->Code->inst_end--;
|
||||
break;
|
||||
|
||||
case RC_OPCODE_CONTINUE:
|
||||
loop = &s->Loops[s->CurrentLoopDepth - 1];
|
||||
s->Code->inst[newip].inst2 = R500_FC_OP_JUMP |
|
||||
R500_FC_JUMP_FUNC(0xff);
|
||||
s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->LoopStart);
|
||||
break;
|
||||
|
||||
case RC_OPCODE_ENDLOOP:
|
||||
/* Don't emit an instruction for ENDLOOP */
|
||||
s->Code->inst_end--;
|
||||
s->CurrentLoopDepth--;
|
||||
break;
|
||||
|
||||
case RC_OPCODE_IF:
|
||||
if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) {
|
||||
rc_error(s->C, "Branch depth exceeds hardware limit");
|
||||
return;
|
||||
}
|
||||
|
||||
memory_pool_array_reserve(&s->C->Pool, struct branch_info,
|
||||
s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
|
||||
|
||||
struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++];
|
||||
branch = &s->Branches[s->CurrentBranchDepth++];
|
||||
branch->If = newip;
|
||||
branch->Else = -1;
|
||||
branch->Endif = -1;
|
||||
@@ -377,29 +421,50 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
|
||||
s->MaxBranchDepth = s->CurrentBranchDepth;
|
||||
|
||||
/* actual instruction is filled in at ENDIF time */
|
||||
} else if (inst->U.I.Opcode == RC_OPCODE_ELSE) {
|
||||
break;
|
||||
|
||||
case RC_OPCODE_ELSE:
|
||||
if (!s->CurrentBranchDepth) {
|
||||
rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
|
||||
return;
|
||||
}
|
||||
|
||||
struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
|
||||
branch = &s->Branches[s->CurrentBranchDepth - 1];
|
||||
branch->Else = newip;
|
||||
|
||||
/* actual instruction is filled in at ENDIF time */
|
||||
} else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
|
||||
break;
|
||||
|
||||
case RC_OPCODE_ENDIF:
|
||||
if (!s->CurrentBranchDepth) {
|
||||
rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
|
||||
return;
|
||||
}
|
||||
|
||||
struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
|
||||
branch->Endif = newip;
|
||||
|
||||
branch = &s->Branches[s->CurrentBranchDepth - 1];
|
||||
|
||||
if(inst->Prev->U.I.Opcode == RC_OPCODE_BRK){
|
||||
branch->Endif = --s->Code->inst_end;
|
||||
s->Code->inst[branch->Endif].inst2 |=
|
||||
R500_FC_B_OP0_DECR;
|
||||
}
|
||||
else{
|
||||
branch->Endif = newip;
|
||||
|
||||
s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
|
||||
| R500_FC_A_OP_NONE /* no address stack */
|
||||
| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
|
||||
| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
|
||||
| R500_FC_B_OP1_NONE /* no branch counter if stay */
|
||||
| R500_FC_B_POP_CNT(1)
|
||||
;
|
||||
s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
|
||||
}
|
||||
s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
|
||||
| R500_FC_A_OP_NONE /* no address stack */
|
||||
| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
|
||||
| R500_FC_B_OP0_INCR /* increment branch counter if stay */
|
||||
| R500_FC_IGNORE_UNCOVERED
|
||||
;
|
||||
|
||||
if (branch->Else >= 0) {
|
||||
@@ -421,17 +486,10 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
|
||||
s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
|
||||
}
|
||||
|
||||
s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
|
||||
| R500_FC_A_OP_NONE /* no address stack */
|
||||
| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
|
||||
| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
|
||||
| R500_FC_B_OP1_NONE /* no branch counter if stay */
|
||||
| R500_FC_B_POP_CNT(1)
|
||||
;
|
||||
s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
|
||||
|
||||
s->CurrentBranchDepth--;
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
|
||||
}
|
||||
}
|
||||
@@ -486,6 +544,10 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
|
||||
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
|
||||
}
|
||||
|
||||
/* Use FULL flow control mode if branches are nested deep enough.
|
||||
* We do not need to enable FULL flow control mode for loops, because
|
||||
* we aren't using the hardware loop instructions.
|
||||
*/
|
||||
if (s.MaxBranchDepth >= 4) {
|
||||
if (code->max_temp_idx < 1)
|
||||
code->max_temp_idx = 1;
|
||||
|
@@ -235,6 +235,10 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
|
||||
}
|
||||
break;
|
||||
}
|
||||
case RC_OPCODE_CONTINUE:
|
||||
case RC_OPCODE_BRK:
|
||||
case RC_OPCODE_BGNLOOP:
|
||||
break;
|
||||
case RC_OPCODE_ENDIF:
|
||||
push_branch(&s);
|
||||
break;
|
||||
|
@@ -385,6 +385,12 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 0,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_CONTINUE,
|
||||
.Name = "CONTINUE",
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 0
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_REPL_ALPHA,
|
||||
.Name = "REPL_ALPHA",
|
||||
|
@@ -187,6 +187,8 @@ typedef enum {
|
||||
|
||||
RC_OPCODE_ENDLOOP,
|
||||
|
||||
RC_OPCODE_CONTINUE,
|
||||
|
||||
/** special instruction, used in R300-R500 fragment program pair instructions
|
||||
* indicates that the result of the alpha operation shall be replicated
|
||||
* across all other channels */
|
||||
|
Reference in New Issue
Block a user