optimize 965 clip

1. increase clip thread number to 2
  2. do cliptest for -rhw
This commit is contained in:
Zou Nan hai
2007-08-31 13:42:20 +08:00
parent 22bc3cdd49
commit b47c9f8c91
4 changed files with 112 additions and 14 deletions

View File

@@ -147,6 +147,13 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_clip_init_planes(c);
brw_clip_init_clipmask(c);
/* -ve rhw workaround */
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
plane_loop = brw_DO(p, BRW_EXECUTE_1);
{
/* if (planemask & 1)

View File

@@ -55,7 +55,7 @@ static void upload_clip_unit( struct brw_context *brw )
/* BRW_NEW_URB_FENCE */
clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries;
clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
clip.thread4.max_threads = 0; /* Hmm, maybe the max is 1 or 2 threads */
clip.thread4.max_threads = 1; /* 2 threads */
if (INTEL_DEBUG & DEBUG_STATS)
clip.thread4.stats_enable = 1;

View File

@@ -42,6 +42,20 @@
#include "brw_util.h"
#include "brw_clip.h"
static struct brw_reg get_tmp( struct brw_clip_compile *c )
{
struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);
if (++c->last_tmp > c->prog_data.total_grf)
c->prog_data.total_grf = c->last_tmp;
return tmp;
}
static void release_tmps( struct brw_clip_compile *c )
{
c->last_tmp = c->first_tmp;
}
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
@@ -435,15 +449,103 @@ static void maybe_do_clip_tri( struct brw_clip_compile *c )
brw_ENDIF(p, do_clip);
}
static void brw_clip_test( struct brw_clip_compile *c )
{
struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
struct brw_reg v0 = get_tmp(c);
struct brw_reg v1 = get_tmp(c);
struct brw_reg v2 = get_tmp(c);
struct brw_indirect vt0 = brw_indirect(0, 0);
struct brw_indirect vt1 = brw_indirect(1, 0);
struct brw_indirect vt2 = brw_indirect(2, 0);
struct brw_compile *p = &c->func;
brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS]));
brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS]));
brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS]));
brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));
/* test nearz, xmin, ymin plane */
brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_XOR(p, t, t1, t2);
brw_XOR(p, t1, t2, t3);
brw_OR(p, t, t, t1);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 0), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 1), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 2), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
/* test farz, xmax, ymax plane */
brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_XOR(p, t, t1, t2);
brw_XOR(p, t1, t2, t3);
brw_OR(p, t, t, t1);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 0), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 1), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 2), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
release_tmps(c);
}
void brw_emit_tri_clip( struct brw_clip_compile *c )
{
struct brw_instruction *neg_rhw;
struct brw_compile *p = &c->func;
brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
brw_clip_tri_init_vertices(c);
brw_clip_init_clipmask(c);
/* if -ve rhw workaround bit is set,
do cliptest */
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
neg_rhw = brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_test(c);
}
brw_ENDIF(p, neg_rhw);
/* Can't push into do_clip_tri because with polygon (or quad)
* flatshading, need to apply the flatshade here because we don't
* respect the PV when converting to trifan for emit:
@@ -462,6 +564,3 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
*/
brw_clip_kill_thread(c);
}

View File

@@ -272,6 +272,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c)
struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
{
return brw_address(c->reg.fixed_planes);
@@ -327,8 +328,7 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
/* Shift so that lowest outcode bit is rightmost:
*/
brw_MOV(p, c->reg.planemask, incoming);
brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(26));
brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));
if (c->key.nr_userclip) {
struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
@@ -342,13 +342,5 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
release_tmp(c, tmp);
}
/* Test for -ve rhw workaround
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, vec1(brw_null_reg()), incoming, brw_imm_ud(1<<20));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}