diff --git a/src/gallium/frontends/nine/nine_shader.c b/src/gallium/frontends/nine/nine_shader.c
index 7d93513ed95..9720a40d7f5 100644
--- a/src/gallium/frontends/nine/nine_shader.c
+++ b/src/gallium/frontends/nine/nine_shader.c
@@ -822,6 +822,47 @@ tx_addr_alloc(struct shader_translator *tx, INT idx)
         tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
 }
 
+/* Emit a TG4 fetch in place of the regular texture fetch when FETCH4 is
+ * enabled for sampler idx.
+ *
+ * tx:     translator; bit idx of tx->info->fetch4 selects the FETCH4 path.
+ * dst:    receives the TG4 result, swizzled ZXYW.
+ * target: texture target handed to the TG4 instruction.
+ * src0:   first TG4 operand (callers pass src[0] of the instruction).
+ * src1:   third TG4 operand (callers pass src[1] of the instruction).
+ * idx:    sampler index (callers pass tx->insn.src[1].idx).
+ *
+ * Returns false without emitting anything when the bit is clear, so the
+ * caller emits its usual TEX/TXD/TXL; returns true once TG4 was emitted.
+ */
+static inline bool
+TEX_if_fetch4(struct shader_translator *tx, struct ureg_dst dst,
+              unsigned target, struct ureg_src src0,
+              struct ureg_src src1, INT idx)
+{
+    struct ureg_dst tmp;
+    struct ureg_src src_tg4[3];
+
+    if (!(tx->info->fetch4 & (1u << idx)))
+        return false;
+
+    /* TODO: needs more tests, but this feature is not much used at all */
+
+    /* Operands are built only on the TG4 path so that ureg_imm1f() is not
+     * called for samplers that keep the regular fetch.
+     * NOTE(review): the 0.f operand is presumably the gather component
+     * select -- confirm against the TGSI TG4 definition. */
+    src_tg4[0] = src0;
+    src_tg4[1] = ureg_imm1f(tx->ureg, 0.f);
+    src_tg4[2] = src1;
+
+    tmp = tx_scratch(tx);
+    ureg_tex_insn(tx->ureg, TGSI_OPCODE_TG4, &tmp, 1, target, TGSI_RETURN_TYPE_FLOAT,
+                  NULL, 0, src_tg4, 3);
+    ureg_MOV(tx->ureg, dst, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z, X, Y, W)));
+    return true;
+}
+
 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
  * the projection should be applied on the texture. It doesn't
  * apply on texkill.
@@ -2933,6 +2953,9 @@ DECL_SPECIAL(TEXLD) tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); target = tx->sampler_targets[tx->insn.src[1].idx]; + if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx)) + return D3D_OK; + switch (tx->insn.flags) { case 0: ureg_TEX(ureg, dst, target, src[0], src[1]); @@ -2997,6 +3020,9 @@ DECL_SPECIAL(TEXLDD) tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); target = tx->sampler_targets[tx->insn.src[1].idx]; + if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx)) + return D3D_OK; + ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]); return D3D_OK; } @@ -3013,6 +3039,9 @@ DECL_SPECIAL(TEXLDL) tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); target = tx->sampler_targets[tx->insn.src[1].idx]; + if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx)) + return D3D_OK; + ureg_TXL(tx->ureg, dst, target, src[0], src[1]); return D3D_OK; } diff --git a/src/gallium/frontends/nine/nine_shader.h b/src/gallium/frontends/nine/nine_shader.h index 5abdbe24472..4a0804b6f35 100644 --- a/src/gallium/frontends/nine/nine_shader.h +++ b/src/gallium/frontends/nine/nine_shader.h @@ -71,6 +71,7 @@ struct nine_shader_info uint8_t fog_mode; uint8_t force_color_in_centroid; uint8_t projected; /* ps 1.1 to 1.3 */ + uint16_t fetch4; unsigned const_i_base; /* in vec4 (16 byte) units */ unsigned const_b_base; /* in vec4 (16 byte) units */ diff --git a/src/gallium/frontends/nine/nine_state.c b/src/gallium/frontends/nine/nine_state.c index 1730eb9b23f..d6a13622146 100644 --- a/src/gallium/frontends/nine/nine_state.c +++ b/src/gallium/frontends/nine/nine_state.c @@ -1352,6 +1352,8 @@ NineDevice9_ResolveZ( struct NineDevice9 *device ) #define ALPHA_TO_COVERAGE_ENABLE MAKEFOURCC('A', '2', 'M', '1') #define ALPHA_TO_COVERAGE_DISABLE MAKEFOURCC('A', '2', 'M', '0') +#define FETCH4_ENABLE MAKEFOURCC('G', 'E', 'T', '4') +#define FETCH4_DISABLE MAKEFOURCC('G', 'E', 'T', '1') /* Nine_context 
functions. * Serialized through CSMT macros. @@ -1499,6 +1501,18 @@ CSMT_ITEM_NO_WAIT(nine_context_set_sampler_state, { struct nine_context *context = &device->context; + if (unlikely(Type == D3DSAMP_MIPMAPLODBIAS)) { + if (Value == FETCH4_ENABLE || + Value == FETCH4_DISABLE) { + context->rs[NINED3DRS_FETCH4] &= ~(1 << Sampler); + context->rs[NINED3DRS_FETCH4] |= (Value == FETCH4_ENABLE) << Sampler; + context->changed.group |= NINE_STATE_PS_PARAMS_MISC; + if (Value == FETCH4_ENABLE) + WARN_ONCE("FETCH4 support is incomplete. Please report if buggy shadows."); + return; + } + } + if (unlikely(!nine_check_sampler_state_value(Type, Value))) return; @@ -2739,7 +2753,8 @@ static const DWORD nine_render_state_defaults[NINED3DRS_LAST + 1] = [NINED3DRS_VSPOINTSIZE] = FALSE, [NINED3DRS_RTMASK] = 0xf, [NINED3DRS_ALPHACOVERAGE] = FALSE, - [NINED3DRS_MULTISAMPLE] = FALSE + [NINED3DRS_MULTISAMPLE] = FALSE, + [NINED3DRS_FETCH4] = 0 }; static const DWORD nine_tex_stage_state_defaults[NINED3DTSS_LAST + 1] = { diff --git a/src/gallium/frontends/nine/nine_state.h b/src/gallium/frontends/nine/nine_state.h index 7483db61469..d0cf121867b 100644 --- a/src/gallium/frontends/nine/nine_state.h +++ b/src/gallium/frontends/nine/nine_state.h @@ -40,10 +40,11 @@ */ #define NINED3DRS_ALPHACOVERAGE (D3DRS_BLENDOPALPHA + 3) #define NINED3DRS_MULTISAMPLE (D3DRS_BLENDOPALPHA + 4) +#define NINED3DRS_FETCH4 (D3DRS_BLENDOPALPHA + 5) #define D3DRS_LAST D3DRS_BLENDOPALPHA #define D3DSAMP_LAST D3DSAMP_DMAPOFFSET -#define NINED3DRS_LAST NINED3DRS_MULTISAMPLE /* 214 */ +#define NINED3DRS_LAST NINED3DRS_FETCH4 /* 215 */ #define NINED3DSAMP_LAST NINED3DSAMP_CUBETEX /* 16 */ #define NINED3DTSS_LAST D3DTSS_CONSTANT #define NINED3DTS_LAST D3DTS_WORLDMATRIX(255) diff --git a/src/gallium/frontends/nine/pixelshader9.c b/src/gallium/frontends/nine/pixelshader9.c index 4b85c738f79..4b2b2b72e1e 100644 --- a/src/gallium/frontends/nine/pixelshader9.c +++ b/src/gallium/frontends/nine/pixelshader9.c @@ -57,6 +57,7 @@ 
NinePixelShader9_ctor( struct NinePixelShader9 *This, info.const_i_base = NINE_CONST_I_BASE(device->max_ps_const_f) / 16; info.const_b_base = NINE_CONST_B_BASE(device->max_ps_const_f) / 16; info.sampler_mask_shadow = 0x0; + info.fetch4 = 0x0; info.sampler_ps1xtypes = 0x0; info.fog_enable = 0; info.projected = 0; @@ -208,6 +209,7 @@ NinePixelShader9_GetVariant( struct NinePixelShader9 *This, nine_shader_constant_combination_get(This->c_combinations, (key >> 24) & 0xff); info.add_constants_defs.int_const_added = &This->int_slots_used; info.add_constants_defs.bool_const_added = &This->bool_slots_used; + info.fetch4 = key >> 32 ; info.process_vertices = false; info.swvp_on = false; diff --git a/src/gallium/frontends/nine/pixelshader9.h b/src/gallium/frontends/nine/pixelshader9.h index 6bac90be7a9..1cf5a553856 100644 --- a/src/gallium/frontends/nine/pixelshader9.h +++ b/src/gallium/frontends/nine/pixelshader9.h @@ -76,13 +76,16 @@ NinePixelShader9_UpdateKey( struct NinePixelShader9 *ps, struct nine_context *context ) { uint16_t samplers_shadow; + uint16_t samplers_fetch4; uint16_t samplers_ps1_types; uint8_t projected; uint64_t key; BOOL res; samplers_shadow = (uint16_t)((context->samplers_shadow & NINE_PS_SAMPLERS_MASK) >> NINE_SAMPLER_PS(0)); + samplers_fetch4 = (uint16_t)((context->samplers_fetch4 & NINE_PS_SAMPLERS_MASK) >> NINE_SAMPLER_PS(0)); key = samplers_shadow & ps->sampler_mask; + samplers_fetch4 &= ps->sampler_mask; if (unlikely(ps->byte_code.version < 0x20)) { /* variable targets */ @@ -124,6 +127,7 @@ NinePixelShader9_UpdateKey( struct NinePixelShader9 *ps, (void *)context->ps_const_i, context->ps_const_b)) << 24; + key |= ((uint64_t)(context->rs[NINED3DRS_FETCH4] & samplers_fetch4)) << 32; res = ps->last_key != key; if (res) ps->next_key = key; diff --git a/src/gallium/frontends/nine/vertexshader9.c b/src/gallium/frontends/nine/vertexshader9.c index 600e298a393..d88cfd51dab 100644 --- a/src/gallium/frontends/nine/vertexshader9.c +++ 
b/src/gallium/frontends/nine/vertexshader9.c @@ -62,6 +62,7 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This, info.const_i_base = NINE_CONST_I_BASE(device->max_vs_const_f) / 16; info.const_b_base = NINE_CONST_B_BASE(device->max_vs_const_f) / 16; info.sampler_mask_shadow = 0x0; + info.fetch4 = 0x0; info.sampler_ps1xtypes = 0x0; info.fog_enable = 0; info.point_size_min = 0; @@ -214,6 +215,7 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This, info.const_b_base = NINE_CONST_B_BASE(device->max_vs_const_f) / 16; info.byte_code = This->byte_code.tokens; info.sampler_mask_shadow = key & 0xf; + info.fetch4 = 0x0; info.fog_enable = device->context.rs[D3DRS_FOGENABLE]; info.point_size_min = asfloat(device->context.rs[D3DRS_POINTSIZE_MIN]); info.point_size_max = asfloat(device->context.rs[D3DRS_POINTSIZE_MAX]); @@ -260,6 +262,7 @@ NineVertexShader9_GetVariantProcessVertices( struct NineVertexShader9 *This, info.const_b_base = 0; info.byte_code = This->byte_code.tokens; info.sampler_mask_shadow = 0; + info.fetch4 = 0x0; info.fog_enable = false; info.point_size_min = 0; info.point_size_max = 0;