ac/nir: Fix store_scratch with a non-full writemask
By adding one more helper to ac_llvm_build, we can also easily keep
vector stores together.
Fixes the
tests/spec/glsl-1.30/execution/fs-large-local-array-vec4.shader_test
piglit test.
Fixes: 74470baebb ("ac/nir: Lower large indirect variables to scratch")
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
@@ -626,6 +626,22 @@ ac_build_expand(struct ac_llvm_context *ctx,
|
|||||||
return ac_build_gather_values(ctx, chan, dst_channels);
|
return ac_build_gather_values(ctx, chan, dst_channels);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Extract components [start, start + channels) from a vector.
|
||||||
|
*/
|
||||||
|
LLVMValueRef
|
||||||
|
ac_extract_components(struct ac_llvm_context *ctx,
|
||||||
|
LLVMValueRef value,
|
||||||
|
unsigned start,
|
||||||
|
unsigned channels)
|
||||||
|
{
|
||||||
|
LLVMValueRef chan[channels];
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < channels; i++)
|
||||||
|
chan[i] = ac_llvm_extract_elem(ctx, value, i + start);
|
||||||
|
|
||||||
|
return ac_build_gather_values(ctx, chan, channels);
|
||||||
|
}
|
||||||
|
|
||||||
/* Expand a scalar or vector to <4 x type> by filling the remaining channels
|
/* Expand a scalar or vector to <4 x type> by filling the remaining channels
|
||||||
* with undef. Extract at most num_channels components from the input.
|
* with undef. Extract at most num_channels components from the input.
|
||||||
*/
|
*/
|
||||||
|
@@ -190,6 +190,13 @@ LLVMValueRef
|
|||||||
ac_build_gather_values(struct ac_llvm_context *ctx,
|
ac_build_gather_values(struct ac_llvm_context *ctx,
|
||||||
LLVMValueRef *values,
|
LLVMValueRef *values,
|
||||||
unsigned value_count);
|
unsigned value_count);
|
||||||
|
|
||||||
|
LLVMValueRef
|
||||||
|
ac_extract_components(struct ac_llvm_context *ctx,
|
||||||
|
LLVMValueRef value,
|
||||||
|
unsigned start,
|
||||||
|
unsigned channels);
|
||||||
|
|
||||||
LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
|
LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
|
||||||
LLVMValueRef value,
|
LLVMValueRef value,
|
||||||
unsigned num_channels);
|
unsigned num_channels);
|
||||||
|
@@ -3637,13 +3637,27 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
|
|||||||
offset);
|
offset);
|
||||||
LLVMTypeRef comp_type =
|
LLVMTypeRef comp_type =
|
||||||
LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
|
LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
|
||||||
LLVMTypeRef vec_type =
|
|
||||||
instr->src[0].ssa->num_components == 1 ? comp_type :
|
|
||||||
LLVMVectorType(comp_type, instr->src[0].ssa->num_components);
|
|
||||||
unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
|
unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
|
||||||
ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
|
ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
|
||||||
LLVMPointerType(vec_type, addr_space), "");
|
LLVMPointerType(comp_type, addr_space), "");
|
||||||
LLVMBuildStore(ctx->ac.builder, get_src(ctx, instr->src[0]), ptr);
|
LLVMValueRef src = get_src(ctx, instr->src[0]);
|
||||||
|
unsigned wrmask = nir_intrinsic_write_mask(instr);
|
||||||
|
while (wrmask) {
|
||||||
|
int start, count;
|
||||||
|
u_bit_scan_consecutive_range(&wrmask, &start, &count);
|
||||||
|
|
||||||
|
LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
|
||||||
|
LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
|
||||||
|
LLVMTypeRef vec_type =
|
||||||
|
count == 1 ? comp_type : LLVMVectorType(comp_type, count);
|
||||||
|
offset_ptr = LLVMBuildBitCast(ctx->ac.builder,
|
||||||
|
offset_ptr,
|
||||||
|
LLVMPointerType(vec_type, addr_space),
|
||||||
|
"");
|
||||||
|
LLVMValueRef offset_src =
|
||||||
|
ac_extract_components(&ctx->ac, src, start, count);
|
||||||
|
LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
Reference in New Issue
Block a user