diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c index 339f955a3e8..8d71fb304a4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c @@ -325,7 +325,7 @@ icmp32(struct lp_build_nir_context *bld_base, /** * Get a source register value for an ALU instruction. - * This is where swizzled are handled. There should be no negation + * This is where swizzles are handled. There should be no negation * or absolute value modifiers. * num_components indicates the number of components needed in the * returned array or vector. @@ -335,25 +335,63 @@ get_alu_src(struct lp_build_nir_context *bld_base, nir_alu_src src, unsigned num_components) { + assert(!src.negate); + assert(!src.abs); + assert(num_components >= 1); + assert(num_components <= 4); + struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; + const unsigned src_components = nir_src_num_components(src.src); + assert(src_components > 0); LLVMValueRef value = get_src(bld_base, src.src); - bool need_swizzle = false; - assert(value); - if (is_aos(bld_base)) - return value; - - unsigned src_components = nir_src_num_components(src.src); - for (unsigned i = 0; i < num_components; ++i) { - assert(src.swizzle[i] < src_components); - if (src.swizzle[i] != i) + /* check if swizzling needed for the src vector */ + bool need_swizzle = false; + for (unsigned i = 0; i < src_components; ++i) { + if (src.swizzle[i] != i) { need_swizzle = true; + break; + } + } + + if (is_aos(bld_base) && !need_swizzle) { + return value; } if (need_swizzle || num_components != src_components) { - if (src_components > 1 && num_components == 1) { + if (is_aos(bld_base) && need_swizzle) { + // Handle swizzle for AOS + assert(LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind); + + // swizzle vector of ((r,g,b,a), (r,g,b,a), (r,g,b,a), (r,g,b,a)) + assert(bld_base->base.type.width == 8); + assert(bld_base->base.type.length == 16); + + // Do our own swizzle here since lp_build_swizzle_aos_n() does + // not do what we want. + // Ex: value = {r0,g0,b0,a0, r1,g1,b1,a1, r2,g2,b2,a2, r3,g3,b3,a3}. + // aos swizzle = {2,1,0,3} // swap red/blue + // shuffles = {2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15} + // result = {b0,g0,r0,a0, b1,g1,r1,a1, b2,g2,r2,a2, b3,g3,r3,a3}. + LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH]; + for (unsigned i = 0; i < 16; i++) { + unsigned chan = i % 4; + /* apply src register swizzle */ + if (chan < num_components) { + chan = src.swizzle[chan]; + } else { + chan = src.swizzle[0]; + } + /* apply aos swizzle */ + chan = lp_nir_aos_swizzle(bld_base, chan); + shuffles[i] = lp_build_const_int32(gallivm, (i & ~3) + chan); + } + value = LLVMBuildShuffleVector(builder, value, + LLVMGetUndef(LLVMTypeOf(value)), + LLVMConstVector(shuffles, 16), ""); + } else if (src_components > 1 && num_components == 1) { value = LLVMBuildExtractValue(gallivm->builder, value, src.swizzle[0], ""); } else if (src_components == 1 && num_components > 1) { @@ -369,8 +407,7 @@ get_alu_src(struct lp_build_nir_context *bld_base, value = arr; } } - assert(!src.negate); - assert(!src.abs); + return value; } @@ -1262,14 +1299,6 @@ visit_alu(struct lp_build_nir_context *bld_base, result[0], temp_chan); } } else if (is_aos(bld_base)) { - if (instr->op == nir_op_fmul) { - if (LLVMIsConstant(src[0])) { - src[0] = lp_nir_aos_conv_const(gallivm, src[0], 1); - } - if (LLVMIsConstant(src[1])) { - src[1] = lp_nir_aos_conv_const(gallivm, src[1], 1); - } - } result[0] = do_alu_action(bld_base, instr, src_bit_size, src); } else { /* Loop for R,G,B,A channels */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.h b/src/gallium/auxiliary/gallivm/lp_bld_nir.h index d02da2b7db5..922208d8603 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.h @@ -358,9 +358,8 @@ get_int_bld(struct lp_build_nir_context *bld_base, } -LLVMValueRef -lp_nir_aos_conv_const(struct gallivm_state *gallivm, - LLVMValueRef constval, int nc); +unsigned +lp_nir_aos_swizzle(struct lp_build_nir_context *bld_base, unsigned chan); #endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c index e4a332ae497..36a2304a04d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c @@ -90,36 +90,6 @@ swizzle_aos(struct lp_build_nir_context *bld_base, } -LLVMValueRef -lp_nir_aos_conv_const(struct gallivm_state *gallivm, - LLVMValueRef constval, int nc) -{ - LLVMValueRef elems[16]; - uint8_t val = 0; - /* convert from 1..4 x f32 to 16 x unorm8 */ - for (unsigned i = 0; i < nc; i++) { - LLVMValueRef value = - LLVMBuildExtractElement(gallivm->builder, constval, - lp_build_const_int32(gallivm, i), ""); - assert(LLVMIsConstant(value)); - unsigned uval = LLVMConstIntGetZExtValue(value); - float f = uif(uval); - val = float_to_ubyte(f); - for (unsigned j = 0; j < 4; j++) { - elems[j * 4 + i] = - LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), val, 0); - } - } - for (unsigned i = nc; i < 4; i++) { - for (unsigned j = 0; j < 4; j++) { - elems[j * 4 + i] = - LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), val, 0); - } - } - return LLVMConstVector(elems, 16); -} - - static void init_var_slots(struct lp_build_nir_context *bld_base, nir_variable *var) @@ -183,10 +153,6 @@ emit_store_var(struct lp_build_nir_context *bld_base, struct gallivm_state *gallivm = bld_base->base.gallivm; unsigned location = var->data.driver_location; - if (LLVMIsConstant(vals)) { - vals = lp_nir_aos_conv_const(gallivm, vals, num_components); - } - if (deref_mode == nir_var_shader_out) { LLVMBuildStore(gallivm->builder, vals, bld->outputs[location]); } @@ -205,6 +171,37 @@ emit_load_reg(struct lp_build_nir_context *bld_base, } +unsigned +lp_nir_aos_swizzle(struct lp_build_nir_context *bld_base, unsigned chan) +{ + struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base); + return bld->swizzles[chan]; +} + + +/* + * If an instruction has a writemask like r0.x = foo and the + * AOS/linear context uses swizzle={2,1,0,3} we need to change + * the writemask to r0.z + */ +static unsigned +swizzle_writemask(struct lp_build_nir_aos_context *bld, + unsigned writemask) +{ + assert(writemask != 0x0); + assert(writemask != 0xf); + + // Ex: swap r/b channels + unsigned new_writemask = 0; + for (unsigned chan = 0; chan < 4; chan++) { + if (writemask & (1 << chan)) { + new_writemask |= 1 << bld->swizzles[chan]; + } + } + return new_writemask; +} + + static void emit_store_reg(struct lp_build_nir_context *bld_base, struct lp_build_context *reg_bld, @@ -214,17 +211,18 @@ emit_store_reg(struct lp_build_nir_context *bld_base, LLVMValueRef reg_storage, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS]) { + struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; - if (LLVMIsConstant(vals[0])) - vals[0] = lp_nir_aos_conv_const(gallivm, vals[0], 1); - if (writemask == 0xf) { LLVMBuildStore(gallivm->builder, vals[0], reg_storage); return; } - LLVMValueRef cur = LLVMBuildLoad2(gallivm->builder, reg_bld->vec_type, reg_storage, ""); + writemask = swizzle_writemask(bld, writemask); + + LLVMValueRef cur = LLVMBuildLoad2(gallivm->builder, reg_bld->vec_type, + reg_storage, ""); LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; for (unsigned j = 0; j < 16; j++) { @@ -325,21 +323,31 @@ emit_load_const(struct lp_build_nir_context *bld_base, LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS]) { struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - LLVMValueRef elems[4]; + LLVMValueRef elems[16]; const int nc = instr->def.num_components; bool do_swizzle = false; if (nc == 4) do_swizzle = true; - for (unsigned i = 0; i < nc; i++) { - int idx = do_swizzle ? bld->swizzles[i] : i; - elems[idx] = LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), - instr->value[i].u32, - bld_base->base.type.sign ? 1 : 0); + /* The constant is something like {float, float, float, float}. + * We need to convert the float values from [0,1] to ubyte in [0,255]. + * We previously checked for values outside [0,1] in + * llvmpipe_nir_fn_is_linear_compat(). + * Also, we convert the (typically) 4-element float constant into a + * swizzled 16-element ubyte constant (z,y,x,w, z,y,x,w, z,y,x,w, z,y,x,w) + * since that's what 'linear' mode operates on. + */ + assert(bld_base->base.type.length <= ARRAY_SIZE(elems)); + for (unsigned i = 0; i < bld_base->base.type.length; i++) { + const unsigned j = do_swizzle ? bld->swizzles[i % nc] : i % nc; + assert(instr->value[j].f32 >= 0.0f); + assert(instr->value[j].f32 <= 1.0f); + const unsigned u8val = float_to_ubyte(instr->value[j].f32); + elems[i] = LLVMConstInt(bld_base->uint_bld.int_elem_type, u8val, 0); } - outval[0] = LLVMConstVector(elems, nc); + outval[0] = LLVMConstVector(elems, bld_base->base.type.length); + outval[1] = outval[2] = outval[3] = NULL; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c b/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c index 302d4a4b858..ad2405317e5 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c @@ -284,6 +284,25 @@ get_texcoord_provenance(const nir_tex_src *texcoord, } +/* + * Check if all the values of a nir_load_const_instr are 32-bit + * floats in the range [0,1]. If so, return true, else return false. + */ +static bool +check_load_const_in_zero_one(const nir_load_const_instr *load) +{ + if (load->def.bit_size != 32) + return false; + for (unsigned c = 0; c < load->def.num_components; c++) { + float val = load->value[c].f32; + if (val < 0.0 || val > 1.0 || isnan(val)) { + return false; + } + } + return true; +} + + /* * Examine the NIR shader to determine if it's "linear". */ @@ -296,8 +315,14 @@ llvmpipe_nir_fn_is_linear_compat(const struct nir_shader *shader, nir_foreach_instr_safe(instr, block) { switch (instr->type) { case nir_instr_type_deref: - case nir_instr_type_load_const: break; + case nir_instr_type_load_const: { + nir_load_const_instr *load = nir_instr_as_load_const(instr); + if (!check_load_const_in_zero_one(load)) { + return false; + } + break; + } case nir_instr_type_intrinsic: { nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); if (intrin->intrinsic != nir_intrinsic_load_deref && @@ -384,14 +409,8 @@ llvmpipe_nir_fn_is_linear_compat(const struct nir_shader *shader, if (nir_src_is_const(alu->src[s].src)) { nir_load_const_instr *load = nir_instr_as_load_const(alu->src[s].src.ssa->parent_instr); - - if (load->def.bit_size != 32) + if (!check_load_const_in_zero_one(load)) { return false; - for (unsigned c = 0; c < load->def.num_components; c++) { - if (load->value[c].f32 < 0.0 || load->value[c].f32 > 1.0) { - info->unclamped_immediates = true; - return false; - } } } } @@ -437,7 +456,6 @@ llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader) shader->info.base.num_outputs == 1 && !shader->info.indirect_textures && !shader->info.sampler_texture_units_different && - !shader->info.unclamped_immediates && shader->info.num_texs <= LP_MAX_LINEAR_TEXTURES && llvmpipe_nir_is_linear_compat(shader->base.ir.nir, &shader->info)) { shader->kind = LP_FS_KIND_LLVM_LINEAR; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c b/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c index 41bbe84dbe2..3f794759408 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c @@ -279,7 +279,7 @@ llvmpipe_fs_variant_linear_llvm(struct llvmpipe_context *lp, */ char func_name[256]; - snprintf(func_name, sizeof(func_name), "fs_variant_linear"); + snprintf(func_name, sizeof(func_name), "fs_variant_linear2"); LLVMTypeRef ret_type = pint8t; LLVMTypeRef arg_types[4];