diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
index 339f955a3e8..8d71fb304a4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
@@ -325,7 +325,7 @@ icmp32(struct lp_build_nir_context *bld_base,
 
 /**
  * Get a source register value for an ALU instruction.
- * This is where swizzled are handled.  There should be no negation
+ * This is where swizzles are handled.  There should be no negation
  * or absolute value modifiers.
  * num_components indicates the number of components needed in the
  * returned array or vector.
@@ -335,25 +335,63 @@ get_alu_src(struct lp_build_nir_context *bld_base,
             nir_alu_src src,
             unsigned num_components)
 {
+   assert(!src.negate);
+   assert(!src.abs);
+   assert(num_components >= 1);
+   assert(num_components <= 4);
+
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
+   const unsigned src_components = nir_src_num_components(src.src);
+   assert(src_components > 0);
    LLVMValueRef value = get_src(bld_base, src.src);
-   bool need_swizzle = false;
-
    assert(value);
 
-   if (is_aos(bld_base))
-      return value;
-
-   unsigned src_components = nir_src_num_components(src.src);
-   for (unsigned i = 0; i < num_components; ++i) {
-      assert(src.swizzle[i] < src_components);
-      if (src.swizzle[i] != i)
+   /* check if swizzling needed for the src vector */
+   bool need_swizzle = false;
+   for (unsigned i = 0; i < src_components; ++i) {
+      if (src.swizzle[i] != i) {
          need_swizzle = true;
+         break;
+      }
+   }
+
+   if (is_aos(bld_base) && !need_swizzle) {
+      return value;
    }
 
    if (need_swizzle || num_components != src_components) {
-      if (src_components > 1 && num_components == 1) {
+      if (is_aos(bld_base) && need_swizzle) {
+         // Handle swizzle for AOS
+         assert(LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind);
+
+         // swizzle vector of ((r,g,b,a), (r,g,b,a), (r,g,b,a), (r,g,b,a))
+         assert(bld_base->base.type.width == 8);
+         assert(bld_base->base.type.length == 16);
+
+         // Do our own swizzle here since lp_build_swizzle_aos_n() does
+         // not do what we want.
+         // Ex: value = {r0,g0,b0,a0, r1,g1,b1,a1, r2,g2,b2,a2, r3,g3,b3,a3}.
+         // aos swizzle = {2,1,0,3}  // swap red/blue
+         // shuffles = {2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}
+         // result = {b0,g0,r0,a0, b1,g1,r1,a1, b2,g2,r2,a2, b3,g3,r3,a3}.
+         LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
+         for (unsigned i = 0; i < 16; i++) {
+            unsigned chan = i % 4;
+            /* apply src register swizzle */
+            if (chan < num_components) {
+               chan = src.swizzle[chan];
+            } else {
+               chan = src.swizzle[0];
+            }
+            /* apply aos swizzle */
+            chan = lp_nir_aos_swizzle(bld_base, chan);
+            shuffles[i] = lp_build_const_int32(gallivm, (i & ~3) + chan);
+         }
+         value = LLVMBuildShuffleVector(builder, value,
+                                        LLVMGetUndef(LLVMTypeOf(value)),
+                                        LLVMConstVector(shuffles, 16), "");
+      } else if (src_components > 1 && num_components == 1) {
          value = LLVMBuildExtractValue(gallivm->builder, value,
                                        src.swizzle[0], "");
       } else if (src_components == 1 && num_components > 1) {
@@ -369,8 +407,7 @@ get_alu_src(struct lp_build_nir_context *bld_base,
          value = arr;
       }
    }
-   assert(!src.negate);
-   assert(!src.abs);
+
    return value;
 }
 
@@ -1262,14 +1299,6 @@ visit_alu(struct lp_build_nir_context *bld_base,
                            result[0], temp_chan);
       }
    } else if (is_aos(bld_base)) {
-      if (instr->op == nir_op_fmul) {
-         if (LLVMIsConstant(src[0])) {
-            src[0] = lp_nir_aos_conv_const(gallivm, src[0], 1);
-         }
-         if (LLVMIsConstant(src[1])) {
-            src[1] = lp_nir_aos_conv_const(gallivm, src[1], 1);
-         }
-      }
       result[0] = do_alu_action(bld_base, instr, src_bit_size, src);
    } else {
       /* Loop for R,G,B,A channels */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.h b/src/gallium/auxiliary/gallivm/lp_bld_nir.h
index d02da2b7db5..922208d8603 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.h
@@ -358,9 +358,8 @@ get_int_bld(struct lp_build_nir_context *bld_base,
 }
 
 
-LLVMValueRef
-lp_nir_aos_conv_const(struct gallivm_state *gallivm,
-                      LLVMValueRef constval, int nc);
+unsigned
+lp_nir_aos_swizzle(struct lp_build_nir_context *bld_base, unsigned chan);
 
 
 #endif
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c
index e4a332ae497..36a2304a04d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c
@@ -90,36 +90,6 @@ swizzle_aos(struct lp_build_nir_context *bld_base,
 }
 
 
-LLVMValueRef
-lp_nir_aos_conv_const(struct gallivm_state *gallivm,
-                      LLVMValueRef constval, int nc)
-{
-   LLVMValueRef elems[16];
-   uint8_t val = 0;
-   /* convert from 1..4 x f32 to 16 x unorm8 */
-   for (unsigned i = 0; i < nc; i++) {
-      LLVMValueRef value =
-         LLVMBuildExtractElement(gallivm->builder, constval,
-                                 lp_build_const_int32(gallivm, i), "");
-      assert(LLVMIsConstant(value));
-      unsigned uval = LLVMConstIntGetZExtValue(value);
-      float f = uif(uval);
-      val = float_to_ubyte(f);
-      for (unsigned j = 0; j < 4; j++) {
-         elems[j * 4 + i] =
-            LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), val, 0);
-      }
-   }
-   for (unsigned i = nc; i < 4; i++) {
-      for (unsigned j = 0; j < 4; j++) {
-         elems[j * 4 + i] =
-            LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), val, 0);
-      }
-   }
-   return LLVMConstVector(elems, 16);
-}
-
-
 static void
 init_var_slots(struct lp_build_nir_context *bld_base,
                nir_variable *var)
@@ -183,10 +153,6 @@ emit_store_var(struct lp_build_nir_context *bld_base,
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    unsigned location = var->data.driver_location;
 
-   if (LLVMIsConstant(vals)) {
-      vals = lp_nir_aos_conv_const(gallivm, vals, num_components);
-   }
-
    if (deref_mode == nir_var_shader_out) {
       LLVMBuildStore(gallivm->builder, vals, bld->outputs[location]);
    }
@@ -205,6 +171,37 @@ emit_load_reg(struct lp_build_nir_context *bld_base,
 }
 
 
+unsigned
+lp_nir_aos_swizzle(struct lp_build_nir_context *bld_base, unsigned chan)
+{
+   struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base);
+   return bld->swizzles[chan];
+}
+
+
+/*
+ * If an instruction has a writemask like r0.x = foo and the
+ * AOS/linear context uses swizzle={2,1,0,3} we need to change
+ * the writemask to r0.z
+ */
+static unsigned
+swizzle_writemask(struct lp_build_nir_aos_context *bld,
+                  unsigned writemask)
+{
+   assert(writemask != 0x0);
+   assert(writemask != 0xf);
+
+   // Ex: swap r/b channels
+   unsigned new_writemask = 0;
+   for (unsigned chan = 0; chan < 4; chan++) {
+      if (writemask & (1 << chan)) {
+         new_writemask |= 1 << bld->swizzles[chan];
+      }
+   }
+   return new_writemask;
+}
+
+
 static void
 emit_store_reg(struct lp_build_nir_context *bld_base,
                struct lp_build_context *reg_bld,
@@ -214,17 +211,18 @@ emit_store_reg(struct lp_build_nir_context *bld_base,
                LLVMValueRef reg_storage,
                LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
 {
+   struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base);
    struct gallivm_state *gallivm = bld_base->base.gallivm;
 
-   if (LLVMIsConstant(vals[0]))
-      vals[0] = lp_nir_aos_conv_const(gallivm, vals[0], 1);
-
    if (writemask == 0xf) {
       LLVMBuildStore(gallivm->builder, vals[0], reg_storage);
       return;
    }
 
-   LLVMValueRef cur = LLVMBuildLoad2(gallivm->builder, reg_bld->vec_type, reg_storage, "");
+   writemask = swizzle_writemask(bld, writemask);
+
+   LLVMValueRef cur = LLVMBuildLoad2(gallivm->builder, reg_bld->vec_type,
+                                     reg_storage, "");
    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
    for (unsigned j = 0; j < 16; j++) {
@@ -325,21 +323,31 @@ emit_load_const(struct lp_build_nir_context *bld_base,
                 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
 {
    struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base);
-   struct gallivm_state *gallivm = bld_base->base.gallivm;
-   LLVMValueRef elems[4];
+   LLVMValueRef elems[16];
    const int nc = instr->def.num_components;
    bool do_swizzle = false;
 
    if (nc == 4)
       do_swizzle = true;
 
-   for (unsigned i = 0; i < nc; i++) {
-      int idx = do_swizzle ? bld->swizzles[i] : i;
-      elems[idx] = LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
-                                instr->value[i].u32,
-                                bld_base->base.type.sign ? 1 : 0);
+   /* The constant is something like {float, float, float, float}.
+    * We need to convert the float values from [0,1] to ubyte in [0,255].
+    * We previously checked for values outside [0,1] in
+    * llvmpipe_nir_fn_is_linear_compat().
+    * Also, we convert the (typically) 4-element float constant into a
+    * swizzled 16-element ubyte constant (z,y,x,w, z,y,x,w, z,y,x,w, z,y,x,w)
+    * since that's what 'linear' mode operates on.
+    */
+   assert(bld_base->base.type.length <= ARRAY_SIZE(elems));
+   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
+      const unsigned j = do_swizzle ? bld->swizzles[i % nc] : i % nc;
+      assert(instr->value[j].f32 >= 0.0f);
+      assert(instr->value[j].f32 <= 1.0f);
+      const unsigned u8val = float_to_ubyte(instr->value[j].f32);
+      elems[i] = LLVMConstInt(bld_base->uint_bld.int_elem_type, u8val, 0);
    }
-   outval[0] = LLVMConstVector(elems, nc);
+   outval[0] = LLVMConstVector(elems, bld_base->base.type.length);
+   outval[1] = outval[2] = outval[3] = NULL;
 }
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c b/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c
index 302d4a4b858..ad2405317e5 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c
@@ -284,6 +284,25 @@ get_texcoord_provenance(const nir_tex_src *texcoord,
 }
 
 
+/*
+ * Check if all the values of a nir_load_const_instr are 32-bit
+ * floats in the range [0,1].  If so, return true, else return false.
+ */
+static bool
+check_load_const_in_zero_one(const nir_load_const_instr *load)
+{
+   if (load->def.bit_size != 32)
+      return false;
+   for (unsigned c = 0; c < load->def.num_components; c++) {
+      float val = load->value[c].f32;
+      if (val < 0.0 || val > 1.0 || isnan(val)) {
+         return false;
+      }
+   }
+   return true;
+}
+
+
 /*
  * Examine the NIR shader to determine if it's "linear".
  */
@@ -296,8 +315,14 @@ llvmpipe_nir_fn_is_linear_compat(const struct nir_shader *shader,
       nir_foreach_instr_safe(instr, block) {
          switch (instr->type) {
          case nir_instr_type_deref:
-         case nir_instr_type_load_const:
             break;
+         case nir_instr_type_load_const: {
+            nir_load_const_instr *load = nir_instr_as_load_const(instr);
+            if (!check_load_const_in_zero_one(load)) {
+               return false;
+            }
+            break;
+         }
          case nir_instr_type_intrinsic: {
             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
             if (intrin->intrinsic != nir_intrinsic_load_deref &&
@@ -384,14 +409,8 @@ llvmpipe_nir_fn_is_linear_compat(const struct nir_shader *shader,
                   if (nir_src_is_const(alu->src[s].src)) {
                      nir_load_const_instr *load =
                         nir_instr_as_load_const(alu->src[s].src.ssa->parent_instr);
-
-                     if (load->def.bit_size != 32)
+                     if (!check_load_const_in_zero_one(load)) {
                         return false;
-                     for (unsigned c = 0; c < load->def.num_components; c++) {
-                        if (load->value[c].f32 < 0.0 || load->value[c].f32 > 1.0) {
-                           info->unclamped_immediates = true;
-                           return false;
-                        }
                      }
                   }
                }
@@ -437,7 +456,6 @@ llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader)
        shader->info.base.num_outputs == 1 &&
        !shader->info.indirect_textures &&
        !shader->info.sampler_texture_units_different &&
-       !shader->info.unclamped_immediates &&
        shader->info.num_texs <= LP_MAX_LINEAR_TEXTURES &&
        llvmpipe_nir_is_linear_compat(shader->base.ir.nir, &shader->info)) {
       shader->kind = LP_FS_KIND_LLVM_LINEAR;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c b/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c
index 41bbe84dbe2..3f794759408 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c
@@ -279,7 +279,7 @@ llvmpipe_fs_variant_linear_llvm(struct llvmpipe_context *lp,
     */
 
    char func_name[256];
-   snprintf(func_name, sizeof(func_name), "fs_variant_linear");
+   snprintf(func_name, sizeof(func_name), "fs_variant_linear2");
 
    LLVMTypeRef ret_type = pint8t;
    LLVMTypeRef arg_types[4];