diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 0ff32148e0c..83702ef78b5 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -7856,7 +7856,7 @@ fs_visitor::fixup_nomask_control_flow()
 }
 
 void
-fs_visitor::allocate_registers(unsigned min_dispatch_width, bool allow_spilling)
+fs_visitor::allocate_registers(bool allow_spilling)
 {
    bool allocated;
 
@@ -7905,13 +7905,8 @@ fs_visitor::allocate_registers(unsigned min_dispatch_width, bool allow_spilling)
             fixup_3src_null_dest();
       }
 
-
-      /* We only allow spilling for the last schedule mode and only if the
-       * allow_spilling parameter and dispatch width work out ok.
-       */
       bool can_spill = allow_spilling &&
-                       (i == ARRAY_SIZE(pre_modes) - 1) &&
-                       dispatch_width == min_dispatch_width;
+                       (i == ARRAY_SIZE(pre_modes) - 1);
 
       /* We should only spill registers on the last scheduling. */
       assert(!spilled_any_registers);
@@ -7922,20 +7917,8 @@ fs_visitor::allocate_registers(unsigned min_dispatch_width, bool allow_spilling)
    }
 
    if (!allocated) {
-      if (!allow_spilling)
-         fail("Failure to register allocate and spilling is not allowed.");
-
-      /* We assume that any spilling is worse than just dropping back to
-       * SIMD8.  There's probably actually some intermediate point where
-       * SIMD16 with a couple of spills is still better.
-       */
-      if (dispatch_width > min_dispatch_width) {
-         fail("Failure to register allocate.  Reduce number of "
-              "live scalar values to avoid this.");
-      }
-
-      /* If we failed to allocate, we must have a reason */
-      assert(failed);
+      fail("Failure to register allocate.  Reduce number of "
+           "live scalar values to avoid this.");
    } else if (spilled_any_registers) {
       compiler->shader_perf_log(log_data,
                                 "%s shader triggered register spilling.  "
@@ -8024,7 +8007,7 @@ fs_visitor::run_vs()
    assign_vs_urb_setup();
 
    fixup_3src_null_dest();
-   allocate_registers(8, true);
+   allocate_registers(true /* allow_spilling */);
 
    return !failed;
 }
@@ -8145,7 +8128,7 @@ fs_visitor::run_tcs()
    assign_tcs_urb_setup();
 
    fixup_3src_null_dest();
-   allocate_registers(8, true);
+   allocate_registers(true /* allow_spilling */);
 
    return !failed;
 }
@@ -8179,7 +8162,7 @@ fs_visitor::run_tes()
    assign_tes_urb_setup();
 
    fixup_3src_null_dest();
-   allocate_registers(8, true);
+   allocate_registers(true /* allow_spilling */);
 
    return !failed;
 }
@@ -8228,7 +8211,7 @@ fs_visitor::run_gs()
    assign_gs_urb_setup();
 
    fixup_3src_null_dest();
-   allocate_registers(8, true);
+   allocate_registers(true /* allow_spilling */);
 
    return !failed;
 }
@@ -8337,7 +8320,8 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
       assign_urb_setup();
 
       fixup_3src_null_dest();
-      allocate_registers(8, allow_spilling);
+
+      allocate_registers(allow_spilling);
 
       if (failed)
          return false;
@@ -8347,10 +8331,9 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
 }
 
 bool
-fs_visitor::run_cs(unsigned min_dispatch_width)
+fs_visitor::run_cs(bool allow_spilling)
 {
    assert(stage == MESA_SHADER_COMPUTE);
-   assert(dispatch_width >= min_dispatch_width);
 
    setup_cs_payload();
 
@@ -8381,7 +8364,7 @@ fs_visitor::run_cs(unsigned min_dispatch_width)
    assign_curb_setup();
 
    fixup_3src_null_dest();
-   allocate_registers(min_dispatch_width, true);
+   allocate_registers(allow_spilling);
 
    if (failed)
       return false;
@@ -8710,6 +8693,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
    fs_visitor *v8 = NULL, *v16 = NULL, *v32 = NULL;
    cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL, *simd32_cfg = NULL;
    float throughput = 0;
+   bool has_spilled = false;
 
    v8 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
                        &prog_data->base, shader, 8, shader_time_index8);
@@ -8725,6 +8709,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
       prog_data->reg_blocks_8 = brw_register_blocks(v8->grf_used);
       const performance &perf = v8->performance_analysis.require();
       throughput = MAX2(throughput, perf.throughput);
+      has_spilled = v8->spilled_any_registers;
+      allow_spilling = false;
    }
 
    /* Limit dispatch width to simd8 with dual source blending on gen8.
@@ -8737,7 +8723,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
                                "using SIMD8 when dual src blending.\n");
    }
 
-   if (v8->max_dispatch_width >= 16 &&
+   if (!has_spilled &&
+       v8->max_dispatch_width >= 16 &&
        likely(!(INTEL_DEBUG & DEBUG_NO16) || use_rep_send)) {
       /* Try a SIMD16 compile */
       v16 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
@@ -8753,11 +8740,14 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
          prog_data->reg_blocks_16 = brw_register_blocks(v16->grf_used);
          const performance &perf = v16->performance_analysis.require();
          throughput = MAX2(throughput, perf.throughput);
+         has_spilled = v16->spilled_any_registers;
+         allow_spilling = false;
       }
    }
 
    /* Currently, the compiler only supports SIMD32 on SNB+ */
-   if (v8->max_dispatch_width >= 32 && !use_rep_send &&
+   if (!has_spilled &&
+       v8->max_dispatch_width >= 32 && !use_rep_send &&
        devinfo->gen >= 6 && simd16_cfg &&
        !(INTEL_DEBUG & DEBUG_NO32)) {
       /* Try a SIMD32 compile */
@@ -9085,7 +9075,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
       v8 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
                           &prog_data->base,
                           nir8, 8, shader_time_index);
-      if (!v8->run_cs(min_dispatch_width)) {
+      if (!v8->run_cs(true /* allow_spilling */)) {
          fail_msg = v8->fail_msg;
       } else {
          /* We should always be able to do SIMD32 for compute shaders */
@@ -9097,7 +9087,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
       }
    }
 
-   if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
+   if ((!v || !v->spilled_any_registers) &&
+       likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
        !fail_msg && min_dispatch_width <= 16 && max_dispatch_width >= 16) {
       /* Try a SIMD16 compile */
       nir_shader *nir16 = compile_cs_to_nir(compiler, mem_ctx, key,
@@ -9108,7 +9099,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
       if (v8)
          v16->import_uniforms(v8);
 
-      if (!v16->run_cs(min_dispatch_width)) {
+      if (!v16->run_cs(v == NULL /* allow_spilling */)) {
          compiler->shader_perf_log(log_data,
                                    "SIMD16 shader failed to compile: %s",
                                    v16->fail_msg);
@@ -9127,7 +9118,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
       }
    }
 
-   if (!fail_msg && (min_dispatch_width > 16 || (INTEL_DEBUG & DEBUG_DO32)) &&
+   if ((!v || !v->spilled_any_registers) &&
+       !fail_msg && (min_dispatch_width > 16 || (INTEL_DEBUG & DEBUG_DO32)) &&
        max_dispatch_width >= 32) {
       /* Try a SIMD32 compile */
       nir_shader *nir32 = compile_cs_to_nir(compiler, mem_ctx, key,
@@ -9140,7 +9132,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
       else if (v16)
          v32->import_uniforms(v16);
 
-      if (!v32->run_cs(min_dispatch_width)) {
+      if (!v32->run_cs(v == NULL /* allow_spilling */)) {
          compiler->shader_perf_log(log_data,
                                    "SIMD32 shader failed to compile: %s",
                                    v32->fail_msg);
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index b9783eb2285..5bf5cdeed41 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -125,9 +125,9 @@ public:
    bool run_tcs();
    bool run_tes();
    bool run_gs();
-   bool run_cs(unsigned min_dispatch_width);
+   bool run_cs(bool allow_spilling);
    void optimize();
-   void allocate_registers(unsigned min_dispatch_width, bool allow_spilling);
+   void allocate_registers(bool allow_spilling);
    void setup_fs_payload_gen4();
    void setup_fs_payload_gen6();
    void setup_vs_payload();