diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 8e4212d2273..57193a12b32 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -78,8 +78,6 @@ offset(const fs_reg &reg, const brw::fs_builder &bld, unsigned delta)
    return offset(reg, bld.dispatch_width(), delta);
 }
 
-#define UBO_START ((1 << 16) - 4)
-
 struct shader_stats {
    const char *scheduler_mode;
    unsigned promoted_constants;
diff --git a/src/intel/compiler/brw_nir_analyze_ubo_ranges.c b/src/intel/compiler/brw_nir_analyze_ubo_ranges.c
index 34568ac69c9..2fa4dd5908a 100644
--- a/src/intel/compiler/brw_nir_analyze_ubo_ranges.c
+++ b/src/intel/compiler/brw_nir_analyze_ubo_ranges.c
@@ -202,8 +202,7 @@ brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
 {
    const struct intel_device_info *devinfo = compiler->devinfo;
 
-   if ((devinfo->verx10 <= 70) ||
-       !compiler->scalar_stage[nir->info.stage]) {
+   if (devinfo->verx10 <= 70) {
       memset(out_ranges, 0, 4 * sizeof(struct brw_ubo_range));
       return;
    }
diff --git a/src/intel/compiler/brw_shader.h b/src/intel/compiler/brw_shader.h
index 7b7035293ed..20e9281b7d0 100644
--- a/src/intel/compiler/brw_shader.h
+++ b/src/intel/compiler/brw_shader.h
@@ -40,6 +40,8 @@ enum instruction_scheduler_mode {
    SCHEDULE_POST,
 };
 
+#define UBO_START ((1 << 16) - 4)
+
 struct backend_shader {
 protected:
 
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 4e215c88b0b..56031942fb0 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -593,7 +593,7 @@ vec4_visitor::split_uniform_registers()
     */
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (int i = 0 ; i < 3; i++) {
-         if (inst->src[i].file != UNIFORM)
+         if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START)
             continue;
 
          assert(!inst->src[i].reladdr);
@@ -672,7 +672,7 @@ vec4_visitor::pack_uniform_registers()
       }
 
       for (int i = 0 ; i < 3; i++) {
-         if (inst->src[i].file != UNIFORM)
+         if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START)
            continue;
 
         assert(type_sz(inst->src[i].type) % 4 == 0);
@@ -782,7 +782,7 @@ vec4_visitor::pack_uniform_registers()
      for (int i = 0 ; i < 3; i++) {
         int src = inst->src[i].nr;
 
-        if (inst->src[i].file != UNIFORM)
+        if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START)
           continue;
 
        int chan = new_chan[src] / channel_sizes[src];
@@ -977,7 +977,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
    */
   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
      for (int i = 0 ; i < 3; i++) {
-        if (inst->src[i].file != UNIFORM ||
+        if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START ||
            pull_constant_loc[inst->src[i].nr] == -1)
           continue;
 
@@ -2078,11 +2078,19 @@ vec4_visitor::convert_to_hw_regs()
          }
 
       case UNIFORM: {
-         reg = stride(byte_offset(brw_vec4_grf(
-                         prog_data->base.dispatch_grf_start_reg +
-                         src.nr / 2, src.nr % 2 * 4),
-                         src.offset),
-                      0, 4, 1);
+         if (src.nr >= UBO_START) {
+            reg = byte_offset(brw_vec4_grf(
+                     prog_data->base.dispatch_grf_start_reg +
+                     ubo_push_start[src.nr - UBO_START] +
+                     src.offset / 32, 0),
+                     src.offset % 32);
+         } else {
+            reg = byte_offset(brw_vec4_grf(
+                     prog_data->base.dispatch_grf_start_reg +
+                     src.nr / 2, src.nr % 2 * 4),
+                     src.offset);
+         }
+         reg = stride(reg, 0, 4, 1);
          reg.type = src.type;
          reg.abs = src.abs;
         reg.negate = src.negate;
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp
index 636dbfb6ea0..4c5dfd00f84 100644
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -624,8 +624,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    case nir_intrinsic_load_ubo: {
       src_reg surf_index;
 
-      prog_data->base.has_ubo_pull = true;
-
       dest = get_nir_dest(instr->dest);
 
       if (nir_src_is_const(instr->src[0])) {
@@ -647,10 +645,31 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          surf_index = emit_uniformize(surf_index);
       }
 
+      src_reg push_reg;
       src_reg offset_reg;
       if (nir_src_is_const(instr->src[1])) {
          unsigned load_offset = nir_src_as_uint(instr->src[1]);
-         offset_reg = brw_imm_ud(load_offset & ~15);
+         unsigned aligned_offset = load_offset & ~15;
+         offset_reg = brw_imm_ud(aligned_offset);
+
+         /* See if we've selected this as a push constant candidate */
+         if (nir_src_is_const(instr->src[0])) {
+            const unsigned ubo_block = nir_src_as_uint(instr->src[0]);
+            const unsigned offset_256b = aligned_offset / 32;
+
+            for (int i = 0; i < 4; i++) {
+               const struct brw_ubo_range *range = &prog_data->base.ubo_ranges[i];
+               if (range->block == ubo_block &&
+                   offset_256b >= range->start &&
+                   offset_256b < range->start + range->length) {
+
+                  push_reg = src_reg(dst_reg(UNIFORM, UBO_START + i));
+                  push_reg.type = dest.type;
+                  push_reg.offset = aligned_offset - 32 * range->start;
+                  break;
+               }
+            }
+         }
       } else {
          offset_reg = src_reg(this, glsl_type::uint_type);
          emit(MOV(dst_reg(offset_reg),
@@ -658,12 +677,15 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       }
 
       src_reg packed_consts;
-      if (nir_dest_bit_size(instr->dest) == 32) {
+      if (push_reg.file != BAD_FILE) {
+         packed_consts = push_reg;
+      } else if (nir_dest_bit_size(instr->dest) == 32) {
          packed_consts = src_reg(this, glsl_type::vec4_type);
          emit_pull_constant_load_reg(dst_reg(packed_consts),
                                      surf_index,
                                      offset_reg,
                                      NULL, NULL /* before_block/inst */);
+         prog_data->base.has_ubo_pull = true;
       } else {
          src_reg temp = src_reg(this, glsl_type::dvec4_type);
          src_reg temp_float = retype(temp, BRW_REGISTER_TYPE_F);
@@ -676,6 +698,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(16u)));
          emit_pull_constant_load_reg(dst_reg(byte_offset(temp_float, REG_SIZE)),
                                      surf_index, offset_reg, NULL, NULL);
+         prog_data->base.has_ubo_pull = true;
 
          packed_consts = src_reg(this, glsl_type::dvec4_type);
          shuffle_64bit_data(dst_reg(packed_consts), temp, false);
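For reference, here is a small standalone sketch of the push-range matching this patch adds to vec4_visitor::nir_emit_intrinsic(): given the four analyzed UBO ranges, it decides whether a constant-offset UBO load falls inside a pushed window and, if so, computes the byte offset within that window (what the patch stores in push_reg.offset). The struct ubo_range_sketch type, the find_push_range() helper, and the sample values are illustrative stand-ins and not part of Mesa; as with brw_ubo_range, start and length are in 32-byte register units.

/* Standalone sketch of the push-range matching logic (names are illustrative,
 * not driver code).  Build with any C compiler and run.
 */
#include <stdio.h>

struct ubo_range_sketch {
   unsigned block;   /* UBO binding table block index */
   unsigned start;   /* start of the pushed window, in 32-byte units */
   unsigned length;  /* length of the pushed window, in 32-byte units */
};

/* Returns the index (0..3) of the range covering the load, or -1 if the load
 * has to remain a pull.  On success, *push_offset is the byte offset of the
 * load within the pushed data, matching
 * "aligned_offset - 32 * range->start" in the patch.
 */
static int
find_push_range(const struct ubo_range_sketch ranges[4],
                unsigned ubo_block, unsigned load_offset,
                unsigned *push_offset)
{
   const unsigned aligned_offset = load_offset & ~15u;  /* 16-byte align, as in the patch */
   const unsigned offset_256b = aligned_offset / 32;    /* convert to 32-byte units */

   for (int i = 0; i < 4; i++) {
      const struct ubo_range_sketch *range = &ranges[i];
      if (range->block == ubo_block &&
          offset_256b >= range->start &&
          offset_256b < range->start + range->length) {
         *push_offset = aligned_offset - 32 * range->start;
         return i;
      }
   }
   return -1;
}

int
main(void)
{
   /* One pushed window: block 0, starting at register 2 (byte 64), four
    * registers long (bytes 64..191).  A load at byte offset 80 of block 0
    * lands inside it.
    */
   const struct ubo_range_sketch ranges[4] = {
      { .block = 0, .start = 2, .length = 4 },
   };
   unsigned push_offset;
   int idx = find_push_range(ranges, 0, 80, &push_offset);
   printf("range %d, offset %u\n", idx, push_offset);  /* prints: range 0, offset 16 */
   return 0;
}

Running the sketch prints "range 0, offset 16": the load at byte 80 sits 16 bytes into the window pushed from register 2, which is exactly the value the patch would assign to push_reg.offset before convert_to_hw_regs() maps it onto the pushed GRFs via ubo_push_start[].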