diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 6bb11113978..1f2e2e242a4 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1538,553 +1538,6 @@ brw_emit_predicate_on_sample_mask(const fs_builder &bld, fs_inst *inst) } } -void -fs_visitor::dump_instructions_to_file(FILE *file) const -{ - if (cfg && grf_used == 0) { - const brw::def_analysis &defs = def_analysis.require(); - const register_pressure *rp = - INTEL_DEBUG(DEBUG_REG_PRESSURE) ? &regpressure_analysis.require() : NULL; - - unsigned ip = 0, max_pressure = 0; - unsigned cf_count = 0; - foreach_block_and_inst(block, fs_inst, inst, cfg) { - if (inst->is_control_flow_end()) - cf_count -= 1; - - if (rp) { - max_pressure = MAX2(max_pressure, rp->regs_live_at_ip[ip]); - fprintf(file, "{%3d} ", rp->regs_live_at_ip[ip]); - } - - for (unsigned i = 0; i < cf_count; i++) - fprintf(file, " "); - dump_instruction(inst, file, &defs); - ip++; - - if (inst->is_control_flow_begin()) - cf_count += 1; - } - if (rp) - fprintf(file, "Maximum %3d registers live at once.\n", max_pressure); - } else if (cfg && exec_list_is_empty(&instructions)) { - foreach_block_and_inst(block, fs_inst, inst, cfg) { - dump_instruction(inst, file); - } - } else { - foreach_in_list(fs_inst, inst, &instructions) { - dump_instruction(inst, file); - } - } -} - -void -fs_visitor::dump_instructions(const char *name) const -{ - FILE *file = stderr; - if (name && __normal_user()) { - file = fopen(name, "w"); - if (!file) - file = stderr; - } - - dump_instructions_to_file(file); - - if (file != stderr) { - fclose(file); - } -} - -static const char * -brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - switch (op) { - case 0 ... NUM_BRW_OPCODES - 1: - /* The DO instruction doesn't exist on Gfx9+, but we use it to mark the - * start of a loop in the IR. 
- */ - if (op == BRW_OPCODE_DO) - return "do"; - - /* DPAS instructions may transiently exist on platforms that do not - * support DPAS. They will eventually be lowered, but in the meantime it - * must be possible to query the instruction name. - */ - if (devinfo->verx10 < 125 && op == BRW_OPCODE_DPAS) - return "dpas"; - - assert(brw_opcode_desc(isa, op)->name); - return brw_opcode_desc(isa, op)->name; - case FS_OPCODE_FB_WRITE_LOGICAL: - return "fb_write_logical"; - case FS_OPCODE_FB_READ_LOGICAL: - return "fb_read_logical"; - - case SHADER_OPCODE_RCP: - return "rcp"; - case SHADER_OPCODE_RSQ: - return "rsq"; - case SHADER_OPCODE_SQRT: - return "sqrt"; - case SHADER_OPCODE_EXP2: - return "exp2"; - case SHADER_OPCODE_LOG2: - return "log2"; - case SHADER_OPCODE_POW: - return "pow"; - case SHADER_OPCODE_INT_QUOTIENT: - return "int_quot"; - case SHADER_OPCODE_INT_REMAINDER: - return "int_rem"; - case SHADER_OPCODE_SIN: - return "sin"; - case SHADER_OPCODE_COS: - return "cos"; - - case SHADER_OPCODE_SEND: - return "send"; - - case SHADER_OPCODE_UNDEF: - return "undef"; - - case SHADER_OPCODE_TEX_LOGICAL: - return "tex_logical"; - case SHADER_OPCODE_TXD_LOGICAL: - return "txd_logical"; - case SHADER_OPCODE_TXF_LOGICAL: - return "txf_logical"; - case SHADER_OPCODE_TXL_LOGICAL: - return "txl_logical"; - case SHADER_OPCODE_TXS_LOGICAL: - return "txs_logical"; - case FS_OPCODE_TXB_LOGICAL: - return "txb_logical"; - case SHADER_OPCODE_TXF_CMS_W_LOGICAL: - return "txf_cms_w_logical"; - case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL: - return "txf_cms_w_gfx12_logical"; - case SHADER_OPCODE_TXF_MCS_LOGICAL: - return "txf_mcs_logical"; - case SHADER_OPCODE_LOD_LOGICAL: - return "lod_logical"; - case SHADER_OPCODE_TG4_LOGICAL: - return "tg4_logical"; - case SHADER_OPCODE_TG4_OFFSET_LOGICAL: - return "tg4_offset_logical"; - case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL: - return "tg4_offset_lod_logical"; - case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL: - return "tg4_offset_bias_logical"; - 
case SHADER_OPCODE_TG4_BIAS_LOGICAL: - return "tg4_b_logical"; - case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL: - return "tg4_l_logical"; - case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL: - return "tg4_i_logical"; - case SHADER_OPCODE_SAMPLEINFO_LOGICAL: - return "sampleinfo_logical"; - - case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: - return "image_size_logical"; - - case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: - return "untyped_atomic_logical"; - case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: - return "untyped_surface_read_logical"; - case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: - return "untyped_surface_write_logical"; - case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: - return "unaligned_oword_block_read_logical"; - case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL: - return "oword_block_write_logical"; - case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: - return "a64_untyped_read_logical"; - case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL: - return "a64_oword_block_read_logical"; - case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: - return "a64_unaligned_oword_block_read_logical"; - case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: - return "a64_oword_block_write_logical"; - case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: - return "a64_untyped_write_logical"; - case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: - return "a64_byte_scattered_read_logical"; - case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: - return "a64_byte_scattered_write_logical"; - case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: - return "a64_untyped_atomic_logical"; - case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: - return "typed_atomic_logical"; - case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: - return "typed_surface_read_logical"; - case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: - return "typed_surface_write_logical"; - case SHADER_OPCODE_MEMORY_FENCE: - return "memory_fence"; - case FS_OPCODE_SCHEDULING_FENCE: - return "scheduling_fence"; - case SHADER_OPCODE_INTERLOCK: - /* For an interlock 
we actually issue a memory fence via sendc. */ - return "interlock"; - - case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: - return "byte_scattered_read_logical"; - case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: - return "byte_scattered_write_logical"; - case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: - return "dword_scattered_read_logical"; - case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: - return "dword_scattered_write_logical"; - - case SHADER_OPCODE_LOAD_PAYLOAD: - return "load_payload"; - case FS_OPCODE_PACK: - return "pack"; - - case SHADER_OPCODE_SCRATCH_HEADER: - return "scratch_header"; - - case SHADER_OPCODE_URB_WRITE_LOGICAL: - return "urb_write_logical"; - case SHADER_OPCODE_URB_READ_LOGICAL: - return "urb_read_logical"; - - case SHADER_OPCODE_FIND_LIVE_CHANNEL: - return "find_live_channel"; - case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL: - return "find_last_live_channel"; - case SHADER_OPCODE_LOAD_LIVE_CHANNELS: - return "load_live_channels"; - case FS_OPCODE_LOAD_LIVE_CHANNELS: - return "fs_load_live_channels"; - - case SHADER_OPCODE_BROADCAST: - return "broadcast"; - case SHADER_OPCODE_SHUFFLE: - return "shuffle"; - case SHADER_OPCODE_SEL_EXEC: - return "sel_exec"; - case SHADER_OPCODE_QUAD_SWIZZLE: - return "quad_swizzle"; - case SHADER_OPCODE_CLUSTER_BROADCAST: - return "cluster_broadcast"; - - case SHADER_OPCODE_GET_BUFFER_SIZE: - return "get_buffer_size"; - - case FS_OPCODE_DDX_COARSE: - return "ddx_coarse"; - case FS_OPCODE_DDX_FINE: - return "ddx_fine"; - case FS_OPCODE_DDY_COARSE: - return "ddy_coarse"; - case FS_OPCODE_DDY_FINE: - return "ddy_fine"; - - case FS_OPCODE_PIXEL_X: - return "pixel_x"; - case FS_OPCODE_PIXEL_Y: - return "pixel_y"; - - case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: - return "uniform_pull_const"; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: - return "varying_pull_const_logical"; - - case FS_OPCODE_PACK_HALF_2x16_SPLIT: - return "pack_half_2x16_split"; - - case SHADER_OPCODE_HALT_TARGET: - return "halt_target"; - 
- case FS_OPCODE_INTERPOLATE_AT_SAMPLE: - return "interp_sample"; - case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: - return "interp_shared_offset"; - case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: - return "interp_per_slot_offset"; - - case SHADER_OPCODE_BARRIER: - return "barrier"; - case SHADER_OPCODE_MULH: - return "mulh"; - case SHADER_OPCODE_ISUB_SAT: - return "isub_sat"; - case SHADER_OPCODE_USUB_SAT: - return "usub_sat"; - case SHADER_OPCODE_MOV_INDIRECT: - return "mov_indirect"; - case SHADER_OPCODE_MOV_RELOC_IMM: - return "mov_reloc_imm"; - - case RT_OPCODE_TRACE_RAY_LOGICAL: - return "rt_trace_ray_logical"; - - case SHADER_OPCODE_RND_MODE: - return "rnd_mode"; - case SHADER_OPCODE_FLOAT_CONTROL_MODE: - return "float_control_mode"; - case SHADER_OPCODE_BTD_SPAWN_LOGICAL: - return "btd_spawn_logical"; - case SHADER_OPCODE_BTD_RETIRE_LOGICAL: - return "btd_retire_logical"; - case SHADER_OPCODE_READ_ARCH_REG: - return "read_arch_reg"; - case SHADER_OPCODE_LOAD_SUBGROUP_INVOCATION: - return "load_subgroup_invocation"; - } - - unreachable("not reached"); -} - - -void -fs_visitor::dump_instruction_to_file(const fs_inst *inst, FILE *file, const brw::def_analysis *defs) const -{ - if (inst->predicate) { - fprintf(file, "(%cf%d.%d) ", - inst->predicate_inverse ? 
'-' : '+', - inst->flag_subreg / 2, - inst->flag_subreg % 2); - } - - fprintf(file, "%s", brw_instruction_name(&compiler->isa, inst->opcode)); - if (inst->saturate) - fprintf(file, ".sat"); - if (inst->conditional_mod) { - fprintf(file, "%s", conditional_modifier[inst->conditional_mod]); - if (!inst->predicate && - (inst->opcode != BRW_OPCODE_SEL && - inst->opcode != BRW_OPCODE_CSEL && - inst->opcode != BRW_OPCODE_IF && - inst->opcode != BRW_OPCODE_WHILE)) { - fprintf(file, ".f%d.%d", inst->flag_subreg / 2, - inst->flag_subreg % 2); - } - } - fprintf(file, "(%d) ", inst->exec_size); - - if (inst->mlen) { - fprintf(file, "(mlen: %d) ", inst->mlen); - } - - if (inst->ex_mlen) { - fprintf(file, "(ex_mlen: %d) ", inst->ex_mlen); - } - - if (inst->eot) { - fprintf(file, "(EOT) "); - } - - switch (inst->dst.file) { - case VGRF: - if (defs && defs->get(inst->dst)) - fprintf(file, "%%%d", inst->dst.nr); - else - fprintf(file, "v%d", inst->dst.nr); - break; - case FIXED_GRF: - fprintf(file, "g%d", inst->dst.nr); - if (inst->dst.subnr != 0) - fprintf(file, ".%d", inst->dst.subnr / brw_type_size_bytes(inst->dst.type)); - break; - case BAD_FILE: - fprintf(file, "(null)"); - break; - case UNIFORM: - fprintf(file, "***u%d***", inst->dst.nr); - break; - case ATTR: - fprintf(file, "***attr%d***", inst->dst.nr); - break; - case ARF: - switch (inst->dst.nr & 0xF0) { - case BRW_ARF_NULL: - fprintf(file, "null"); - break; - case BRW_ARF_ADDRESS: - fprintf(file, "a0.%d", inst->dst.subnr); - break; - case BRW_ARF_ACCUMULATOR: - if (inst->dst.subnr == 0) - fprintf(file, "acc%d", inst->dst.nr & 0x0F); - else - fprintf(file, "acc%d.%d", inst->dst.nr & 0x0F, inst->dst.subnr); - - break; - case BRW_ARF_FLAG: - fprintf(file, "f%d.%d", inst->dst.nr & 0xf, inst->dst.subnr); - break; - default: - fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr); - break; - } - break; - case IMM: - unreachable("not reached"); - } - - if (inst->dst.offset || - (inst->dst.file == VGRF && - 
alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written)) { - const unsigned reg_size = (inst->dst.file == UNIFORM ? 4 : REG_SIZE); - fprintf(file, "+%d.%d", inst->dst.offset / reg_size, - inst->dst.offset % reg_size); - } - - if (inst->dst.stride != 1) - fprintf(file, "<%u>", inst->dst.stride); - fprintf(file, ":%s", brw_reg_type_to_letters(inst->dst.type)); - - for (int i = 0; i < inst->sources; i++) { - fprintf(file, ", "); - - if (inst->src[i].negate) - fprintf(file, "-"); - if (inst->src[i].abs) - fprintf(file, "|"); - switch (inst->src[i].file) { - case VGRF: - if (defs && defs->get(inst->src[i])) - fprintf(file, "%%%d", inst->src[i].nr); - else - fprintf(file, "v%d", inst->src[i].nr); - break; - case FIXED_GRF: - fprintf(file, "g%d", inst->src[i].nr); - break; - case ATTR: - fprintf(file, "attr%d", inst->src[i].nr); - break; - case UNIFORM: - fprintf(file, "u%d", inst->src[i].nr); - break; - case BAD_FILE: - fprintf(file, "(null)"); - break; - case IMM: - switch (inst->src[i].type) { - case BRW_TYPE_HF: - fprintf(file, "%-ghf", _mesa_half_to_float(inst->src[i].ud & 0xffff)); - break; - case BRW_TYPE_F: - fprintf(file, "%-gf", inst->src[i].f); - break; - case BRW_TYPE_DF: - fprintf(file, "%fdf", inst->src[i].df); - break; - case BRW_TYPE_W: - fprintf(file, "%dw", (int)(int16_t)inst->src[i].d); - break; - case BRW_TYPE_D: - fprintf(file, "%dd", inst->src[i].d); - break; - case BRW_TYPE_UW: - fprintf(file, "%duw", inst->src[i].ud & 0xffff); - break; - case BRW_TYPE_UD: - fprintf(file, "%uu", inst->src[i].ud); - break; - case BRW_TYPE_Q: - fprintf(file, "%" PRId64 "q", inst->src[i].d64); - break; - case BRW_TYPE_UQ: - fprintf(file, "%" PRIu64 "uq", inst->src[i].u64); - break; - case BRW_TYPE_VF: - fprintf(file, "[%-gF, %-gF, %-gF, %-gF]", - brw_vf_to_float((inst->src[i].ud >> 0) & 0xff), - brw_vf_to_float((inst->src[i].ud >> 8) & 0xff), - brw_vf_to_float((inst->src[i].ud >> 16) & 0xff), - brw_vf_to_float((inst->src[i].ud >> 24) & 0xff)); - break; - case 
BRW_TYPE_V: - case BRW_TYPE_UV: - fprintf(file, "%08x%s", inst->src[i].ud, - inst->src[i].type == BRW_TYPE_V ? "V" : "UV"); - break; - default: - fprintf(file, "???"); - break; - } - break; - case ARF: - switch (inst->src[i].nr & 0xF0) { - case BRW_ARF_NULL: - fprintf(file, "null"); - break; - case BRW_ARF_ADDRESS: - fprintf(file, "a0.%d", inst->src[i].subnr); - break; - case BRW_ARF_ACCUMULATOR: - if (inst->src[i].subnr == 0) - fprintf(file, "acc%d", inst->src[i].nr & 0x0F); - else - fprintf(file, "acc%d.%d", inst->src[i].nr & 0x0F, inst->src[i].subnr); - - break; - case BRW_ARF_FLAG: - fprintf(file, "f%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr); - break; - default: - fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr); - break; - } - break; - } - - if (inst->src[i].file == FIXED_GRF && inst->src[i].subnr != 0) { - assert(inst->src[i].offset == 0); - - fprintf(file, ".%d", inst->src[i].subnr / brw_type_size_bytes(inst->src[i].type)); - } else if (inst->src[i].offset || - (inst->src[i].file == VGRF && - alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i))) { - const unsigned reg_size = (inst->src[i].file == UNIFORM ? 4 : REG_SIZE); - fprintf(file, "+%d.%d", inst->src[i].offset / reg_size, - inst->src[i].offset % reg_size); - } - - if (inst->src[i].abs) - fprintf(file, "|"); - - if (inst->src[i].file != IMM) { - unsigned stride; - if (inst->src[i].file == ARF || inst->src[i].file == FIXED_GRF) { - unsigned hstride = inst->src[i].hstride; - stride = (hstride == 0 ? 
0 : (1 << (hstride - 1))); - } else { - stride = inst->src[i].stride; - } - if (stride != 1) - fprintf(file, "<%u>", stride); - - fprintf(file, ":%s", brw_reg_type_to_letters(inst->src[i].type)); - } - } - - fprintf(file, " "); - - if (inst->force_writemask_all) - fprintf(file, "NoMask "); - - if (inst->exec_size != dispatch_width) - fprintf(file, "group%d ", inst->group); - - if (inst->has_no_mask_send_params) - fprintf(file, "NoMaskParams "); - - if (inst->sched.pipe != TGL_PIPE_NONE) { - fprintf(file, "{ "); - brw_print_swsb(file, devinfo, inst->sched); - fprintf(file, " } "); - } - - fprintf(file, "\n"); -} - brw::register_pressure::register_pressure(const fs_visitor *v) { const fs_live_variables &live = v->live_analysis.require(); @@ -2604,29 +2057,3 @@ namespace brw { } } -void -brw_print_swsb(FILE *f, const struct intel_device_info *devinfo, const tgl_swsb swsb) -{ - if (swsb.pipe == TGL_PIPE_NONE) - return; - - if (swsb.regdist) { - fprintf(f, "%s@%d", - (devinfo && devinfo->verx10 < 125 ? "" : - swsb.pipe == TGL_PIPE_FLOAT ? "F" : - swsb.pipe == TGL_PIPE_INT ? "I" : - swsb.pipe == TGL_PIPE_LONG ? "L" : - swsb.pipe == TGL_PIPE_ALL ? "A" : - swsb.pipe == TGL_PIPE_MATH ? "M" : "" ), - swsb.regdist); - } - - if (swsb.mode) { - if (swsb.regdist) - fprintf(f, " "); - - fprintf(f, "$%d%s", swsb.sbid, - (swsb.mode & TGL_SBID_SET ? "" : - swsb.mode & TGL_SBID_DST ? 
".dst" : ".src")); - } -} diff --git a/src/intel/compiler/brw_print.cpp b/src/intel/compiler/brw_print.cpp new file mode 100644 index 00000000000..5c46fbe10b5 --- /dev/null +++ b/src/intel/compiler/brw_print.cpp @@ -0,0 +1,588 @@ +/* + * Copyright © 2010 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "brw_cfg.h" +#include "brw_fs.h" +#include "brw_private.h" +#include "dev/intel_debug.h" +#include "util/half_float.h" + +using namespace brw; + +void +fs_visitor::dump_instructions_to_file(FILE *file) const +{ + if (cfg && grf_used == 0) { + const brw::def_analysis &defs = def_analysis.require(); + const register_pressure *rp = + INTEL_DEBUG(DEBUG_REG_PRESSURE) ? &regpressure_analysis.require() : NULL; + + unsigned ip = 0, max_pressure = 0; + unsigned cf_count = 0; + foreach_block_and_inst(block, fs_inst, inst, cfg) { + if (inst->is_control_flow_end()) + cf_count -= 1; + + if (rp) { + max_pressure = MAX2(max_pressure, rp->regs_live_at_ip[ip]); + fprintf(file, "{%3d} ", rp->regs_live_at_ip[ip]); + } + + for (unsigned i = 0; i < cf_count; i++) + fprintf(file, " "); + dump_instruction(inst, file, &defs); + ip++; + + if (inst->is_control_flow_begin()) + cf_count += 1; + } + if (rp) + fprintf(file, "Maximum %3d registers live at once.\n", max_pressure); + } else if (cfg && exec_list_is_empty(&instructions)) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { + dump_instruction(inst, file); + } + } else { + foreach_in_list(fs_inst, inst, &instructions) { + dump_instruction(inst, file); + } + } +} + +void +fs_visitor::dump_instructions(const char *name) const +{ + FILE *file = stderr; + if (name && __normal_user()) { + file = fopen(name, "w"); + if (!file) + file = stderr; + } + + dump_instructions_to_file(file); + + if (file != stderr) { + fclose(file); + } +} + +static const char * +brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) +{ + const struct intel_device_info *devinfo = isa->devinfo; + + switch (op) { + case 0 ... 
NUM_BRW_OPCODES - 1: + /* The DO instruction doesn't exist on Gfx9+, but we use it to mark the + * start of a loop in the IR. + */ + if (op == BRW_OPCODE_DO) + return "do"; + + /* DPAS instructions may transiently exist on platforms that do not + * support DPAS. They will eventually be lowered, but in the meantime it + * must be possible to query the instruction name. + */ + if (devinfo->verx10 < 125 && op == BRW_OPCODE_DPAS) + return "dpas"; + + assert(brw_opcode_desc(isa, op)->name); + return brw_opcode_desc(isa, op)->name; + case FS_OPCODE_FB_WRITE_LOGICAL: + return "fb_write_logical"; + case FS_OPCODE_FB_READ_LOGICAL: + return "fb_read_logical"; + + case SHADER_OPCODE_RCP: + return "rcp"; + case SHADER_OPCODE_RSQ: + return "rsq"; + case SHADER_OPCODE_SQRT: + return "sqrt"; + case SHADER_OPCODE_EXP2: + return "exp2"; + case SHADER_OPCODE_LOG2: + return "log2"; + case SHADER_OPCODE_POW: + return "pow"; + case SHADER_OPCODE_INT_QUOTIENT: + return "int_quot"; + case SHADER_OPCODE_INT_REMAINDER: + return "int_rem"; + case SHADER_OPCODE_SIN: + return "sin"; + case SHADER_OPCODE_COS: + return "cos"; + + case SHADER_OPCODE_SEND: + return "send"; + + case SHADER_OPCODE_UNDEF: + return "undef"; + + case SHADER_OPCODE_TEX_LOGICAL: + return "tex_logical"; + case SHADER_OPCODE_TXD_LOGICAL: + return "txd_logical"; + case SHADER_OPCODE_TXF_LOGICAL: + return "txf_logical"; + case SHADER_OPCODE_TXL_LOGICAL: + return "txl_logical"; + case SHADER_OPCODE_TXS_LOGICAL: + return "txs_logical"; + case FS_OPCODE_TXB_LOGICAL: + return "txb_logical"; + case SHADER_OPCODE_TXF_CMS_W_LOGICAL: + return "txf_cms_w_logical"; + case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL: + return "txf_cms_w_gfx12_logical"; + case SHADER_OPCODE_TXF_MCS_LOGICAL: + return "txf_mcs_logical"; + case SHADER_OPCODE_LOD_LOGICAL: + return "lod_logical"; + case SHADER_OPCODE_TG4_LOGICAL: + return "tg4_logical"; + case SHADER_OPCODE_TG4_OFFSET_LOGICAL: + return "tg4_offset_logical"; + case 
SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL: + return "tg4_offset_lod_logical"; + case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL: + return "tg4_offset_bias_logical"; + case SHADER_OPCODE_TG4_BIAS_LOGICAL: + return "tg4_b_logical"; + case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL: + return "tg4_l_logical"; + case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL: + return "tg4_i_logical"; + case SHADER_OPCODE_SAMPLEINFO_LOGICAL: + return "sampleinfo_logical"; + + case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: + return "image_size_logical"; + + case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + return "untyped_atomic_logical"; + case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: + return "untyped_surface_read_logical"; + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: + return "untyped_surface_write_logical"; + case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: + return "unaligned_oword_block_read_logical"; + case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL: + return "oword_block_write_logical"; + case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: + return "a64_untyped_read_logical"; + case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL: + return "a64_oword_block_read_logical"; + case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: + return "a64_unaligned_oword_block_read_logical"; + case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: + return "a64_oword_block_write_logical"; + case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: + return "a64_untyped_write_logical"; + case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: + return "a64_byte_scattered_read_logical"; + case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: + return "a64_byte_scattered_write_logical"; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + return "a64_untyped_atomic_logical"; + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: + return "typed_atomic_logical"; + case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + return "typed_surface_read_logical"; + case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: + return "typed_surface_write_logical"; + case 
SHADER_OPCODE_MEMORY_FENCE: + return "memory_fence"; + case FS_OPCODE_SCHEDULING_FENCE: + return "scheduling_fence"; + case SHADER_OPCODE_INTERLOCK: + /* For an interlock we actually issue a memory fence via sendc. */ + return "interlock"; + + case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: + return "byte_scattered_read_logical"; + case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: + return "byte_scattered_write_logical"; + case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: + return "dword_scattered_read_logical"; + case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: + return "dword_scattered_write_logical"; + + case SHADER_OPCODE_LOAD_PAYLOAD: + return "load_payload"; + case FS_OPCODE_PACK: + return "pack"; + + case SHADER_OPCODE_SCRATCH_HEADER: + return "scratch_header"; + + case SHADER_OPCODE_URB_WRITE_LOGICAL: + return "urb_write_logical"; + case SHADER_OPCODE_URB_READ_LOGICAL: + return "urb_read_logical"; + + case SHADER_OPCODE_FIND_LIVE_CHANNEL: + return "find_live_channel"; + case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL: + return "find_last_live_channel"; + case SHADER_OPCODE_LOAD_LIVE_CHANNELS: + return "load_live_channels"; + case FS_OPCODE_LOAD_LIVE_CHANNELS: + return "fs_load_live_channels"; + + case SHADER_OPCODE_BROADCAST: + return "broadcast"; + case SHADER_OPCODE_SHUFFLE: + return "shuffle"; + case SHADER_OPCODE_SEL_EXEC: + return "sel_exec"; + case SHADER_OPCODE_QUAD_SWIZZLE: + return "quad_swizzle"; + case SHADER_OPCODE_CLUSTER_BROADCAST: + return "cluster_broadcast"; + + case SHADER_OPCODE_GET_BUFFER_SIZE: + return "get_buffer_size"; + + case FS_OPCODE_DDX_COARSE: + return "ddx_coarse"; + case FS_OPCODE_DDX_FINE: + return "ddx_fine"; + case FS_OPCODE_DDY_COARSE: + return "ddy_coarse"; + case FS_OPCODE_DDY_FINE: + return "ddy_fine"; + + case FS_OPCODE_PIXEL_X: + return "pixel_x"; + case FS_OPCODE_PIXEL_Y: + return "pixel_y"; + + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: + return "uniform_pull_const"; + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: + 
return "varying_pull_const_logical"; + + case FS_OPCODE_PACK_HALF_2x16_SPLIT: + return "pack_half_2x16_split"; + + case SHADER_OPCODE_HALT_TARGET: + return "halt_target"; + + case FS_OPCODE_INTERPOLATE_AT_SAMPLE: + return "interp_sample"; + case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: + return "interp_shared_offset"; + case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: + return "interp_per_slot_offset"; + + case SHADER_OPCODE_BARRIER: + return "barrier"; + case SHADER_OPCODE_MULH: + return "mulh"; + case SHADER_OPCODE_ISUB_SAT: + return "isub_sat"; + case SHADER_OPCODE_USUB_SAT: + return "usub_sat"; + case SHADER_OPCODE_MOV_INDIRECT: + return "mov_indirect"; + case SHADER_OPCODE_MOV_RELOC_IMM: + return "mov_reloc_imm"; + + case RT_OPCODE_TRACE_RAY_LOGICAL: + return "rt_trace_ray_logical"; + + case SHADER_OPCODE_RND_MODE: + return "rnd_mode"; + case SHADER_OPCODE_FLOAT_CONTROL_MODE: + return "float_control_mode"; + case SHADER_OPCODE_BTD_SPAWN_LOGICAL: + return "btd_spawn_logical"; + case SHADER_OPCODE_BTD_RETIRE_LOGICAL: + return "btd_retire_logical"; + case SHADER_OPCODE_READ_ARCH_REG: + return "read_arch_reg"; + case SHADER_OPCODE_LOAD_SUBGROUP_INVOCATION: + return "load_subgroup_invocation"; + } + + unreachable("not reached"); +} + + +void +fs_visitor::dump_instruction_to_file(const fs_inst *inst, FILE *file, const brw::def_analysis *defs) const +{ + if (inst->predicate) { + fprintf(file, "(%cf%d.%d) ", + inst->predicate_inverse ? 
'-' : '+', + inst->flag_subreg / 2, + inst->flag_subreg % 2); + } + + fprintf(file, "%s", brw_instruction_name(&compiler->isa, inst->opcode)); + if (inst->saturate) + fprintf(file, ".sat"); + if (inst->conditional_mod) { + fprintf(file, "%s", conditional_modifier[inst->conditional_mod]); + if (!inst->predicate && + (inst->opcode != BRW_OPCODE_SEL && + inst->opcode != BRW_OPCODE_CSEL && + inst->opcode != BRW_OPCODE_IF && + inst->opcode != BRW_OPCODE_WHILE)) { + fprintf(file, ".f%d.%d", inst->flag_subreg / 2, + inst->flag_subreg % 2); + } + } + fprintf(file, "(%d) ", inst->exec_size); + + if (inst->mlen) { + fprintf(file, "(mlen: %d) ", inst->mlen); + } + + if (inst->ex_mlen) { + fprintf(file, "(ex_mlen: %d) ", inst->ex_mlen); + } + + if (inst->eot) { + fprintf(file, "(EOT) "); + } + + switch (inst->dst.file) { + case VGRF: + if (defs && defs->get(inst->dst)) + fprintf(file, "%%%d", inst->dst.nr); + else + fprintf(file, "v%d", inst->dst.nr); + break; + case FIXED_GRF: + fprintf(file, "g%d", inst->dst.nr); + if (inst->dst.subnr != 0) + fprintf(file, ".%d", inst->dst.subnr / brw_type_size_bytes(inst->dst.type)); + break; + case BAD_FILE: + fprintf(file, "(null)"); + break; + case UNIFORM: + fprintf(file, "***u%d***", inst->dst.nr); + break; + case ATTR: + fprintf(file, "***attr%d***", inst->dst.nr); + break; + case ARF: + switch (inst->dst.nr & 0xF0) { + case BRW_ARF_NULL: + fprintf(file, "null"); + break; + case BRW_ARF_ADDRESS: + fprintf(file, "a0.%d", inst->dst.subnr); + break; + case BRW_ARF_ACCUMULATOR: + if (inst->dst.subnr == 0) + fprintf(file, "acc%d", inst->dst.nr & 0x0F); + else + fprintf(file, "acc%d.%d", inst->dst.nr & 0x0F, inst->dst.subnr); + + break; + case BRW_ARF_FLAG: + fprintf(file, "f%d.%d", inst->dst.nr & 0xf, inst->dst.subnr); + break; + default: + fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr); + break; + } + break; + case IMM: + unreachable("not reached"); + } + + if (inst->dst.offset || + (inst->dst.file == VGRF && + 
alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written)) { + const unsigned reg_size = (inst->dst.file == UNIFORM ? 4 : REG_SIZE); + fprintf(file, "+%d.%d", inst->dst.offset / reg_size, + inst->dst.offset % reg_size); + } + + if (inst->dst.stride != 1) + fprintf(file, "<%u>", inst->dst.stride); + fprintf(file, ":%s", brw_reg_type_to_letters(inst->dst.type)); + + for (int i = 0; i < inst->sources; i++) { + fprintf(file, ", "); + + if (inst->src[i].negate) + fprintf(file, "-"); + if (inst->src[i].abs) + fprintf(file, "|"); + switch (inst->src[i].file) { + case VGRF: + if (defs && defs->get(inst->src[i])) + fprintf(file, "%%%d", inst->src[i].nr); + else + fprintf(file, "v%d", inst->src[i].nr); + break; + case FIXED_GRF: + fprintf(file, "g%d", inst->src[i].nr); + break; + case ATTR: + fprintf(file, "attr%d", inst->src[i].nr); + break; + case UNIFORM: + fprintf(file, "u%d", inst->src[i].nr); + break; + case BAD_FILE: + fprintf(file, "(null)"); + break; + case IMM: + switch (inst->src[i].type) { + case BRW_TYPE_HF: + fprintf(file, "%-ghf", _mesa_half_to_float(inst->src[i].ud & 0xffff)); + break; + case BRW_TYPE_F: + fprintf(file, "%-gf", inst->src[i].f); + break; + case BRW_TYPE_DF: + fprintf(file, "%fdf", inst->src[i].df); + break; + case BRW_TYPE_W: + fprintf(file, "%dw", (int)(int16_t)inst->src[i].d); + break; + case BRW_TYPE_D: + fprintf(file, "%dd", inst->src[i].d); + break; + case BRW_TYPE_UW: + fprintf(file, "%duw", inst->src[i].ud & 0xffff); + break; + case BRW_TYPE_UD: + fprintf(file, "%uu", inst->src[i].ud); + break; + case BRW_TYPE_Q: + fprintf(file, "%" PRId64 "q", inst->src[i].d64); + break; + case BRW_TYPE_UQ: + fprintf(file, "%" PRIu64 "uq", inst->src[i].u64); + break; + case BRW_TYPE_VF: + fprintf(file, "[%-gF, %-gF, %-gF, %-gF]", + brw_vf_to_float((inst->src[i].ud >> 0) & 0xff), + brw_vf_to_float((inst->src[i].ud >> 8) & 0xff), + brw_vf_to_float((inst->src[i].ud >> 16) & 0xff), + brw_vf_to_float((inst->src[i].ud >> 24) & 0xff)); + break; + case 
BRW_TYPE_V: + case BRW_TYPE_UV: + fprintf(file, "%08x%s", inst->src[i].ud, + inst->src[i].type == BRW_TYPE_V ? "V" : "UV"); + break; + default: + fprintf(file, "???"); + break; + } + break; + case ARF: + switch (inst->src[i].nr & 0xF0) { + case BRW_ARF_NULL: + fprintf(file, "null"); + break; + case BRW_ARF_ADDRESS: + fprintf(file, "a0.%d", inst->src[i].subnr); + break; + case BRW_ARF_ACCUMULATOR: + if (inst->src[i].subnr == 0) + fprintf(file, "acc%d", inst->src[i].nr & 0x0F); + else + fprintf(file, "acc%d.%d", inst->src[i].nr & 0x0F, inst->src[i].subnr); + + break; + case BRW_ARF_FLAG: + fprintf(file, "f%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr); + break; + default: + fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr); + break; + } + break; + } + + if (inst->src[i].file == FIXED_GRF && inst->src[i].subnr != 0) { + assert(inst->src[i].offset == 0); + + fprintf(file, ".%d", inst->src[i].subnr / brw_type_size_bytes(inst->src[i].type)); + } else if (inst->src[i].offset || + (inst->src[i].file == VGRF && + alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i))) { + const unsigned reg_size = (inst->src[i].file == UNIFORM ? 4 : REG_SIZE); + fprintf(file, "+%d.%d", inst->src[i].offset / reg_size, + inst->src[i].offset % reg_size); + } + + if (inst->src[i].abs) + fprintf(file, "|"); + + if (inst->src[i].file != IMM) { + unsigned stride; + if (inst->src[i].file == ARF || inst->src[i].file == FIXED_GRF) { + unsigned hstride = inst->src[i].hstride; + stride = (hstride == 0 ? 
0 : (1 << (hstride - 1))); + } else { + stride = inst->src[i].stride; + } + if (stride != 1) + fprintf(file, "<%u>", stride); + + fprintf(file, ":%s", brw_reg_type_to_letters(inst->src[i].type)); + } + } + + fprintf(file, " "); + + if (inst->force_writemask_all) + fprintf(file, "NoMask "); + + if (inst->exec_size != dispatch_width) + fprintf(file, "group%d ", inst->group); + + if (inst->has_no_mask_send_params) + fprintf(file, "NoMaskParams "); + + if (inst->sched.pipe != TGL_PIPE_NONE) { + fprintf(file, "{ "); + brw_print_swsb(file, devinfo, inst->sched); + fprintf(file, " } "); + } + + fprintf(file, "\n"); +} + + +void +brw_print_swsb(FILE *f, const struct intel_device_info *devinfo, const tgl_swsb swsb) +{ + if (swsb.pipe == TGL_PIPE_NONE) + return; + + if (swsb.regdist) { + fprintf(f, "%s@%d", + (devinfo && devinfo->verx10 < 125 ? "" : + swsb.pipe == TGL_PIPE_FLOAT ? "F" : + swsb.pipe == TGL_PIPE_INT ? "I" : + swsb.pipe == TGL_PIPE_LONG ? "L" : + swsb.pipe == TGL_PIPE_ALL ? "A" : + swsb.pipe == TGL_PIPE_MATH ? "M" : "" ), + swsb.regdist); + } + + if (swsb.mode) { + if (swsb.regdist) + fprintf(f, " "); + + fprintf(f, "$%d%s", swsb.sbid, + (swsb.mode & TGL_SBID_SET ? "" : + swsb.mode & TGL_SBID_DST ? ".dst" : ".src")); + } +} + diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build index 12a8a8de201..77403ccf55e 100644 --- a/src/intel/compiler/meson.build +++ b/src/intel/compiler/meson.build @@ -101,6 +101,7 @@ libintel_compiler_brw_files = files( 'brw_nir_rt_builder.h', 'brw_packed_float.c', 'brw_predicated_break.cpp', + 'brw_print.cpp', 'brw_prim.h', 'brw_private.h', 'brw_reg.h',