amd: fix typos

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22432>
Author: Harri Nieminen
Date: 2023-04-12 07:10:19 +03:00
Committed by: Marge Bot
Parent: 1aab6820fb
Commit: aea48a4ff1
35 changed files with 60 additions and 60 deletions

View File

@@ -309,7 +309,7 @@ void ac_get_hs_info(struct radeon_info *info,
* store the task payload which is passed to mesh shaders.
*
* The driver only needs to create this BO once,
-* and it will always be able to accomodate the maximum needed
+* and it will always be able to accommodate the maximum needed
* task payload size.
*
* The following memory layout is used:

View File

@@ -241,7 +241,7 @@ cull_small_primitive_line(nir_builder *b, nir_ssa_def *pos[3][4],
* it doesn't exit it. If a line is entirely inside a corner diamond, it can be culled
* because it doesn't enter any diamond and thus can't exit any diamond.
*
-* The viewport is rotated by 45 degress to turn diamonds into squares, and a bounding
+* The viewport is rotated by 45 degrees to turn diamonds into squares, and a bounding
* box test is used to determine whether a line is entirely inside any square (diamond).
*
* The line width doesn't matter. Wide lines only duplicate filled pixels in either X or
@@ -264,7 +264,7 @@ cull_small_primitive_line(nir_builder *b, nir_ssa_def *pos[3][4],
v1[chan] = nir_ffma(b, pos[1][chan], vp_scale, vp_translate);
}
-/* Rotate the viewport by 45 degress, so that diamonds become squares. */
+/* Rotate the viewport by 45 degrees, so that diamonds become squares. */
rotate_45degrees(b, v0);
rotate_45degrees(b, v1);
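For illustration: the 45-degree rotation referenced in this hunk maps the pixel-center diamonds onto axis-aligned squares, which is what makes the subsequent bounding-box test sufficient. A minimal scalar sketch of that rotation follows; the driver's actual rotate_45degrees() builds NIR instructions, so this is only the underlying math, not the implementation.

```c
/* Scalar sketch of a 45-degree rotation (illustration only, not the
 * driver's rotate_45degrees(), which emits NIR SSA values). */
static void rotate_45degrees_scalar(float v[2])
{
   const float c = 0.70710678f; /* 1/sqrt(2) == cos(45 deg) == sin(45 deg) */
   float x = v[0], y = v[1];
   v[0] = (x - y) * c; /* rotated X */
   v[1] = (x + y) * c; /* rotated Y */
}
```

Under this mapping a diamond |x| + |y| <= r becomes the square max(|x'|, |y'|) <= r / sqrt(2), so a simple min/max comparison decides containment.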

View File

@@ -62,7 +62,7 @@ emit_split_buffer_load(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *v_off, ni
unsigned full_dwords = total_bytes / 4u;
unsigned remaining_bytes = total_bytes - full_dwords * 4u;
-/* Accomodate max number of split 64-bit loads */
+/* Accommodate max number of split 64-bit loads */
nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS * 2u];
/* Assume that 1x32-bit load is better than 1x16-bit + 1x8-bit */
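As a concrete illustration of the split computed in this hunk (hypothetical byte count, not taken from the driver): a 14-byte load becomes three full 32-bit loads plus two trailing bytes, which are then covered by the smaller loads discussed in the comment.

```c
/* Hypothetical example of the dword split described above. */
unsigned total_bytes = 14;                                 /* e.g. a 14-byte load */
unsigned full_dwords = total_bytes / 4u;                   /* 3 full 32-bit loads */
unsigned remaining_bytes = total_bytes - full_dwords * 4u; /* 2 bytes left over */
```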

View File

@@ -1494,7 +1494,7 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
/* Run culling algorithms if culling is enabled.
*
* NGG culling can be enabled or disabled in runtime.
-* This is determined by a SGPR shader argument which is acccessed
+* This is determined by a SGPR shader argument which is accessed
* by the following NIR intrinsic.
*/
@@ -3233,7 +3233,7 @@ ngg_gs_build_streamout(nir_builder *b, lower_ngg_gs_state *s)
/* We want to export primitives to streamout buffer in sequence,
* but not all vertices are alive or mark end of a primitive, so
-* there're "holes". We don't need continous invocations to write
+* there're "holes". We don't need continuous invocations to write
* primitives to streamout buffer like final vertex export, so
* just repack to get the sequence (export_seq) is enough, no need
* to do compaction.
@@ -4018,7 +4018,7 @@ ms_emit_arrayed_outputs(nir_builder *b,
nir_ssa_def *zero = nir_imm_int(b, 0);
u_foreach_bit64(slot, mask) {
-/* Should not occour here, handled separately. */
+/* Should not occur here, handled separately. */
assert(slot != VARYING_SLOT_PRIMITIVE_COUNT && slot != VARYING_SLOT_PRIMITIVE_INDICES);
unsigned component_mask = s->output_info[slot].components_mask;

View File

@@ -151,8 +151,8 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s)
uint64_t outputs_written = b->shader->info.outputs_written;
/* use outputs_written to determine export format as we use it to set
-* R_028710_SPI_SHADER_Z_FORMAT instead of relying on the real store ouput,
-* because store ouput may be optimized out.
+* R_028710_SPI_SHADER_Z_FORMAT instead of relying on the real store output,
+* because store output may be optimized out.
*/
unsigned format =
ac_get_spi_shader_z_format(outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH),
@@ -352,7 +352,7 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, gl_frag_result slot, uns
pack_op = nir_op_pack_snorm_2x16;
break;
default:
-unreachable("unsupport color export format");
+unreachable("unsupported color export format");
break;
}

View File

@@ -192,7 +192,7 @@ static void ac_sqtt_fill_cpu_info(struct sqtt_file_chunk_cpu_info *chunk)
if (os_get_total_physical_memory(&system_ram_size))
chunk->system_ram_size = system_ram_size / (1024 * 1024);
-/* Parse cpuinfo to get more detailled information. */
+/* Parse cpuinfo to get more detailed information. */
f = fopen("/proc/cpuinfo", "r");
if (!f)
return;

View File

@@ -364,7 +364,7 @@ unsigned ac_get_tbuffer_format(enum amd_gfx_level gfx_level, unsigned dfmt, unsi
// Use the regularity properties of the combined format enum.
//
// Note: float is incompatible with 8-bit data formats,
-// [us]{norm,scaled} are incomparible with 32-bit data formats.
+// [us]{norm,scaled} are incompatible with 32-bit data formats.
// [us]scaled are not writable.
switch (nfmt) {
case V_008F0C_BUF_NUM_FORMAT_UNORM:

View File

@@ -4073,7 +4073,7 @@ void ac_check_shadowed_regs(enum amd_gfx_level gfx_level, enum radeon_family fam
unsigned end_reg_offset = reg_offset + count * 4;
unsigned end_range_offset = ranges[i].offset + ranges[i].size;
-/* Test if the ranges interect. */
+/* Test if the ranges intersect. */
if (MAX2(ranges[i].offset, reg_offset) < MIN2(end_range_offset, end_reg_offset)) {
/* Assertion: A register can be listed only once. */
assert(!found);

View File

@@ -793,7 +793,7 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib, const struct ac_surf_config *
if (ret == ADDR_OK) {
/* If the DCC memory isn't properly
* aligned, the data are interleaved
-* accross slices.
+* across slices.
*/
if (AddrDccOut->dccRamSizeAligned)
dcc_level->dcc_slice_fast_clear_size = AddrDccOut->dccFastClearSize;

View File

@@ -327,7 +327,7 @@ Waiting for the VMEM/DS instruction to finish, a VALU or export instruction, or
### VALUTransUseHazard
Triggered by:
-A VALU instrction reading a VGPR written by a transcendental VALU instruction without 6+ VALU or 2+
+A VALU instruction reading a VGPR written by a transcendental VALU instruction without 6+ VALU or 2+
transcendental instructions in-between.
Mitigated by:

View File

@@ -105,7 +105,7 @@ This means that we need to insert `s_waitcnt` instructions (and its variants) so
#### Resolve hazards and insert NOPs
Some instructions require wait states or other instructions to resolve hazards which are not handled by the hardware.
-This pass makes sure that no known hazards occour.
+This pass makes sure that no known hazards occur.
#### Emit program - Assembler
@@ -118,7 +118,7 @@ Which software stage gets executed on which hardware stage depends on what kind
An important difference is that VS is always the first stage to run in SW models,
whereas HW VS refers to the last HW stage before fragment shading in GCN/RDNA terminology.
-That's why, among other things, the HW VS is no longer used to execute the SW VS when tesselation or geometry shading are used.
+That's why, among other things, the HW VS is no longer used to execute the SW VS when tessellation or geometry shading are used.
#### Glossary of software stages

View File

@@ -786,7 +786,7 @@ get_alu_src_vop3p(struct isel_context* ctx, nir_alu_src src)
/* extract a full dword if possible */
if (tmp.bytes() >= (dword + 1) * 4) {
-/* if the source is splitted into components, use p_create_vector */
+/* if the source is split into components, use p_create_vector */
auto it = ctx->allocated_vec.find(tmp.id());
if (it != ctx->allocated_vec.end()) {
unsigned index = dword << 1;
@@ -5549,7 +5549,7 @@ mtbuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
ac_get_safe_fetch_size(bld.program->gfx_level, vtx_info, const_offset, max_components,
alignment, max_fetched_components);
const unsigned fetch_fmt = vtx_info->hw_format[max_fetched_components - 1];
-/* Adjust bytes needed in case we need to do a smaller load due to aligment.
+/* Adjust bytes needed in case we need to do a smaller load due to alignment.
* If a larger format is selected, it's still OK to load a smaller amount from it.
*/
bytes_needed = MIN2(bytes_needed, max_fetched_components * info.component_size);

View File

@@ -2037,11 +2037,11 @@ static constexpr Stage vertex_geometry_gs(HWStage::GS, SWStage::VS_GS);
static constexpr Stage vertex_tess_control_hs(HWStage::HS, SWStage::VS_TCS);
static constexpr Stage tess_eval_geometry_gs(HWStage::GS, SWStage::TES_GS);
/* pre-GFX9 */
-static constexpr Stage vertex_ls(HWStage::LS, SWStage::VS); /* vertex before tesselation control */
+static constexpr Stage vertex_ls(HWStage::LS, SWStage::VS); /* vertex before tessellation control */
static constexpr Stage vertex_es(HWStage::ES, SWStage::VS); /* vertex before geometry */
static constexpr Stage tess_control_hs(HWStage::HS, SWStage::TCS);
static constexpr Stage tess_eval_es(HWStage::ES,
-SWStage::TES); /* tesselation evaluation before geometry */
+SWStage::TES); /* tessellation evaluation before geometry */
static constexpr Stage geometry_gs(HWStage::GS, SWStage::GS);
/* Raytracing */
static constexpr Stage raytracing_cs(HWStage::CS, SWStage::RT);

View File

@@ -71,7 +71,7 @@ aco_opcode
get_reduce_opcode(amd_gfx_level gfx_level, ReduceOp op)
{
/* Because some 16-bit instructions are already VOP3 on GFX10, we use the
-* 32-bit opcodes (VOP2) which allows to remove the tempory VGPR and to use
+* 32-bit opcodes (VOP2) which allows to remove the temporary VGPR and to use
* DPP with the arithmetic instructions. This requires to sign-extend.
*/
switch (op) {
@@ -718,7 +718,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
for (unsigned i = 0; i < src.size(); i++) {
if (!identity[i].isConstant() ||
-identity[i].constantValue()) { /* bound_ctrl should take care of this overwise */
+identity[i].constantValue()) { /* bound_ctrl should take care of this otherwise */
if (ctx->program->gfx_level < GFX10)
assert((identity[i].isConstant() && !identity[i].isLiteral()) ||
identity[i].physReg() == PhysReg{sitmp + i});

View File

@@ -205,7 +205,7 @@ class Opcode(object):
- name is the name of the opcode (prepend nir_op_ for the enum name)
- all types are strings that get nir_type_ prepended to them
- input_types is a list of types
-- algebraic_properties is a space-seperated string, where nir_op_is_ is
+- algebraic_properties is a space-separated string, where nir_op_is_ is
prepended before each entry
- const_expr is an expression or series of statements that computes the
constant value of the opcode given the constant values of its inputs.

View File

@@ -1135,7 +1135,7 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
sel.offset() == 0 &&
((sel.size() == 2 && instr->operands[0].constantValue() >= 16u) ||
(sel.size() == 1 && instr->operands[0].constantValue() >= 24u))) {
-/* The undesireable upper bits are already shifted out. */
+/* The undesirable upper bits are already shifted out. */
return;
} else if (instr->opcode == aco_opcode::v_mul_u32_u24 && ctx.program->gfx_level >= GFX10 &&
!instr->usesModifiers() && sel.size() == 2 && !sel.sign_extend() &&
@@ -3567,7 +3567,7 @@ apply_ds_extract(opt_ctx& ctx, aco_ptr<Instruction>& extract)
unsigned sign_ext = extract->operands[3].constantValue();
unsigned dst_bitsize = extract->definitions[0].bytes() * 8u;
-/* TODO: These are doable, but probably don't occour too often. */
+/* TODO: These are doable, but probably don't occur too often. */
if (extract_idx || sign_ext || dst_bitsize != 32)
return false;

View File

@@ -694,7 +694,7 @@ schedule_SMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
break;
/* don't use LDS/GDS instructions to hide latency since it can
-* significanly worsen LDS scheduling */
+* significantly worsen LDS scheduling */
if (candidate->isDS() || !can_move_down) {
add_to_hazard_query(&hq, candidate.get());
ctx.mv.downwards_skip(cursor);

View File

@@ -881,7 +881,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
Temp var = phi->operands[i].getTemp();
std::map<Temp, Temp>::iterator rename_it = ctx.renames[pred_idx].find(var);
-/* prevent the definining instruction from being DCE'd if it could be rematerialized */
+/* prevent the defining instruction from being DCE'd if it could be rematerialized */
if (rename_it == ctx.renames[preds[i]].end() && ctx.remat.count(var))
ctx.unused_remats.erase(ctx.remat[var].instr);
@@ -1001,7 +1001,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
ctx.renames[pred_idx].find(phi->operands[i].getTemp());
if (it != ctx.renames[pred_idx].end()) {
phi->operands[i].setTemp(it->second);
-/* prevent the definining instruction from being DCE'd if it could be rematerialized */
+/* prevent the defining instruction from being DCE'd if it could be rematerialized */
} else {
auto remat_it = ctx.remat.find(phi->operands[i].getTemp());
if (remat_it != ctx.remat.end()) {
@@ -1117,7 +1117,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
tmp = rename;
} else {
tmp = pair.first;
-/* prevent the definining instruction from being DCE'd if it could be rematerialized */
+/* prevent the defining instruction from being DCE'd if it could be rematerialized */
if (ctx.remat.count(tmp))
ctx.unused_remats.erase(ctx.remat[tmp].instr);
}
@@ -1162,7 +1162,7 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand s
std::vector<aco_ptr<Instruction>> instructions;
unsigned idx = 0;
-/* phis are handled separetely */
+/* phis are handled separately */
while (block->instructions[idx]->opcode == aco_opcode::p_phi ||
block->instructions[idx]->opcode == aco_opcode::p_linear_phi) {
instructions.emplace_back(std::move(block->instructions[idx++]));
@@ -1191,7 +1191,7 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand s
if (rename_it != ctx.renames[block_idx].end()) {
op.setTemp(rename_it->second);
} else {
-/* prevent its definining instruction from being DCE'd if it could be rematerialized */
+/* prevent its defining instruction from being DCE'd if it could be rematerialized */
auto remat_it = ctx.remat.find(op.getTemp());
if (remat_it != ctx.remat.end()) {
ctx.unused_remats.erase(remat_it->second.instr);

View File

@@ -459,7 +459,7 @@ public:
free(buffer);
}
-/* Delete copy-constructor and -assigment to avoid double free() */
+/* Delete copy-constructor and -assignment to avoid double free() */
monotonic_buffer_resource(const monotonic_buffer_resource&) = delete;
monotonic_buffer_resource& operator=(const monotonic_buffer_resource&) = delete;

View File

@@ -6,5 +6,5 @@ The submit ioctl is stubbed out to not execute anything.
Export `MESA_LOADER_DRIVER_OVERRIDE=r300
LD_PRELOAD=$prefix/lib/libradeon_noop_drm_shim.so`. (or r600 for r600-class HW)
-By default, rv515 is exposed. The chip can be selected an enviornment
+By default, rv515 is exposed. The chip can be selected an environment
variable like `RADEON_GPU_ID=CAYMAN` or `RADEON_GPU_ID=0x6740`.

View File

@@ -872,7 +872,7 @@ void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_
*
* where d is the depth of the texture array and layer
* comes from the component indicated in the tables below.
-* Workaroudn for an issue where the layer is taken from a
+* Workaround for an issue where the layer is taken from a
* helper invocation which happens to fall on a different
* layer due to extrapolation."
*
@@ -1972,7 +1972,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_
if (atomic) {
data_type = LLVMTypeOf(a->data[0]);
} else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip) {
-/* Image stores might have been shrinked using the format. */
+/* Image stores might have been shrunk using the format. */
data_type = LLVMTypeOf(a->data[0]);
dmask = (1 << ac_get_llvm_num_components(a->data[0])) - 1;
} else {

View File

@@ -450,7 +450,7 @@ struct waterfall_context {
* to implement the body.
*
* params:
-* - ctx is the usal nir context
+* - ctx is the usual nir context
* - wctx is a temporary struct containing some loop info. Can be left uninitialized.
* - value is the possibly divergent value for which we built the loop
* - divergent is whether value is actually divergent. If false we just pass

View File

@@ -218,7 +218,7 @@ combined_node_cost(uint32_t lds_base, uint32_t i, uint32_t j)
* tree depth for internal nodes)
*
* Dividing area by both relative costs will make it more likely that we merge nodes with
-* a hight child cost.
+* a high child cost.
*/
float p_i = aabb_surface_area(shared_bounds[i - lds_base]) / area;
float p_j = aabb_surface_area(shared_bounds[j - lds_base]) / area;

View File

@@ -356,7 +356,7 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
nir_ssa_def *load = loads[0];
/* Extract the channels we actually need when we couldn't skip starting
-* components or had to emit more than one load instrinsic.
+* components or had to emit more than one load intrinsic.
*/
if (num_loads > 0 && (first_used_channel > skipped_start || num_loads != 1))
load = nir_extract_bits(b, loads, num_loads, (first_used_channel - skipped_start) * bit_size,

View File

@@ -207,10 +207,10 @@ radix_sort_vk_destroy(radix_sort_vk_t * rs, //
// keyvals_alignment : Alignment of each keyval buffer
//
// internal_size : Minimum size of internal buffer
-// internal_aligment : Alignment of the internal buffer
+// internal_alignment : Alignment of the internal buffer
//
// indirect_size : Minimum size of indirect buffer
-// indirect_aligment : Alignment of the indirect buffer
+// indirect_alignment : Alignment of the indirect buffer
//
// .keyvals_even/odd
// -----------------

View File

@@ -174,7 +174,7 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea
size += ycbcr_sampler_count * sizeof(struct vk_ycbcr_conversion_state);
}
-/* We need to allocate decriptor set layouts off the device allocator with DEVICE scope because
+/* We need to allocate descriptor set layouts off the device allocator with DEVICE scope because
* they are reference counted and may not be destroyed when vkDestroyDescriptorSetLayout is
* called.
*/

View File

@@ -70,7 +70,7 @@ class radv_llvm_per_thread_info {
struct ac_compiler_passes *passes;
};
-/* we have to store a linked list per thread due to the possiblity of multiple gpus being required */
+/* we have to store a linked list per thread due to the possibility of multiple gpus being required */
static thread_local std::list<radv_llvm_per_thread_info> radv_llvm_per_thread_list;
bool

View File

@@ -2315,7 +2315,7 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
device->cs_wave_size = 32;
-/* For pixel shaders, wave64 is recommanded. */
+/* For pixel shaders, wave64 is recommended. */
if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
device->ps_wave_size = 32;
@@ -2641,7 +2641,7 @@ radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
device->ws->query_value(device->ws, RADEON_GTT_USAGE);
uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);
-/* Compute the total free space that can be allocated for this process accross all heaps. */
+/* Compute the total free space that can be allocated for this process across all heaps. */
uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);
memoryBudget->heapBudget[vram_vis_heap_idx] = total_free_space + total_internal_usage;
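A worked example of the heap-budget formula in this hunk, using made-up numbers (an 8 GiB heap, 1 GiB allocated by this process, 3 GiB used system-wide); this is only an illustration of the arithmetic, not driver code.

```c
#include <stdint.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))
#define MIN2(a, b) ((a) < (b) ? (a) : (b))

/* Hypothetical numbers, only to illustrate the budget computation above. */
static uint64_t example_heap_budget(void)
{
   uint64_t total_heap_size = 8ull << 30;      /* 8 GiB heap */
   uint64_t total_internal_usage = 1ull << 30; /* 1 GiB allocated by this process */
   uint64_t total_system_usage = 3ull << 30;   /* 3 GiB used across the system */

   uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);            /* 3 GiB */
   uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage); /* 5 GiB */
   return total_free_space + total_internal_usage; /* budget = 6 GiB */
}
```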
@@ -2673,7 +2673,7 @@ radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);
-/* Compute the total free space that can be allocated for this process accross all heaps. */
+/* Compute the total free space that can be allocated for this process across all heaps. */
uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);
/* Compute the remaining visible VRAM size for this process. */

View File

@@ -4106,7 +4106,7 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv
if (enable_mrt_compaction) {
blend.spi_shader_col_format = radv_compact_spi_shader_col_format(ps, &blend);
-/* In presense of MRT holes (ie. the FS exports MRT1 but not MRT0), the compiler will remap
+/* In presence of MRT holes (ie. the FS exports MRT1 but not MRT0), the compiler will remap
* them, so that only MRT0 is exported and the driver will compact SPI_SHADER_COL_FORMAT to
* match what the FS actually exports. Though, to make sure the hw remapping works as
* expected, we should also clear color attachments without exports in CB_SHADER_MASK.

View File

@@ -1789,7 +1789,7 @@ struct radv_cmd_buffer {
} ace_internal;
/**
-* Whether a query pool has been resetted and we have to flush caches.
+* Whether a query pool has been reset and we have to flush caches.
*/
bool pending_reset_query;

View File

@@ -1466,7 +1466,7 @@ emit_query_flush(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
if (cmd_buffer->pending_reset_query) {
if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
/* Only need to flush caches if the query pool size is
-* large enough to be resetted using the compute shader
+* large enough to be reset using the compute shader
* path. Small pools don't need any cache flushes
* because we use a CP dma clear.
*/

View File

@@ -1242,7 +1242,7 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename)
rra_dump_chunk_description(accel_struct_offsets[i],
sizeof(struct rra_accel_struct_chunk_header), accel_struct_size,
-"RawAccelStruc", RADV_RRA_CHUNK_ID_ACCEL_STRUCT, file);
+"RawAccelStruct", RADV_RRA_CHUNK_ID_ACCEL_STRUCT, file);
}
uint64_t file_end = (uint64_t)ftell(file);

View File

@@ -108,7 +108,7 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s
nir_store_var(b, child_indices,
nir_imm_ivec4(b, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu), 0xf);
-/* Need to remove infinities here because otherwise we get nasty NaN propogation
+/* Need to remove infinities here because otherwise we get nasty NaN propagation
* if the direction has 0s in it. */
/* inv_dir = clamp(inv_dir, -FLT_MAX, FLT_MAX); */
inv_dir = nir_fclamp(b, inv_dir, nir_imm_float(b, -FLT_MAX), nir_imm_float(b, FLT_MAX));
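The NaN propagation that the clamp in this hunk guards against can be reproduced in plain C: with an unclamped inverse direction, a zero direction component turns the slab test into 0 * inf = NaN, while clamping to FLT_MAX keeps the product at 0. This is a hypothetical standalone sketch, not the NIR code the driver emits.

```c
#include <float.h>
#include <math.h>
#include <stdio.h>

/* Standalone illustration of why inv_dir is clamped to +/-FLT_MAX above. */
int main(void)
{
   float dir = 0.0f;              /* ray direction component that happens to be 0 */
   float origin = 1.0f, plane = 1.0f;

   float inv_dir = 1.0f / dir;               /* +inf */
   float t_bad = (plane - origin) * inv_dir; /* 0 * inf -> NaN */

   float inv_clamped = fminf(fmaxf(inv_dir, -FLT_MAX), FLT_MAX); /* FLT_MAX */
   float t_ok = (plane - origin) * inv_clamped;                  /* 0 * FLT_MAX == 0 */

   printf("t_bad=%f t_ok=%f\n", t_bad, t_ok); /* prints nan and 0.000000 */
   return 0;
}
```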
@@ -238,7 +238,7 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s
nir_ssa_def *k_indices[3] = {kx, ky, kz};
nir_ssa_def *k = nir_vec(b, k_indices, 3);
-/* Swap kx and ky dimensions to preseve winding order */
+/* Swap kx and ky dimensions to preserve winding order */
unsigned swap_xy_swizzle[4] = {1, 0, 2, 3};
k = nir_bcsel(b, nir_flt(b, nir_vector_extract(b, dir, kz), nir_imm_float(b, 0.0f)),
nir_swizzle(b, k, swap_xy_swizzle, 3), k);

View File

@@ -2080,7 +2080,7 @@ radv_aco_build_shader_binary(void **bin, const struct ac_shader_config *config,
size += code_dw * sizeof(uint32_t) + sizeof(struct radv_shader_binary_legacy);
-/* We need to calloc to prevent unintialized data because this will be used
+/* We need to calloc to prevent uninitialized data because this will be used
* directly for the disk cache. Uninitialized data can appear because of
* padding in the struct or because legacy_binary->data can be at an offset
* from the start less than sizeof(radv_shader_binary_legacy). */

View File

@@ -307,7 +307,7 @@ radv_copy_thread_trace_info_regs(struct radv_device *device, struct radeon_cmdbu
if (pdevice->rad_info.gfx_level >= GFX11) {
/* On GFX11, SQ_THREAD_TRACE_WPTR is incremented from the "initial WPTR address" instead of 0.
* To get the number of bytes (in units of 32 bytes) written by SQTT, the workaround is to
-* substract SQ_THREAD_TRACE_WPTR from the "initial WPTR address" as follow:
+* subtract SQ_THREAD_TRACE_WPTR from the "initial WPTR address" as follow:
*
* 1) get the current buffer base address for this SE
* 2) shift right by 5 bits because SQ_THREAD_TRACE_WPTR is 32-byte aligned