radv: Add minimal subgroup support.
Deliberately not implementing workgroup scopes, as that is not needed for core Vulkan.
Reviewed-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
@@ -3883,6 +3883,46 @@ visit_load_local_invocation_index(struct ac_nir_context *ctx)
|
|||||||
return LLVMBuildAdd(ctx->ac.builder, result, thread_id, "");
|
return LLVMBuildAdd(ctx->ac.builder, result, thread_id, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static LLVMValueRef
|
||||||
|
visit_load_subgroup_id(struct ac_nir_context *ctx)
|
||||||
|
{
|
||||||
|
if (ctx->stage == MESA_SHADER_COMPUTE) {
|
||||||
|
LLVMValueRef result;
|
||||||
|
result = LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size,
|
||||||
|
LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
|
||||||
|
return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), "");
|
||||||
|
} else {
|
||||||
|
return LLVMConstInt(ctx->ac.i32, 0, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static LLVMValueRef
|
||||||
|
visit_load_num_subgroups(struct ac_nir_context *ctx)
|
||||||
|
{
|
||||||
|
if (ctx->stage == MESA_SHADER_COMPUTE) {
|
||||||
|
return LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size,
|
||||||
|
LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
|
||||||
|
} else {
|
||||||
|
return LLVMConstInt(ctx->ac.i32, 1, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static LLVMValueRef
|
||||||
|
visit_first_invocation(struct ac_nir_context *ctx)
|
||||||
|
{
|
||||||
|
LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
|
||||||
|
|
||||||
|
/* The second argument is whether cttz(0) should be defined, but we do not care. */
|
||||||
|
LLVMValueRef args[] = {active_set, LLVMConstInt(ctx->ac.i1, 0, false)};
|
||||||
|
LLVMValueRef result = ac_build_intrinsic(&ctx->ac,
|
||||||
|
"llvm.cttz.i64",
|
||||||
|
ctx->ac.i64, args, 2,
|
||||||
|
AC_FUNC_ATTR_NOUNWIND |
|
||||||
|
AC_FUNC_ATTR_READNONE);
|
||||||
|
|
||||||
|
return LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, "");
|
||||||
|
}
|
||||||
|
|
||||||
static LLVMValueRef
|
static LLVMValueRef
|
||||||
visit_load_shared(struct ac_nir_context *ctx,
|
visit_load_shared(struct ac_nir_context *ctx,
|
||||||
const nir_intrinsic_instr *instr)
|
const nir_intrinsic_instr *instr)
|
||||||
@@ -4411,6 +4451,15 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
|
|||||||
case nir_intrinsic_load_local_invocation_index:
|
case nir_intrinsic_load_local_invocation_index:
|
||||||
result = visit_load_local_invocation_index(ctx);
|
result = visit_load_local_invocation_index(ctx);
|
||||||
break;
|
break;
|
||||||
|
case nir_intrinsic_load_subgroup_id:
|
||||||
|
result = visit_load_subgroup_id(ctx);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_load_num_subgroups:
|
||||||
|
result = visit_load_num_subgroups(ctx);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_first_invocation:
|
||||||
|
result = visit_first_invocation(ctx);
|
||||||
|
break;
|
||||||
case nir_intrinsic_load_push_constant:
|
case nir_intrinsic_load_push_constant:
|
||||||
result = visit_load_push_constant(ctx, instr);
|
result = visit_load_push_constant(ctx, instr);
|
||||||
break;
|
break;
|
||||||
|
@@ -61,6 +61,8 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_intrinsic_load_local_invocation_index:
|
case nir_intrinsic_load_local_invocation_index:
|
||||||
|
case nir_intrinsic_load_subgroup_id:
|
||||||
|
case nir_intrinsic_load_num_subgroups:
|
||||||
info->cs.uses_local_invocation_idx = true;
|
info->cs.uses_local_invocation_idx = true;
|
||||||
break;
|
break;
|
||||||
case nir_intrinsic_load_sample_id:
|
case nir_intrinsic_load_sample_id:
|
||||||
|
@@ -866,6 +866,15 @@ void radv_GetPhysicalDeviceProperties2(
|
|||||||
properties->minImportedHostPointerAlignment = 4096;
|
properties->minImportedHostPointerAlignment = 4096;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
|
||||||
|
VkPhysicalDeviceSubgroupProperties *properties =
|
||||||
|
(VkPhysicalDeviceSubgroupProperties*)ext;
|
||||||
|
properties->subgroupSize = 64;
|
||||||
|
properties->supportedStages = VK_SHADER_STAGE_ALL;
|
||||||
|
properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT;
|
||||||
|
properties->quadOperationsInAllStages = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@@ -210,6 +210,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
|
|||||||
.tessellation = true,
|
.tessellation = true,
|
||||||
.int64 = true,
|
.int64 = true,
|
||||||
.multiview = true,
|
.multiview = true,
|
||||||
|
.subgroup_basic = true,
|
||||||
.variable_pointers = true,
|
.variable_pointers = true,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
@@ -266,6 +267,15 @@ radv_shader_compile_to_nir(struct radv_device *device,
|
|||||||
nir_lower_global_vars_to_local(nir);
|
nir_lower_global_vars_to_local(nir);
|
||||||
nir_remove_dead_variables(nir, nir_var_local);
|
nir_remove_dead_variables(nir, nir_var_local);
|
||||||
ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);
|
ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);
|
||||||
|
nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
|
||||||
|
.subgroup_size = 64,
|
||||||
|
.ballot_bit_size = 64,
|
||||||
|
.lower_to_scalar = 1,
|
||||||
|
.lower_subgroup_masks = 1,
|
||||||
|
.lower_shuffle = 1,
|
||||||
|
.lower_quad = 1,
|
||||||
|
});
|
||||||
|
|
||||||
radv_optimize_nir(nir);
|
radv_optimize_nir(nir);
|
||||||
|
|
||||||
return nir;
|
return nir;
|
||||||
|
Reference in New Issue
Block a user