rusticl: translate spirv to nir and first steps to kernel arg handling

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15439>
Karol Herbst
2022-04-28 22:07:49 +02:00
committed by Marge Bot
parent 46ee5988cd
commit 5795ee0e08
16 changed files with 1108 additions and 49 deletions

View File

@@ -966,6 +966,7 @@ if with_gallium_rusticl
add_languages('rust', required: true)
with_clc = true
with_libclc = true
endif
dep_clc = null_dep

View File

@@ -1079,18 +1079,27 @@ extern "C" fn cl_enqueue_unmap_mem_object(
}
extern "C" fn cl_enqueue_ndrange_kernel(
_command_queue: cl_command_queue,
_kernel: cl_kernel,
_work_dim: cl_uint,
_global_work_offset: *const usize,
_global_work_size: *const usize,
_local_work_size: *const usize,
_num_events_in_wait_list: cl_uint,
_event_wait_list: *const cl_event,
_event: *mut cl_event,
command_queue: cl_command_queue,
kernel: cl_kernel,
work_dim: cl_uint,
global_work_offset: *const usize,
global_work_size: *const usize,
local_work_size: *const usize,
num_events_in_wait_list: cl_uint,
event_wait_list: *const cl_event,
event: *mut cl_event,
) -> cl_int {
println!("cl_enqueue_ndrange_kernel not implemented");
CL_OUT_OF_HOST_MEMORY
match_err!(enqueue_ndrange_kernel(
command_queue,
kernel,
work_dim,
global_work_offset,
global_work_size,
local_work_size,
num_events_in_wait_list,
event_wait_list,
event
))
}
extern "C" fn cl_get_extension_function_address(

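The match_err! macro used above is an existing rusticl helper whose definition is not part of this diff; a minimal sketch of the shape it presumably has, assuming CLResult carries a cl_int error code, would be:

macro_rules! match_err {
    ($exp:expr) => {
        match $exp {
            Ok(_) => CL_SUCCESS as cl_int,
            Err(e) => e,
        }
    };
}

This keeps the extern "C" entry points thin: all validation lives in the safe Rust helpers such as enqueue_ndrange_kernel below.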
View File

@@ -1,14 +1,17 @@
extern crate mesa_rust_util;
extern crate rusticl_opencl_gen;
use crate::api::event::create_and_queue;
use crate::api::icd::*;
use crate::api::util::*;
use crate::core::event::*;
use crate::core::kernel::*;
use self::mesa_rust_util::string::*;
use self::rusticl_opencl_gen::*;
use std::collections::HashSet;
use std::slice;
use std::sync::Arc;
impl CLInfo<cl_kernel_info> for cl_kernel {
@@ -73,6 +76,19 @@ impl CLInfoObj<cl_kernel_work_group_info, cl_device_id> for cl_kernel {
}
}
const ZERO_ARR: [usize; 3] = [0; 3];
/// # Safety
///
/// This function is only safe when called on an array of `work_dim` length
unsafe fn kernel_work_arr_or_default<'a>(arr: *const usize, work_dim: cl_uint) -> &'a [usize] {
if !arr.is_null() {
slice::from_raw_parts(arr, work_dim as usize)
} else {
&ZERO_ARR
}
}
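A usage sketch mirroring enqueue_ndrange_kernel further down; the pointers are the raw API parameters, so the caller has to uphold the length contract from the safety comment:

// valid pointer: a slice of `work_dim` entries is borrowed from the application
let sizes = unsafe { kernel_work_arr_or_default(global_work_size, work_dim) };
// null pointer: falls back to ZERO_ARR, i.e. &[0, 0, 0]
let offsets = unsafe { kernel_work_arr_or_default(std::ptr::null(), work_dim) };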
pub fn create_kernel(
program: cl_program,
kernel_name: *const ::std::os::raw::c_char,
@@ -108,6 +124,7 @@ pub fn create_kernel(
// CL_INVALID_KERNEL_DEFINITION if the function definition for __kernel function given by
// kernel_name such as the number of arguments, the argument types are not the same for all
// devices for which the program executable has been built.
let nirs = p.nirs(&name);
let kernel_args: HashSet<_> = devs.iter().map(|d| p.args(d, &name)).collect();
if kernel_args.len() != 1 {
return Err(CL_INVALID_KERNEL_DEFINITION);
@@ -116,6 +133,7 @@ pub fn create_kernel(
Ok(cl_kernel::from_arc(Kernel::new(
name,
p,
nirs,
kernel_args.into_iter().next().unwrap(),
)))
}
@@ -123,24 +141,177 @@ pub fn create_kernel(
pub fn set_kernel_arg(
kernel: cl_kernel,
arg_index: cl_uint,
_arg_size: usize,
_arg_value: *const ::std::os::raw::c_void,
arg_size: usize,
arg_value: *const ::std::os::raw::c_void,
) -> CLResult<()> {
let k = kernel.get_arc()?;
// CL_INVALID_ARG_INDEX if arg_index is not a valid argument index.
if arg_index as usize >= k.args.len() {
return Err(CL_INVALID_ARG_INDEX);
if let Some(arg) = k.args.get(arg_index as usize) {
// CL_INVALID_ARG_SIZE if arg_size does not match the size of the data type for an argument
// that is not a memory object or if the argument is a memory object and
// arg_size != sizeof(cl_mem) or if arg_size is zero and the argument is declared with the
// local qualifier or if the argument is a sampler and arg_size != sizeof(cl_sampler).
match arg.kind {
KernelArgType::MemLocal => {
if arg_size == 0 {
return Err(CL_INVALID_ARG_SIZE);
}
}
_ => {
if arg.size != arg_size {
return Err(CL_INVALID_ARG_SIZE);
}
}
}
// CL_INVALID_ARG_VALUE if arg_value specified is not a valid value.
match arg.kind {
// If the argument is declared with the local qualifier, the arg_value entry must be
// NULL.
KernelArgType::MemLocal => {
if !arg_value.is_null() {
return Err(CL_INVALID_ARG_VALUE);
}
}
// If the argument is of type sampler_t, the arg_value entry must be a pointer to the
// sampler object.
KernelArgType::Constant | KernelArgType::Sampler => {
if arg_value.is_null() {
return Err(CL_INVALID_ARG_VALUE);
}
}
_ => {}
};
// let's create the arg now
let arg = unsafe {
if arg.dead {
KernelArgValue::None
} else {
match arg.kind {
KernelArgType::Constant => KernelArgValue::Constant(
slice::from_raw_parts(arg_value.cast(), arg_size).to_vec(),
),
KernelArgType::MemConstant | KernelArgType::MemGlobal => {
let ptr: *const cl_mem = arg_value.cast();
if ptr.is_null() || (*ptr).is_null() {
KernelArgValue::None
} else {
KernelArgValue::MemObject((*ptr).get_ref()?)
}
}
KernelArgType::MemLocal => KernelArgValue::LocalMem(arg_size),
KernelArgType::Sampler => {
let ptr: *const cl_sampler = arg_value.cast();
KernelArgValue::Sampler((*ptr).get_ref()?)
}
}
}
};
k.values.get(arg_index as usize).unwrap().replace(Some(arg));
Ok(())
} else {
Err(CL_INVALID_ARG_INDEX)
}
//• CL_INVALID_ARG_VALUE if arg_value specified is not a valid value.
//• CL_INVALID_MEM_OBJECT for an argument declared to be a memory object when the specified arg_value is not a valid memory object.
//• CL_INVALID_SAMPLER for an argument declared to be of type sampler_t when the specified arg_value is not a valid sampler object.
//• CL_INVALID_DEVICE_QUEUE for an argument declared to be of type queue_t when the specified arg_value is not a valid device queue object. This error code is missing before version 2.0.
//• CL_INVALID_ARG_SIZE if arg_size does not match the size of the data type for an argument that is not a memory object or if the argument is a memory object and arg_size != sizeof(cl_mem) or if arg_size is zero and the argument is declared with the local qualifier or if the argument is a sampler and arg_size != sizeof(cl_sampler).
//• CL_MAX_SIZE_RESTRICTION_EXCEEDED if the size in bytes of the memory object (if the argument is a memory object) or arg_size (if the argument is declared with local qualifier) exceeds a language-specified maximum size restriction for this argument, such as the MaxByteOffset SPIR-V decoration. This error code is missing before version 2.2.
//• CL_INVALID_ARG_VALUE if the argument is an image declared with the read_only qualifier and arg_value refers to an image object created with cl_mem_flags of CL_MEM_WRITE_ONLY or if the image argument is declared with the write_only qualifier and arg_value refers to an image object created with cl_mem_flags of CL_MEM_READ_ONLY.
println!("set_kernel_arg not implemented");
Err(CL_OUT_OF_HOST_MEMORY)
//• CL_MAX_SIZE_RESTRICTION_EXCEEDED if the size in bytes of the memory object (if the argument is a memory object) or arg_size (if the argument is declared with local qualifier) exceeds a language-specified maximum size restriction for this argument, such as the MaxByteOffset SPIR-V decoration. This error code is missing before version 2.2.
}
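As a usage sketch for the size check above: for a hypothetical kernel whose first argument is a global buffer, the caller passes sizeof(cl_mem) together with a pointer to the handle, and the value ends up stored as KernelArgValue::MemObject:

// hypothetical: `buffer` is a valid cl_mem and we are inside a fn returning CLResult<()>
set_kernel_arg(
    kernel,
    0,
    std::mem::size_of::<cl_mem>(),
    (&buffer as *const cl_mem).cast(),
)?;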
pub fn enqueue_ndrange_kernel(
command_queue: cl_command_queue,
kernel: cl_kernel,
work_dim: cl_uint,
global_work_offset: *const usize,
global_work_size: *const usize,
local_work_size: *const usize,
num_events_in_wait_list: cl_uint,
event_wait_list: *const cl_event,
event: *mut cl_event,
) -> CLResult<()> {
let q = command_queue.get_arc()?;
let k = kernel.get_arc()?;
let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
// CL_INVALID_CONTEXT if context associated with command_queue and kernel are not the same
if q.context != k.prog.context {
return Err(CL_INVALID_CONTEXT);
}
// CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built program executable available
// for device associated with command_queue.
if k.prog.status(&q.device) != CL_BUILD_SUCCESS as cl_build_status {
return Err(CL_INVALID_PROGRAM_EXECUTABLE);
}
// CL_INVALID_KERNEL_ARGS if the kernel argument values have not been specified.
if k.values.iter().any(|v| v.borrow().is_none()) {
return Err(CL_INVALID_KERNEL_ARGS);
}
// CL_INVALID_WORK_DIMENSION if work_dim is not a valid value (i.e. a value between 1 and
// CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS).
if work_dim == 0 || work_dim > q.device.max_grid_dimensions() {
return Err(CL_INVALID_WORK_DIMENSION);
}
// we assume the application gets it right and doesn't pass shorter arrays than actually needed.
let global_work_size = unsafe { kernel_work_arr_or_default(global_work_size, work_dim) };
let local_work_size = unsafe { kernel_work_arr_or_default(local_work_size, work_dim) };
let global_work_offset = unsafe { kernel_work_arr_or_default(global_work_offset, work_dim) };
if q.device.address_bits() == 32 {
for (s, o) in global_work_size.iter().zip(global_work_offset) {
// CL_INVALID_GLOBAL_WORK_SIZE if any of the values specified in global_work_size[0], …
// global_work_size[work_dim - 1] exceed the maximum value representable by size_t on
// the device on which the kernel-instance will be enqueued.
if *s > u32::MAX as usize {
return Err(CL_INVALID_GLOBAL_WORK_SIZE);
}
// CL_INVALID_GLOBAL_OFFSET if the value specified in global_work_size + the
// corresponding values in global_work_offset for any dimensions is greater than the
// maximum value representable by size_t on the device on which the kernel-instance
// will be enqueued
if s + o > u32::MAX as usize {
return Err(CL_INVALID_GLOBAL_OFFSET);
}
}
}
// CL_INVALID_WORK_ITEM_SIZE if the number of work-items specified in any of
// local_work_size[0], … local_work_size[work_dim - 1] is greater than the corresponding values
// specified by CL_DEVICE_MAX_WORK_ITEM_SIZES[0], …, CL_DEVICE_MAX_WORK_ITEM_SIZES[work_dim - 1].
if local_work_size.iter().gt(q.device.max_block_sizes().iter()) {
return Err(CL_INVALID_WORK_ITEM_SIZE);
}
// If global_work_size is NULL, or the value in any passed dimension is 0 then the kernel
// command will trivially succeed after its event dependencies are satisfied and subsequently
// update its completion event.
let cb: EventSig = if global_work_size.contains(&0) {
Box::new(|_, _| Ok(()))
} else {
k.launch(
&q,
work_dim,
local_work_size,
global_work_size,
global_work_offset,
)
};
create_and_queue(q, CL_COMMAND_NDRANGE_KERNEL, evs, event, false, cb)
//• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and does not match the required work-group size for kernel in the program source.
//• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and is not consistent with the required number of sub-groups for kernel in the program source.
//• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the total number of work-items in the work-group computed as local_work_size[0] × … local_work_size[work_dim - 1] is greater than the value specified by CL_KERNEL_WORK_GROUP_SIZE in the Kernel Object Device Queries table.
//• CL_INVALID_WORK_GROUP_SIZE if the work-group size must be uniform and the local_work_size is not NULL, is not equal to the required work-group size specified in the kernel source, or the global_work_size is not evenly divisible by the local_work_size.
//• CL_MISALIGNED_SUB_BUFFER_OFFSET if a sub-buffer object is specified as the value for an argument that is a buffer object and the offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with queue. This error code is missing before version 1.1.
//• CL_INVALID_IMAGE_SIZE if an image object is specified as an argument value and the image dimensions (image width, height, specified or compute row and/or slice pitch) are not supported by device associated with queue.
//• CL_IMAGE_FORMAT_NOT_SUPPORTED if an image object is specified as an argument value and the image format (image channel order and data type) is not supported by device associated with queue.
//• CL_OUT_OF_RESOURCES if there is a failure to queue the execution instance of kernel on the command-queue because of insufficient resources needed to execute the kernel. For example, the explicitly specified local_work_size causes a failure to execute the kernel because of insufficient resources such as registers or local memory. Another example would be the number of read-only image args used in kernel exceed the CL_DEVICE_MAX_READ_IMAGE_ARGS value for device or the number of write-only and read-write image args used in kernel exceed the CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS value for device or the number of samplers used in kernel exceed CL_DEVICE_MAX_SAMPLERS for device.
//• CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for data store associated with image or buffer objects specified as arguments to kernel.
//• CL_INVALID_OPERATION if SVM pointers are passed as arguments to a kernel and the device does not support SVM or if system pointers are passed as arguments to a kernel and/or stored inside SVM allocations passed as kernel arguments and the device does not support fine grain system SVM allocations.
}

View File

@@ -30,7 +30,7 @@ impl Context {
})
}
pub fn create_buffer(&self, size: usize) -> CLResult<HashMap<Arc<Device>, PipeResource>> {
pub fn create_buffer(&self, size: usize) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
let adj_size: u32 = size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
let mut res = HashMap::new();
for dev in &self.devs {
@@ -38,7 +38,7 @@ impl Context {
.screen()
.resource_create_buffer(adj_size)
.ok_or(CL_OUT_OF_RESOURCES);
res.insert(Arc::clone(dev), resource?);
res.insert(Arc::clone(dev), Arc::new(resource?));
}
Ok(res)
}
@@ -47,7 +47,7 @@ impl Context {
&self,
size: usize,
user_ptr: *mut c_void,
) -> CLResult<HashMap<Arc<Device>, PipeResource>> {
) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
let adj_size: u32 = size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
let mut res = HashMap::new();
for dev in &self.devs {
@@ -55,7 +55,7 @@ impl Context {
.screen()
.resource_create_buffer_from_user(adj_size, user_ptr)
.ok_or(CL_OUT_OF_RESOURCES);
res.insert(Arc::clone(dev), resource?);
res.insert(Arc::clone(dev), Arc::new(resource?));
}
Ok(res)
}

View File

@@ -10,6 +10,8 @@ use crate::core::util::*;
use crate::core::version::*;
use crate::impl_cl_type_trait;
use self::mesa_rust::compiler::clc::*;
use self::mesa_rust::compiler::nir::*;
use self::mesa_rust::pipe::context::*;
use self::mesa_rust::pipe::device::load_screens;
use self::mesa_rust::pipe::screen::*;
@@ -27,7 +29,7 @@ use std::sync::MutexGuard;
pub struct Device {
pub base: CLObjectBase<CL_INVALID_DEVICE>,
screen: Arc<PipeScreen>,
pub screen: Arc<PipeScreen>,
pub cl_version: CLVersion,
pub clc_version: CLVersion,
pub clc_versions: Vec<cl_name_version>,
@@ -36,6 +38,7 @@ pub struct Device {
pub extension_string: String,
pub extensions: Vec<cl_name_version>,
pub formats: HashMap<cl_image_format, HashMap<cl_mem_object_type, cl_mem_flags>>,
pub lib_clc: NirShader,
helper_ctx: Mutex<Arc<PipeContext>>,
}
@@ -43,6 +46,11 @@ impl_cl_type_trait!(cl_device_id, Device, CL_INVALID_DEVICE);
impl Device {
fn new(screen: Arc<PipeScreen>) -> Option<Arc<Device>> {
if !Self::check_valid(&screen) {
return None;
}
let lib_clc = spirv::SPIRVBin::get_lib_clc(&screen);
let mut d = Self {
base: CLObjectBase::new(),
helper_ctx: Mutex::new(screen.create_context().unwrap()),
@@ -55,12 +63,9 @@ impl Device {
extension_string: String::from(""),
extensions: Vec::new(),
formats: HashMap::new(),
lib_clc: lib_clc?,
};
if !d.check_valid() {
return None;
}
d.fill_format_tables();
// check if we are embedded or full profile first
@@ -112,17 +117,21 @@ impl Device {
}
}
fn check_valid(&self) -> bool {
if self.screen.param(pipe_cap::PIPE_CAP_COMPUTE) == 0 ||
fn check_valid(screen: &PipeScreen) -> bool {
if screen.param(pipe_cap::PIPE_CAP_COMPUTE) == 0 ||
// even though we use PIPE_SHADER_IR_NIR, PIPE_SHADER_IR_NIR_SERIALIZED marks CL support by the driver
self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_SUPPORTED_IRS) & (1 << (pipe_shader_ir::PIPE_SHADER_IR_NIR_SERIALIZED as i32)) == 0
screen.shader_param(pipe_shader_type::PIPE_SHADER_COMPUTE, pipe_shader_cap::PIPE_SHADER_CAP_SUPPORTED_IRS) & (1 << (pipe_shader_ir::PIPE_SHADER_IR_NIR_SERIALIZED as i32)) == 0
{
return false;
}
// CL_DEVICE_MAX_PARAMETER_SIZE
// For this minimum value, only a maximum of 128 arguments can be passed to a kernel
if self.param_max_size() < 128 {
if ComputeParam::<u64>::compute_param(
screen,
pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_INPUT_SIZE,
) < 128
{
return false;
}
true

View File

@@ -3,37 +3,430 @@ extern crate mesa_rust_gen;
extern crate rusticl_opencl_gen;
use crate::api::icd::*;
use crate::core::device::*;
use crate::core::event::*;
use crate::core::memory::*;
use crate::core::program::*;
use crate::core::queue::*;
use crate::impl_cl_type_trait;
use self::mesa_rust::compiler::clc::*;
use self::mesa_rust::compiler::nir::*;
use self::mesa_rust_gen::*;
use self::rusticl_opencl_gen::*;
use std::cell::RefCell;
use std::collections::HashMap;
use std::collections::HashSet;
use std::convert::TryInto;
use std::ptr;
use std::sync::Arc;
// ugh, we are not allowed to take refs, so...
pub enum KernelArgValue {
None,
Constant(Vec<u8>),
MemObject(&'static Mem),
Sampler(&'static Sampler),
LocalMem(usize),
}
#[derive(PartialEq, Eq)]
pub enum KernelArgType {
Constant, // for anything passed by value
Sampler,
MemGlobal,
MemConstant,
MemLocal,
}
#[derive(Hash, PartialEq, Eq)]
pub enum InternalKernelArgType {
ConstantBuffer,
}
pub struct KernelArg {
spirv: spirv::SPIRVKernelArg,
pub kind: KernelArgType,
pub size: usize,
pub offset: usize,
pub dead: bool,
}
#[derive(Hash, PartialEq, Eq)]
pub struct InternalKernelArg {
pub kind: InternalKernelArgType,
pub size: usize,
pub offset: usize,
}
impl KernelArg {
fn from_spirv_nir(spirv: Vec<spirv::SPIRVKernelArg>, nir: &mut NirShader) -> Vec<Self> {
let nir_arg_map: HashMap<_, _> = nir
.variables_with_mode(
nir_variable_mode::nir_var_uniform | nir_variable_mode::nir_var_image,
)
.map(|v| (v.data.location, v))
.collect();
let mut res = Vec::new();
for (i, s) in spirv.into_iter().enumerate() {
let nir = nir_arg_map.get(&(i as i32)).unwrap();
let kind = match s.address_qualifier {
clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_PRIVATE => {
if unsafe { glsl_type_is_sampler(nir.type_) } {
KernelArgType::Sampler
} else {
KernelArgType::Constant
}
}
clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_CONSTANT => {
KernelArgType::MemConstant
}
clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_LOCAL => {
KernelArgType::MemLocal
}
clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_GLOBAL => {
KernelArgType::MemGlobal
}
};
res.push(Self {
spirv: s,
size: unsafe { glsl_get_cl_size(nir.type_) } as usize,
// we'll update it later in the 2nd pass
kind: kind,
offset: 0,
dead: true,
});
}
res
}
fn assign_locations(
args: &mut [Self],
internal_args: &mut [InternalKernelArg],
nir: &mut NirShader,
) {
for var in nir.variables_with_mode(
nir_variable_mode::nir_var_uniform | nir_variable_mode::nir_var_image,
) {
if let Some(arg) = args.get_mut(var.data.location as usize) {
arg.offset = var.data.driver_location as usize;
arg.dead = false;
} else {
internal_args
.get_mut(var.data.location as usize - args.len())
.unwrap()
.offset = var.data.driver_location as usize;
}
}
}
}
#[repr(C)]
pub struct Kernel {
pub base: CLObjectBase<CL_INVALID_KERNEL>,
pub prog: Arc<Program>,
pub name: String,
pub args: Vec<spirv::SPIRVKernelArg>,
pub args: Vec<KernelArg>,
pub values: Vec<RefCell<Option<KernelArgValue>>>,
internal_args: Vec<InternalKernelArg>,
nirs: HashMap<Arc<Device>, NirShader>,
}
impl_cl_type_trait!(cl_kernel, Kernel, CL_INVALID_KERNEL);
fn create_kernel_arr(vals: &[usize], val: u32) -> [u32; 3] {
let mut res = [val; 3];
for (i, v) in vals.iter().enumerate() {
res[i] = (*v).try_into().expect("64 bit work groups not supported");
}
res
}
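create_kernel_arr pads whatever the application passed up to gallium's fixed three dimensions; two hypothetical inputs to illustrate:

let grid = create_kernel_arr(&[1024, 8], 1); // -> [1024, 8, 1]
let offsets = create_kernel_arr(&[], 0); // -> [0, 0, 0]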
// mostly like clc_spirv_to_dxil
// does not DCE uniforms or images!
fn lower_and_optimize_nir_pre_inputs(nir: &mut NirShader, lib_clc: &NirShader) {
nir.set_workgroup_size(&[0; 3]);
nir.structurize();
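// Rust has no do-while loop; the block below doubles as the loop condition,
// so the pass pipeline keeps re-running until no pass reports progress.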
while {
let mut progress = false;
progress |= nir.pass0(nir_copy_prop);
progress |= nir.pass0(nir_opt_copy_prop_vars);
progress |= nir.pass0(nir_opt_deref);
progress |= nir.pass0(nir_opt_dce);
progress |= nir.pass0(nir_opt_undef);
progress |= nir.pass0(nir_opt_constant_folding);
progress |= nir.pass0(nir_opt_cse);
progress |= nir.pass0(nir_lower_vars_to_ssa);
progress |= nir.pass0(nir_opt_algebraic);
progress
} {}
nir.inline(lib_clc);
nir.remove_non_entrypoints();
while {
let mut progress = false;
progress |= nir.pass0(nir_copy_prop);
progress |= nir.pass0(nir_opt_copy_prop_vars);
progress |= nir.pass0(nir_opt_deref);
progress |= nir.pass0(nir_opt_dce);
progress |= nir.pass0(nir_opt_undef);
progress |= nir.pass0(nir_opt_constant_folding);
progress |= nir.pass0(nir_opt_cse);
progress |= nir.pass0(nir_split_var_copies);
progress |= nir.pass0(nir_lower_var_copies);
progress |= nir.pass0(nir_lower_vars_to_ssa);
progress |= nir.pass0(nir_opt_algebraic);
progress |= nir.pass1(
nir_opt_if,
nir_opt_if_options::nir_opt_if_aggressive_last_continue
| nir_opt_if_options::nir_opt_if_optimize_phi_true_false,
);
progress |= nir.pass0(nir_opt_dead_cf);
progress |= nir.pass0(nir_opt_remove_phis);
// we don't want to be too aggressive here, but it kills a bit of CFG
progress |= nir.pass3(nir_opt_peephole_select, 1, true, true);
progress |= nir.pass1(
nir_lower_vec3_to_vec4,
nir_variable_mode::nir_var_mem_generic | nir_variable_mode::nir_var_uniform,
);
progress
} {}
// TODO variable initializers
// TODO lower memcpy
nir.pass0(nir_dedup_inline_samplers);
nir.pass2(
nir_lower_vars_to_explicit_types,
nir_variable_mode::nir_var_function_temp,
Some(glsl_get_cl_type_size_align),
);
// TODO printf
nir.pass0(nir_split_var_copies);
nir.pass0(nir_opt_copy_prop_vars);
nir.pass0(nir_lower_var_copies);
nir.pass0(nir_lower_vars_to_ssa);
nir.pass0(nir_lower_alu);
nir.pass0(nir_opt_dce);
nir.pass0(nir_opt_deref);
}
fn lower_and_optimize_nir_late(
dev: &Device,
nir: &mut NirShader,
args: usize,
) -> Vec<InternalKernelArg> {
let mut res = Vec::new();
let mut lower_state = rusticl_lower_state::default();
nir.pass2(
nir_remove_dead_variables,
nir_variable_mode::nir_var_uniform
| nir_variable_mode::nir_var_mem_constant
| nir_variable_mode::nir_var_function_temp,
ptr::null(),
);
nir.pass1(nir_lower_readonly_images_to_tex, false);
nir.pass2(
nir_remove_dead_variables,
nir_variable_mode::nir_var_mem_shared | nir_variable_mode::nir_var_function_temp,
ptr::null(),
);
nir.reset_scratch_size();
nir.pass2(
nir_lower_vars_to_explicit_types,
nir_variable_mode::nir_var_mem_constant,
Some(glsl_get_cl_type_size_align),
);
nir.extract_constant_initializers();
// TODO printf
// TODO 32 bit devices
if nir.has_constant() {
res.push(InternalKernelArg {
kind: InternalKernelArgType::ConstantBuffer,
offset: 0,
size: 8,
});
lower_state.const_buf = nir.add_var(
nir_variable_mode::nir_var_uniform,
unsafe { glsl_uint64_t_type() },
args + res.len() - 1,
"constant_buffer_addr",
);
}
nir.pass2(
nir_lower_vars_to_explicit_types,
nir_variable_mode::nir_var_mem_shared
| nir_variable_mode::nir_var_function_temp
| nir_variable_mode::nir_var_uniform
| nir_variable_mode::nir_var_mem_global,
Some(glsl_get_cl_type_size_align),
);
nir.pass2(
nir_lower_explicit_io,
nir_variable_mode::nir_var_mem_global | nir_variable_mode::nir_var_mem_constant,
nir_address_format::nir_address_format_64bit_global,
);
nir.pass1(rusticl_lower_intrinsics, &mut lower_state);
nir.pass2(
nir_lower_explicit_io,
nir_variable_mode::nir_var_mem_shared
| nir_variable_mode::nir_var_function_temp
| nir_variable_mode::nir_var_uniform,
nir_address_format::nir_address_format_32bit_offset_as_64bit,
);
nir.pass0(nir_lower_system_values);
let compute_options = nir_lower_compute_system_values_options::default();
nir.pass1(nir_lower_compute_system_values, &compute_options);
nir.pass0(nir_opt_deref);
nir.pass0(nir_lower_vars_to_ssa);
// TODO whatever clc is doing here
nir.pass1(nir_lower_convert_alu_types, None);
nir.pass0(nir_opt_dce);
dev.screen.finalize_nir(nir);
res
}
impl Kernel {
pub fn new(name: String, prog: Arc<Program>, args: Vec<spirv::SPIRVKernelArg>) -> Arc<Kernel> {
pub fn new(
name: String,
prog: Arc<Program>,
mut nirs: HashMap<Arc<Device>, NirShader>,
args: Vec<spirv::SPIRVKernelArg>,
) -> Arc<Kernel> {
nirs.iter_mut()
.for_each(|(d, n)| lower_and_optimize_nir_pre_inputs(n, &d.lib_clc));
let nir = nirs.values_mut().next().unwrap();
let mut args = KernelArg::from_spirv_nir(args, nir);
// can't use vec!...
let values = args.iter().map(|_| RefCell::new(None)).collect();
let internal_args: HashSet<_> = nirs
.iter_mut()
.map(|(d, n)| lower_and_optimize_nir_late(d, n, args.len()))
.collect();
// we want the same internal args for every compiled kernel, for now
assert!(internal_args.len() == 1);
let mut internal_args = internal_args.into_iter().next().unwrap();
nirs.values_mut()
.for_each(|n| KernelArg::assign_locations(&mut args, &mut internal_args, n));
Arc::new(Self {
base: CLObjectBase::new(),
prog: prog,
name: name,
args: args,
values: values,
internal_args: internal_args,
// caller has to verify all kernels have the same sig
nirs: nirs,
})
}
// the painful part is that host threads are allowed to modify the kernel object once it has been
// enqueued, so we return a closure with all required data included.
pub fn launch(
&self,
q: &Arc<Queue>,
work_dim: u32,
block: &[usize],
grid: &[usize],
offsets: &[usize],
) -> EventSig {
let nir = self.nirs.get(&q.device).unwrap();
let mut block = create_kernel_arr(block, 1);
let mut grid = create_kernel_arr(grid, 1);
let offsets = create_kernel_arr(offsets, 0);
let mut input: Vec<u8> = Vec::new();
let mut resource_info = Vec::new();
let mut local_size: u32 = nir.shared_size();
for i in 0..3 {
if block[i] == 0 {
block[i] = 1;
} else {
grid[i] /= block[i];
}
}
for (arg, val) in self.args.iter().zip(&self.values) {
if arg.dead {
continue;
}
match val.borrow().as_ref().unwrap() {
KernelArgValue::Constant(c) => input.extend_from_slice(c),
KernelArgValue::MemObject(mem) => {
input.extend_from_slice(&mem.offset.to_ne_bytes());
resource_info.push((Some(mem.get_res_of_dev(&q.device).clone()), arg.offset));
}
KernelArgValue::LocalMem(size) => {
// TODO 32 bit
input.extend_from_slice(&[0; 8]);
local_size += *size as u32;
}
KernelArgValue::None => {
assert!(
arg.kind == KernelArgType::MemGlobal
|| arg.kind == KernelArgType::MemConstant
);
input.extend_from_slice(&[0; 8]);
}
_ => panic!("unhandled arg type"),
}
}
for arg in &self.internal_args {
match arg.kind {
InternalKernelArgType::ConstantBuffer => {
input.extend_from_slice(&[0; 8]);
let buf = nir.get_constant_buffer();
let res = Arc::new(
q.device
.screen()
.resource_create_buffer(buf.len() as u32)
.unwrap(),
);
q.device.helper_ctx().buffer_subdata(
&res,
0,
buf.as_ptr().cast(),
buf.len() as u32,
);
resource_info.push((Some(res), arg.offset));
}
}
}
let cso = q
.device
.helper_ctx()
.create_compute_state(nir, input.len() as u32, local_size);
Box::new(move |_, ctx| {
let mut input = input.clone();
let mut resources = Vec::with_capacity(resource_info.len());
let mut globals: Vec<*mut u32> = Vec::new();
for (res, offset) in resource_info.clone() {
resources.push(res);
globals.push(unsafe { input.as_mut_ptr().add(offset) }.cast());
}
ctx.bind_compute_state(cso);
ctx.set_global_binding(resources.as_slice(), &mut globals);
ctx.launch_grid(work_dim, block, grid, offsets, &input);
ctx.clear_global_binding(globals.len() as u32);
ctx.delete_compute_state(cso);
ctx.memory_barrier(PIPE_BARRIER_GLOBAL_BUFFER);
Ok(())
})
}
pub fn access_qualifier(&self, idx: cl_uint) -> cl_kernel_arg_access_qualifier {
let aq = self.args[idx as usize].access_qualifier;
let aq = self.args[idx as usize].spirv.access_qualifier;
if aq
== clc_kernel_arg_access_qualifier::CLC_KERNEL_ARG_ACCESS_READ
@@ -50,7 +443,7 @@ impl Kernel {
}
pub fn address_qualifier(&self, idx: cl_uint) -> cl_kernel_arg_address_qualifier {
match self.args[idx as usize].address_qualifier {
match self.args[idx as usize].spirv.address_qualifier {
clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_PRIVATE => {
CL_KERNEL_ARG_ADDRESS_PRIVATE
}
@@ -67,7 +460,7 @@ impl Kernel {
}
pub fn type_qualifier(&self, idx: cl_uint) -> cl_kernel_arg_type_qualifier {
let tq = self.args[idx as usize].type_qualifier;
let tq = self.args[idx as usize].spirv.type_qualifier;
let zero = clc_kernel_arg_type_qualifier(0);
let mut res = CL_KERNEL_ARG_TYPE_NONE;
@@ -87,10 +480,10 @@ impl Kernel {
}
pub fn arg_name(&self, idx: cl_uint) -> &String {
&self.args[idx as usize].name
&self.args[idx as usize].spirv.name
}
pub fn arg_type_name(&self, idx: cl_uint) -> &String {
&self.args[idx as usize].type_name
&self.args[idx as usize].spirv.type_name
}
}

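One detail of the launch path above that is easy to miss: the OpenCL global work size is turned into a gallium grid of work groups by dividing each dimension by the block size (zero block dimensions are first bumped to 1). Assuming an evenly divisible global size of [1024, 8, 1] and a local size of [256, 1, 1], the driver therefore sees block = [256, 1, 1] and grid = [4, 8, 1].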
View File

@@ -36,7 +36,7 @@ pub struct Mem {
pub image_desc: cl_image_desc,
pub image_elem_size: u8,
pub cbs: Mutex<Vec<Box<dyn Fn(cl_mem)>>>,
res: Option<HashMap<Arc<Device>, PipeResource>>,
res: Option<HashMap<Arc<Device>, Arc<PipeResource>>>,
maps: Mutex<HashMap<*mut c_void, (u32, PipeTransfer)>>,
}
@@ -187,7 +187,7 @@ impl Mem {
ptr::eq(a, b)
}
fn get_res(&self) -> &HashMap<Arc<Device>, PipeResource> {
fn get_res(&self) -> &HashMap<Arc<Device>, Arc<PipeResource>> {
self.parent
.as_ref()
.map_or(self, |p| p.as_ref())
@@ -196,6 +196,10 @@ impl Mem {
.unwrap()
}
pub fn get_res_of_dev(&self, dev: &Arc<Device>) -> &Arc<PipeResource> {
self.get_res().get(dev).unwrap()
}
fn to_parent<'a>(&'a self, offset: &mut usize) -> &'a Self {
if let Some(parent) = &self.parent {
offset.add_assign(self.offset);

View File

@@ -1,4 +1,5 @@
extern crate mesa_rust;
extern crate mesa_rust_gen;
extern crate rusticl_opencl_gen;
use crate::api::icd::*;
@@ -7,6 +8,8 @@ use crate::core::device::*;
use crate::impl_cl_type_trait;
use self::mesa_rust::compiler::clc::*;
use self::mesa_rust::compiler::nir::*;
use self::mesa_rust_gen::*;
use self::rusticl_opencl_gen::*;
use std::collections::HashMap;
@@ -227,4 +230,28 @@ impl Program {
}),
})
}
pub fn nirs(&self, kernel: &str) -> HashMap<Arc<Device>, NirShader> {
let mut lock = self.build_info();
let mut res = HashMap::new();
for d in &self.devs {
let info = Self::dev_build_info(&mut lock, d);
if info.status != CL_BUILD_SUCCESS as cl_build_status {
continue;
}
let nir = info
.spirv
.as_ref()
.unwrap()
.to_nir(
kernel,
d.screen
.nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE),
&d.lib_clc,
)
.unwrap();
res.insert(d.clone(), nir);
}
res
}
}

View File

@@ -1,6 +1,9 @@
extern crate mesa_rust_gen;
extern crate mesa_rust_util;
use crate::compiler::nir::*;
use crate::pipe::screen::*;
use self::mesa_rust_gen::*;
use self::mesa_rust_util::string::*;
@@ -167,6 +170,70 @@ impl SPIRVBin {
.collect(),
}
}
fn get_spirv_options(library: bool, clc_shader: *const nir_shader) -> spirv_to_nir_options {
spirv_to_nir_options {
create_library: library,
environment: nir_spirv_execution_environment::NIR_SPIRV_OPENCL,
clc_shader: clc_shader,
float_controls_execution_mode: float_controls::FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32
as u16,
caps: spirv_supported_capabilities {
address: true,
float64: true,
int8: true,
int16: true,
int64: true,
kernel: true,
kernel_image: true,
linkage: true,
..Default::default()
},
constant_addr_format: nir_address_format::nir_address_format_64bit_global,
global_addr_format: nir_address_format::nir_address_format_64bit_global, // TODO 32 bit devices
shared_addr_format: nir_address_format::nir_address_format_32bit_offset_as_64bit,
temp_addr_format: nir_address_format::nir_address_format_32bit_offset_as_64bit,
// default
debug: spirv_to_nir_options__bindgen_ty_1::default(),
..Default::default()
}
}
pub fn to_nir(
&self,
entry_point: &str,
nir_options: *const nir_shader_compiler_options,
libclc: &NirShader,
) -> Option<NirShader> {
let c_entry = CString::new(entry_point.as_bytes()).unwrap();
let spirv_options = Self::get_spirv_options(false, libclc.get_nir());
let nir = unsafe {
spirv_to_nir(
self.spirv.data.cast(),
self.spirv.size / 4,
ptr::null_mut(), // spec
0, // spec count
gl_shader_stage::MESA_SHADER_KERNEL,
c_entry.as_ptr(),
&spirv_options,
nir_options,
)
};
NirShader::new(nir)
}
pub fn get_lib_clc(screen: &PipeScreen) -> Option<NirShader> {
let nir_options = screen.nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE);
let spirv_options = Self::get_spirv_options(true, ptr::null());
let shader_cache = screen.shader_cache();
NirShader::new(unsafe {
nir_load_libclc_shader(64, shader_cache, &spirv_options, nir_options)
})
}
}
impl Drop for SPIRVBin {

View File

@@ -2,12 +2,215 @@ extern crate mesa_rust_gen;
use self::mesa_rust_gen::*;
use std::convert::TryInto;
use std::ffi::c_void;
use std::ffi::CString;
use std::marker::PhantomData;
use std::ptr;
use std::ptr::NonNull;
use std::slice;
// from https://internals.rust-lang.org/t/discussion-on-offset-of/7440/2
macro_rules! offset_of {
($Struct:path, $field:ident) => {{
// Using a separate function to minimize unhygienic hazards
// (e.g. unsafety of #[repr(packed)] field borrows).
// Uncomment `const` when `const fn`s can juggle pointers.
/*const*/
fn offset() -> usize {
let u = std::mem::MaybeUninit::<$Struct>::uninit();
// Use pattern-matching to avoid accidentally going through Deref.
let &$Struct { $field: ref f, .. } = unsafe { &*u.as_ptr() };
let o = (f as *const _ as usize).wrapping_sub(&u as *const _ as usize);
// Triple check that we are within `u` still.
assert!((0..=std::mem::size_of_val(&u)).contains(&o));
o
}
offset()
}};
}
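The macro is only used to locate the intrusive exec_node embedded in NIR structs, for example:

// byte offset of the embedded list node inside nir_variable, as used by variables() below
let node_offset = offset_of!(nir_variable, node);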
pub struct ExecListIter<'a, T> {
n: &'a mut exec_node,
offset: usize,
_marker: PhantomData<T>,
}
impl<'a, T> ExecListIter<'a, T> {
fn new(l: &'a mut exec_list, offset: usize) -> Self {
Self {
n: &mut l.head_sentinel,
offset: offset,
_marker: PhantomData,
}
}
}
impl<'a, T: 'a> Iterator for ExecListIter<'a, T> {
type Item = &'a mut T;
fn next(&mut self) -> Option<Self::Item> {
self.n = unsafe { &mut *self.n.next };
if self.n.next.is_null() {
None
} else {
let t: *mut c_void = (self.n as *mut exec_node).cast();
Some(unsafe { &mut *(t.sub(self.offset).cast()) })
}
}
}
pub struct NirShader {
nir: NonNull<nir_shader>,
}
impl NirShader {
pub fn new(nir: *mut nir_shader) -> Option<Self> {
NonNull::new(nir).map(|nir| Self { nir: nir })
}
pub fn print(&self) {
unsafe { nir_print_shader(self.nir.as_ptr(), stderr) };
}
pub fn get_nir(&self) -> *mut nir_shader {
self.nir.as_ptr()
}
pub fn dup_for_driver(&self) -> *mut nir_shader {
unsafe { nir_shader_clone(ptr::null_mut(), self.nir.as_ptr()) }
}
pub fn pass0<R>(&mut self, pass: unsafe extern "C" fn(*mut nir_shader) -> R) -> R {
unsafe { pass(self.nir.as_ptr()) }
}
pub fn pass1<R, A>(
&mut self,
pass: unsafe extern "C" fn(*mut nir_shader, a: A) -> R,
a: A,
) -> R {
unsafe { pass(self.nir.as_ptr(), a) }
}
pub fn pass2<R, A, B>(
&mut self,
pass: unsafe extern "C" fn(*mut nir_shader, a: A, b: B) -> R,
a: A,
b: B,
) -> R {
unsafe { pass(self.nir.as_ptr(), a, b) }
}
pub fn pass3<R, A, B, C>(
&mut self,
pass: unsafe extern "C" fn(*mut nir_shader, a: A, b: B, c: C) -> R,
a: A,
b: B,
c: C,
) -> R {
unsafe { pass(self.nir.as_ptr(), a, b, c) }
}
pub fn structurize(&mut self) {
self.pass0(nir_lower_goto_ifs);
self.pass0(nir_opt_dead_cf);
}
pub fn inline(&mut self, libclc: &NirShader) {
self.pass1(
nir_lower_variable_initializers,
nir_variable_mode::nir_var_function_temp,
);
self.pass0(nir_lower_returns);
self.pass1(nir_lower_libclc, libclc.nir.as_ptr());
self.pass0(nir_inline_functions);
}
pub fn remove_non_entrypoints(&mut self) {
unsafe { nir_remove_non_entrypoints(self.nir.as_ptr()) };
}
pub fn variables(&mut self) -> ExecListIter<nir_variable> {
ExecListIter::new(
&mut unsafe { self.nir.as_mut() }.variables,
offset_of!(nir_variable, node),
)
}
pub fn reset_scratch_size(&self) {
unsafe {
(*self.nir.as_ptr()).scratch_size = 0;
}
}
pub fn shared_size(&self) -> u32 {
unsafe { (*self.nir.as_ptr()).info.shared_size }
}
pub fn set_workgroup_size(&self, workgroup: &[u16; 3]) {
let mut nir = self.nir.as_ptr();
unsafe {
(*nir).info.set_workgroup_size_variable(workgroup[0] == 0);
(*nir).info.workgroup_size[0] = workgroup[0];
(*nir).info.workgroup_size[1] = workgroup[1];
(*nir).info.workgroup_size[2] = workgroup[2];
}
}
pub fn variables_with_mode(
&mut self,
mode: nir_variable_mode,
) -> impl Iterator<Item = &mut nir_variable> {
self.variables()
.filter(move |v| v.data.mode() & mode.0 != 0)
}
pub fn extract_constant_initializers(&self) {
let nir = self.nir.as_ptr();
unsafe {
if (*nir).constant_data_size > 0 {
assert!((*nir).constant_data.is_null());
(*nir).constant_data = rzalloc_size(nir.cast(), (*nir).constant_data_size as usize);
nir_gather_explicit_io_initializers(
nir,
(*nir).constant_data,
(*nir).constant_data_size as usize,
nir_variable_mode::nir_var_mem_constant,
);
}
}
}
pub fn has_constant(&self) -> bool {
unsafe {
!self.nir.as_ref().constant_data.is_null() && self.nir.as_ref().constant_data_size > 0
}
}
pub fn get_constant_buffer(&self) -> &[u8] {
unsafe {
let nir = self.nir.as_ref();
slice::from_raw_parts(nir.constant_data.cast(), nir.constant_data_size as usize)
}
}
pub fn add_var(
&self,
mode: nir_variable_mode,
glsl_type: *const glsl_type,
loc: usize,
name: &str,
) -> *mut nir_variable {
let name = CString::new(name).unwrap();
unsafe {
let var = nir_variable_create(self.nir.as_ptr(), mode, glsl_type, name.as_ptr());
(*var).data.location = loc.try_into().unwrap();
var
}
}
}
impl Drop for NirShader {
fn drop(&mut self) {
unsafe { ralloc_free(self.nir.as_ptr().cast()) };

View File

@@ -1,5 +1,6 @@
extern crate mesa_rust_gen;
use crate::compiler::nir::*;
use crate::pipe::resource::*;
use crate::pipe::transfer::*;
@@ -97,6 +98,84 @@ impl PipeContext {
unsafe { self.pipe.as_ref().blit.unwrap()(self.pipe.as_ptr(), &blit_info) }
}
pub fn create_compute_state(
&self,
nir: &NirShader,
input_mem: u32,
local_mem: u32,
) -> *mut c_void {
let state = pipe_compute_state {
ir_type: pipe_shader_ir::PIPE_SHADER_IR_NIR,
prog: nir.dup_for_driver().cast(),
req_input_mem: input_mem,
req_local_mem: local_mem,
req_private_mem: 0,
};
unsafe { self.pipe.as_ref().create_compute_state.unwrap()(self.pipe.as_ptr(), &state) }
}
pub fn bind_compute_state(&self, state: *mut c_void) {
unsafe { self.pipe.as_ref().bind_compute_state.unwrap()(self.pipe.as_ptr(), state) }
}
pub fn delete_compute_state(&self, state: *mut c_void) {
unsafe { self.pipe.as_ref().delete_compute_state.unwrap()(self.pipe.as_ptr(), state) }
}
pub fn launch_grid(
&self,
work_dim: u32,
block: [u32; 3],
grid: [u32; 3],
grid_base: [u32; 3],
input: &[u8],
) {
let info = pipe_grid_info {
pc: 0,
input: input.as_ptr().cast(),
work_dim: work_dim,
block: block,
last_block: [0; 3],
grid: grid,
grid_base: grid_base,
indirect: ptr::null_mut(),
indirect_offset: 0,
};
unsafe { self.pipe.as_ref().launch_grid.unwrap()(self.pipe.as_ptr(), &info) }
}
pub fn set_global_binding(&self, res: &[Option<Arc<PipeResource>>], out: &mut [*mut u32]) {
let mut res: Vec<_> = res
.iter()
.map(|o| o.as_ref().map_or(ptr::null_mut(), |r| r.pipe()))
.collect();
unsafe {
self.pipe.as_ref().set_global_binding.unwrap()(
self.pipe.as_ptr(),
0,
res.len() as u32,
res.as_mut_ptr(),
out.as_mut_ptr(),
)
}
}
pub fn clear_global_binding(&self, count: u32) {
unsafe {
self.pipe.as_ref().set_global_binding.unwrap()(
self.pipe.as_ptr(),
0,
count,
ptr::null_mut(),
ptr::null_mut(),
)
}
}
pub fn memory_barrier(&self, barriers: u32) {
unsafe { self.pipe.as_ref().memory_barrier.unwrap()(self.pipe.as_ptr(), barriers) }
}
}
impl Drop for PipeContext {
@@ -109,8 +188,14 @@ impl Drop for PipeContext {
fn has_required_cbs(c: &pipe_context) -> bool {
c.destroy.is_some()
&& c.bind_compute_state.is_some()
&& c.blit.is_some()
&& c.buffer_map.is_some()
&& c.buffer_subdata.is_some()
&& c.buffer_unmap.is_some()
&& c.create_compute_state.is_some()
&& c.delete_compute_state.is_some()
&& c.launch_grid.is_some()
&& c.memory_barrier.is_some()
&& c.set_global_binding.is_some()
}
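Taken together, these new entry points mirror the gallium compute dispatch sequence driven by Kernel::launch; a condensed sketch with error handling omitted and everything collapsed onto a single context for brevity (the real code creates the CSO on the device's helper context and dispatches on the queue's context):

// `nir`, `input`, `local_size`, `resources`, `globals`, `block`, `grid`, `offsets`
// are assumed to be computed as in Kernel::launch
let cso = ctx.create_compute_state(nir, input.len() as u32, local_size);
ctx.bind_compute_state(cso);
ctx.set_global_binding(resources.as_slice(), &mut globals);
ctx.launch_grid(work_dim, block, grid, offsets, &input);
ctx.clear_global_binding(globals.len() as u32);
ctx.delete_compute_state(cso);
ctx.memory_barrier(PIPE_BARRIER_GLOBAL_BUFFER);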

View File

@@ -1,6 +1,7 @@
extern crate mesa_rust_gen;
extern crate mesa_rust_util;
use crate::compiler::nir::NirShader;
use crate::pipe::context::*;
use crate::pipe::device::*;
use crate::pipe::resource::*;
@@ -151,6 +152,39 @@ impl PipeScreen {
let s = &mut unsafe { *self.screen };
unsafe { s.is_format_supported.unwrap()(self.screen, format, target, 0, 0, bindings) }
}
pub fn nir_shader_compiler_options(
&self,
shader: pipe_shader_type,
) -> *const nir_shader_compiler_options {
unsafe {
(*self.screen).get_compiler_options.unwrap()(
self.screen,
pipe_shader_ir::PIPE_SHADER_IR_NIR,
shader,
)
.cast()
}
}
pub fn shader_cache(&self) -> *mut disk_cache {
let s = &mut unsafe { *self.screen };
if let Some(func) = s.get_disk_shader_cache {
unsafe { func(self.screen) }
} else {
ptr::null_mut()
}
}
pub fn finalize_nir(&self, nir: &NirShader) {
let s = &mut unsafe { *self.screen };
if let Some(func) = s.finalize_nir {
unsafe {
func(s, nir.get_nir().cast());
}
}
}
}
impl Drop for PipeScreen {
@@ -165,6 +199,7 @@ fn has_required_cbs(screen: *mut pipe_screen) -> bool {
let s = unsafe { *screen };
s.context_create.is_some()
&& s.destroy.is_some()
&& s.get_compiler_options.is_some()
&& s.get_compute_param.is_some()
&& s.get_name.is_some()
&& s.get_param.is_some()

View File

@@ -149,13 +149,16 @@ rusticl_mesa_bindings_inline_wrapper = static_library(
'mesa_bindings_inline_wrapper',
[
'rusticl_mesa_inline_bindings_wrapper.c',
'rusticl_mesa_inline_bindings_wrapper.h'
'rusticl_mesa_inline_bindings_wrapper.h',
'rusticl_nir.c',
'rusticl_nir.h',
],
gnu_symbol_visibility : 'hidden',
include_directories : [
inc_gallium,
inc_gallium_aux,
inc_include,
inc_nir,
inc_src,
],
c_args : pre_args,
@@ -179,16 +182,23 @@ rusticl_mesa_bindings_rs = rust.bindgen(
args : [
rusticl_bindgen_args,
'--whitelist-function', 'clc_.*',
'--whitelist-function', 'glsl_.*',
'--whitelist-function', 'nir_.*',
'--whitelist-function', 'pipe_.*',
'--whitelist-function', 'ralloc_.*',
'--whitelist-function', 'rusticl_.*',
'--whitelist-function', 'rz?alloc_.*',
'--whitelist-function', 'spirv_.*',
'--whitelist-type', 'pipe_endian',
'--whitelist-type', 'clc_kernel_arg_access_qualifier',
'--bitfield-enum', 'clc_kernel_arg_access_qualifier',
'--whitelist-type', 'clc_kernel_arg_type_qualifier',
'--bitfield-enum', 'clc_kernel_arg_type_qualifier',
'--bitfield-enum', 'nir_opt_if_options',
'--bitfield-enum', 'nir_variable_mode',
'--whitelist-type', 'float_controls',
'--whitelist-var', 'PIPE_.*',
'--bitfield-enum', 'pipe_map_flags',
'--allowlist-var', 'stderr',
],
)

View File

@@ -1,11 +1,13 @@
#include "rusticl_mesa_inline_bindings_wrapper.h"
#include "compiler/clc/clc.h"
#include "nir.h"
#include "nir_types.h"
#include "spirv/nir_spirv.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "pipe-loader/pipe_loader.h"
#include "rusticl_nir.h"

View File

@@ -0,0 +1,38 @@
#include "nir.h"
#include "nir_builder.h"
#include "rusticl_nir.h"
static bool
rusticl_lower_intrinsics_filter(const nir_instr* instr, const void* state)
{
return instr->type == nir_instr_type_intrinsic;
}
static nir_ssa_def*
rusticl_lower_intrinsics_instr(
nir_builder *b,
nir_instr *instr,
void* _state
) {
nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
struct rusticl_lower_state *state = _state;
switch (intrinsic->intrinsic) {
case nir_intrinsic_load_constant_base_ptr:
return nir_load_var(b, state->const_buf);
default:
return NULL;
}
}
bool
rusticl_lower_intrinsics(nir_shader *nir, struct rusticl_lower_state* state)
{
return nir_shader_lower_instructions(
nir,
rusticl_lower_intrinsics_filter,
rusticl_lower_intrinsics_instr,
state
);
}

View File

@@ -0,0 +1,5 @@
struct rusticl_lower_state {
nir_variable *const_buf;
};
bool rusticl_lower_intrinsics(nir_shader *nir, struct rusticl_lower_state *state);