rusticl/kernel: move most of the code in launch inside the closure

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29527>
This commit is contained in:
Karol Herbst
2024-06-03 18:52:27 +02:00
committed by Marge Bot
parent 436122cb10
commit bb2453c649

View File

@@ -940,70 +940,77 @@ impl Kernel {
grid: &[usize], grid: &[usize],
offsets: &[usize], offsets: &[usize],
) -> CLResult<EventSig> { ) -> CLResult<EventSig> {
let nir_kernel_build = self.builds.get(q.device).unwrap().clone(); // Clone all the data we need to execute this kernel
let kernel_info = Arc::clone(&self.kernel_info);
let arg_values = self.arg_values().clone();
let nir_kernel_build = Arc::clone(&self.builds[q.device]);
// operations we want to report errors to the clients
let mut block = create_kernel_arr::<u32>(block, 1)?; let mut block = create_kernel_arr::<u32>(block, 1)?;
let mut grid = create_kernel_arr::<usize>(grid, 1)?; let mut grid = create_kernel_arr::<usize>(grid, 1)?;
let offsets = create_kernel_arr::<usize>(offsets, 0)?; let offsets = create_kernel_arr::<usize>(offsets, 0)?;
let mut workgroup_id_offset_loc = None;
let mut input: Vec<u8> = Vec::new();
let mut resource_info = Vec::new();
// Set it once so we get the alignment padding right
let static_local_size: u64 = nir_kernel_build.shared_size;
let mut variable_local_size: u64 = static_local_size;
let printf_size = q.device.printf_buffer_size() as u32;
let mut samplers = Vec::new();
let mut iviews = Vec::new();
let mut sviews = Vec::new();
let mut tex_formats: Vec<u16> = Vec::new();
let mut tex_orders: Vec<u16> = Vec::new();
let mut img_formats: Vec<u16> = Vec::new();
let mut img_orders: Vec<u16> = Vec::new();
let null_ptr;
let null_ptr_v3;
if q.device.address_bits() == 64 {
null_ptr = [0u8; 8].as_slice();
null_ptr_v3 = [0u8; 24].as_slice();
} else {
null_ptr = [0u8; 4].as_slice();
null_ptr_v3 = [0u8; 12].as_slice();
};
self.optimize_local_size(q.device, &mut grid, &mut block); self.optimize_local_size(q.device, &mut grid, &mut block);
let arg_values = self.arg_values(); Ok(Box::new(move |q, ctx| {
for (arg, val) in self.kernel_info.args.iter().zip(arg_values.iter()) { let mut workgroup_id_offset_loc = None;
if arg.dead { let mut input = Vec::new();
continue; let mut resource_info = Vec::new();
} // Set it once so we get the alignment padding right
let static_local_size: u64 = nir_kernel_build.shared_size;
let mut variable_local_size: u64 = static_local_size;
let printf_size = q.device.printf_buffer_size() as u32;
let mut samplers = Vec::new();
let mut iviews = Vec::new();
let mut sviews = Vec::new();
let mut tex_formats: Vec<u16> = Vec::new();
let mut tex_orders: Vec<u16> = Vec::new();
let mut img_formats: Vec<u16> = Vec::new();
let mut img_orders: Vec<u16> = Vec::new();
if arg.kind != KernelArgType::Image let null_ptr;
&& arg.kind != KernelArgType::RWImage let null_ptr_v3;
&& arg.kind != KernelArgType::Texture if q.device.address_bits() == 64 {
&& arg.kind != KernelArgType::Sampler null_ptr = [0u8; 8].as_slice();
{ null_ptr_v3 = [0u8; 24].as_slice();
input.resize(arg.offset, 0); } else {
} null_ptr = [0u8; 4].as_slice();
match val.as_ref().unwrap() { null_ptr_v3 = [0u8; 12].as_slice();
KernelArgValue::Constant(c) => input.extend_from_slice(c), };
KernelArgValue::Buffer(buffer) => {
let res = buffer.get_res_of_dev(q.device)?; for (arg, val) in kernel_info.args.iter().zip(arg_values.iter()) {
if q.device.address_bits() == 64 { if arg.dead {
let offset: u64 = buffer.offset as u64; continue;
input.extend_from_slice(&offset.to_ne_bytes());
} else {
let offset: u32 = buffer.offset as u32;
input.extend_from_slice(&offset.to_ne_bytes());
}
resource_info.push((res.clone(), arg.offset));
} }
KernelArgValue::Image(image) => {
let res = image.get_res_of_dev(q.device)?;
// If resource is a buffer, the image was created from a buffer. Use strides and if arg.kind != KernelArgType::Image
// dimensions of the image then. && arg.kind != KernelArgType::RWImage
let app_img_info = && arg.kind != KernelArgType::Texture
if res.as_ref().is_buffer() && image.mem_type == CL_MEM_OBJECT_IMAGE2D { && arg.kind != KernelArgType::Sampler
{
input.resize(arg.offset, 0);
}
match val.as_ref().unwrap() {
KernelArgValue::Constant(c) => input.extend_from_slice(c),
KernelArgValue::Buffer(buffer) => {
let res = buffer.get_res_of_dev(q.device)?;
if q.device.address_bits() == 64 {
let offset: u64 = buffer.offset as u64;
input.extend_from_slice(&offset.to_ne_bytes());
} else {
let offset: u32 = buffer.offset as u32;
input.extend_from_slice(&offset.to_ne_bytes());
}
resource_info.push((res.clone(), arg.offset));
}
KernelArgValue::Image(image) => {
let res = image.get_res_of_dev(q.device)?;
// If resource is a buffer, the image was created from a buffer. Use strides and
// dimensions of the image then.
let app_img_info = if res.as_ref().is_buffer()
&& image.mem_type == CL_MEM_OBJECT_IMAGE2D
{
Some(AppImgInfo::new( Some(AppImgInfo::new(
image.image_desc.row_pitch()? / image.image_elem_size as u32, image.image_desc.row_pitch()? / image.image_elem_size as u32,
image.image_desc.width()?, image.image_desc.width()?,
@@ -1013,150 +1020,132 @@ impl Kernel {
None None
}; };
let format = image.pipe_format; let format = image.pipe_format;
let (formats, orders) = if arg.kind == KernelArgType::Image { let (formats, orders) = if arg.kind == KernelArgType::Image {
iviews.push(res.pipe_image_view( iviews.push(res.pipe_image_view(
format, format,
false, false,
image.pipe_image_host_access(), image.pipe_image_host_access(),
app_img_info.as_ref(), app_img_info.as_ref(),
)); ));
(&mut img_formats, &mut img_orders) (&mut img_formats, &mut img_orders)
} else if arg.kind == KernelArgType::RWImage { } else if arg.kind == KernelArgType::RWImage {
iviews.push(res.pipe_image_view( iviews.push(res.pipe_image_view(
format, format,
true, true,
image.pipe_image_host_access(), image.pipe_image_host_access(),
app_img_info.as_ref(), app_img_info.as_ref(),
)); ));
(&mut img_formats, &mut img_orders) (&mut img_formats, &mut img_orders)
} else { } else {
sviews.push((res.clone(), format, app_img_info)); sviews.push((res.clone(), format, app_img_info));
(&mut tex_formats, &mut tex_orders) (&mut tex_formats, &mut tex_orders)
}; };
let binding = arg.binding as usize; let binding = arg.binding as usize;
assert!(binding >= formats.len()); assert!(binding >= formats.len());
formats.resize(binding, 0); formats.resize(binding, 0);
orders.resize(binding, 0); orders.resize(binding, 0);
formats.push(image.image_format.image_channel_data_type as u16); formats.push(image.image_format.image_channel_data_type as u16);
orders.push(image.image_format.image_channel_order as u16); orders.push(image.image_format.image_channel_order as u16);
}
KernelArgValue::LocalMem(size) => {
// TODO 32 bit
let pot = cmp::min(*size, 0x80);
variable_local_size =
align(variable_local_size, pot.next_power_of_two() as u64);
if q.device.address_bits() == 64 {
let variable_local_size: [u8; 8] = variable_local_size.to_ne_bytes();
input.extend_from_slice(&variable_local_size);
} else {
let variable_local_size: [u8; 4] =
(variable_local_size as u32).to_ne_bytes();
input.extend_from_slice(&variable_local_size);
} }
variable_local_size += *size as u64; KernelArgValue::LocalMem(size) => {
} // TODO 32 bit
KernelArgValue::Sampler(sampler) => { let pot = cmp::min(*size, 0x80);
samplers.push(sampler.pipe()); variable_local_size =
} align(variable_local_size, pot.next_power_of_two() as u64);
KernelArgValue::None => { if q.device.address_bits() == 64 {
assert!( let variable_local_size: [u8; 8] = variable_local_size.to_ne_bytes();
arg.kind == KernelArgType::MemGlobal input.extend_from_slice(&variable_local_size);
|| arg.kind == KernelArgType::MemConstant } else {
); let variable_local_size: [u8; 4] =
input.extend_from_slice(null_ptr); (variable_local_size as u32).to_ne_bytes();
} input.extend_from_slice(&variable_local_size);
} }
} variable_local_size += *size as u64;
}
// subtract the shader local_size as we only request something on top of that. KernelArgValue::Sampler(sampler) => {
variable_local_size -= static_local_size; samplers.push(sampler.pipe());
}
let mut printf_buf = None; KernelArgValue::None => {
for arg in &self.kernel_info.internal_args { assert!(
if arg.offset > input.len() { arg.kind == KernelArgType::MemGlobal
input.resize(arg.offset, 0); || arg.kind == KernelArgType::MemConstant
} );
match arg.kind { input.extend_from_slice(null_ptr);
InternalKernelArgType::ConstantBuffer => {
assert!(nir_kernel_build.constant_buffer.is_some());
input.extend_from_slice(null_ptr);
resource_info.push((
nir_kernel_build.constant_buffer.clone().unwrap(),
arg.offset,
));
}
InternalKernelArgType::GlobalWorkOffsets => {
if q.device.address_bits() == 64 {
input.extend_from_slice(unsafe {
as_byte_slice(&[
offsets[0] as u64,
offsets[1] as u64,
offsets[2] as u64,
])
});
} else {
input.extend_from_slice(unsafe {
as_byte_slice(&[
offsets[0] as u32,
offsets[1] as u32,
offsets[2] as u32,
])
});
} }
} }
InternalKernelArgType::WorkGroupOffsets => {
workgroup_id_offset_loc = Some(input.len());
input.extend_from_slice(null_ptr_v3);
}
InternalKernelArgType::PrintfBuffer => {
let buf = Arc::new(
q.device
.screen
.resource_create_buffer(
printf_size,
ResourceType::Staging,
PIPE_BIND_GLOBAL,
)
.unwrap(),
);
input.extend_from_slice(null_ptr);
resource_info.push((buf.clone(), arg.offset));
printf_buf = Some(buf);
}
InternalKernelArgType::InlineSampler(cl) => {
samplers.push(Sampler::cl_to_pipe(cl));
}
InternalKernelArgType::FormatArray => {
input.extend_from_slice(unsafe { as_byte_slice(&tex_formats) });
input.extend_from_slice(unsafe { as_byte_slice(&img_formats) });
}
InternalKernelArgType::OrderArray => {
input.extend_from_slice(unsafe { as_byte_slice(&tex_orders) });
input.extend_from_slice(unsafe { as_byte_slice(&img_orders) });
}
InternalKernelArgType::WorkDim => {
input.extend_from_slice(&[work_dim as u8; 1]);
}
InternalKernelArgType::NumWorkgroups => {
input.extend_from_slice(unsafe {
as_byte_slice(&[grid[0] as u32, grid[1] as u32, grid[2] as u32])
});
}
} }
}
Ok(Box::new(move |q, ctx| { // subtract the shader local_size as we only request something on top of that.
let mut input = input.clone(); variable_local_size -= static_local_size;
let mut resources = Vec::with_capacity(resource_info.len()); let mut resources = Vec::with_capacity(resource_info.len());
let mut globals: Vec<*mut u32> = Vec::new(); let mut globals: Vec<*mut u32> = Vec::new();
let printf_format = &nir_kernel_build.printf_info; let printf_format = &nir_kernel_build.printf_info;
let mut printf_buf = None;
for arg in &kernel_info.internal_args {
if arg.offset > input.len() {
input.resize(arg.offset, 0);
}
match arg.kind {
InternalKernelArgType::ConstantBuffer => {
assert!(nir_kernel_build.constant_buffer.is_some());
input.extend_from_slice(null_ptr);
resource_info.push((
nir_kernel_build.constant_buffer.clone().unwrap(),
arg.offset,
));
}
InternalKernelArgType::GlobalWorkOffsets => {
input.extend_from_slice(unsafe { as_byte_slice(&offsets) });
}
InternalKernelArgType::WorkGroupOffsets => {
workgroup_id_offset_loc = Some(input.len());
input.extend_from_slice(null_ptr_v3);
}
InternalKernelArgType::PrintfBuffer => {
let buf = Arc::new(
q.device
.screen
.resource_create_buffer(
printf_size,
ResourceType::Staging,
PIPE_BIND_GLOBAL,
)
.unwrap(),
);
input.extend_from_slice(null_ptr);
resource_info.push((buf.clone(), arg.offset));
printf_buf = Some(buf);
}
InternalKernelArgType::InlineSampler(cl) => {
samplers.push(Sampler::cl_to_pipe(cl));
}
InternalKernelArgType::FormatArray => {
input.extend_from_slice(unsafe { as_byte_slice(&tex_formats) });
input.extend_from_slice(unsafe { as_byte_slice(&img_formats) });
}
InternalKernelArgType::OrderArray => {
input.extend_from_slice(unsafe { as_byte_slice(&tex_orders) });
input.extend_from_slice(unsafe { as_byte_slice(&img_orders) });
}
InternalKernelArgType::WorkDim => {
input.extend_from_slice(&[work_dim as u8; 1]);
}
InternalKernelArgType::NumWorkgroups => {
input.extend_from_slice(unsafe {
as_byte_slice(&[grid[0] as u32, grid[1] as u32, grid[2] as u32])
});
}
}
}
let mut sviews: Vec<_> = sviews let mut sviews: Vec<_> = sviews
.iter() .iter()
.map(|(s, f, aii)| ctx.create_sampler_view(s, *f, aii.as_ref())) .map(|(s, f, aii)| ctx.create_sampler_view(s, *f, aii.as_ref()))