rusticl/memory: complete rework on how mapping is implemented

Previously we tried to map GPU resources directly wherever we could;
however, this kept causing random issues and was overall not very
robust.

Now we just allocate a staging buffer on the host to copy into, with a
shortcut for host_ptr allocations.
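
The idea, as a simplified standalone sketch (illustrative names only; the
real implementation is MemBase::map plus Buffer/Image::sync_map and
::unmap in the diff below, where a TrackedPointers map tracks mappings
per memory object, the copies run as queued commands rather than
immediately, and a Vec<u8> here stands in for the GPU resource):

    use std::alloc::{alloc, dealloc, Layout};
    use std::collections::BTreeMap;

    struct Mapping {
        layout: Layout,
        offset: usize,
        writes: bool,
        count: u32,
    }

    struct Staging {
        device_copy: Vec<u8>,           // stand-in for the GPU resource
        maps: BTreeMap<usize, Mapping>, // keyed by the pointer handed out
    }

    impl Staging {
        // map: allocate host staging memory and fill it from the device copy
        // (the fill is skipped for CL_MAP_WRITE_INVALIDATE_REGION).
        fn map(&mut self, offset: usize, size: usize, writes: bool) -> *mut u8 {
            assert!(size > 0 && offset + size <= self.device_copy.len());
            let layout = Layout::from_size_align(size, 16).unwrap();
            let ptr = unsafe { alloc(layout) };
            unsafe {
                std::ptr::copy_nonoverlapping(self.device_copy[offset..].as_ptr(), ptr, size);
            }
            self.maps
                .insert(ptr as usize, Mapping { layout, offset, writes, count: 1 });
            ptr
        }

        // unmap: on the last unmap of a pointer, write the staging data back
        // to the device copy only if it was mapped for writing, then free it.
        fn unmap(&mut self, ptr: *mut u8) {
            let Some(m) = self.maps.get_mut(&(ptr as usize)) else { return };
            m.count -= 1;
            if m.count == 0 {
                let m = self.maps.remove(&(ptr as usize)).unwrap();
                if m.writes {
                    let dst = self.device_copy[m.offset..].as_mut_ptr();
                    unsafe { std::ptr::copy_nonoverlapping(ptr, dst, m.layout.size()) };
                }
                unsafe { dealloc(ptr, m.layout) };
            }
        }
    }

The host_ptr shortcut mentioned above simply hands back host_ptr + offset
instead of allocating, so nothing has to be freed or copied back for
purely user-provided memory.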

Fixes the following tests across various drivers:

1Dbuffer tests (radeonsi, zink)
buffers map_read_* (zink)
multiple_device_context context_* (zink)
thread_dimensions quick_* (zink)
math_brute_force (zink)

Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30082>
Author:    Karol Herbst
Date:      2024-07-09 10:27:07 +02:00
Committer: Marge Bot
Commit:    7b22bc617b (parent 00180933ad)

3 changed files with 213 additions and 315 deletions


@@ -867,7 +867,7 @@ Rusticl OpenCL 1.2 -- all DONE:
   Separate compilation and linking of programs         DONE
   Extend cl_mem_flags                                   DONE
   clEnqueueFillBuffer, clEnqueueFillImage               DONE
-  Add CL_MAP_WRITE_INVALIDATE_REGION to cl_map_flags    in progress (flag is ignored)
+  Add CL_MAP_WRITE_INVALIDATE_REGION to cl_map_flags    DONE
   New image types                                       DONE
   clCreateImage                                         DONE
   clEnqueueMigrateMemObjects                            DONE


@@ -1685,14 +1685,20 @@ fn enqueue_map_buffer(
         return Err(CL_INVALID_CONTEXT);
     }
 
-    let ptr = b.map(q.device, offset)?;
+    let ptr = b.map(size, offset, map_flags != CL_MAP_READ.into())?;
     create_and_queue(
         q,
         CL_COMMAND_MAP_BUFFER,
         evs,
         event,
         block,
-        Box::new(move |q, ctx| b.sync_shadow(q, ctx, ptr)),
+        Box::new(move |q, ctx| {
+            if map_flags != CL_MAP_WRITE_INVALIDATE_REGION.into() {
+                b.sync_map(q, ctx, ptr)
+            } else {
+                Ok(())
+            }
+        }),
     )?;
 
     Ok(ptr.as_ptr())
@@ -2141,24 +2147,29 @@ fn enqueue_map_image(
     };
 
     let ptr = i.map(
-        q.device,
-        &origin,
+        origin,
+        region,
         unsafe { image_row_pitch.as_mut().unwrap() },
         image_slice_pitch,
+        map_flags != CL_MAP_READ.into(),
     )?;
-    // SAFETY: it's required that applications do not cause data races
-    let sync_ptr = unsafe { MutMemoryPtr::from_ptr(ptr) };
     create_and_queue(
         q,
         CL_COMMAND_MAP_IMAGE,
         evs,
         event,
         block,
-        Box::new(move |q, ctx| i.sync_shadow(q, ctx, sync_ptr)),
+        Box::new(move |q, ctx| {
+            if map_flags != CL_MAP_WRITE_INVALIDATE_REGION.into() {
+                i.sync_map(q, ctx, ptr)
+            } else {
+                Ok(())
+            }
+        }),
     )?;
 
-    Ok(ptr)
+    Ok(ptr.as_ptr())
 
     //• CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified or compute row and/or slice pitch) for image are not supported by device associated with queue.
     //• CL_IMAGE_FORMAT_NOT_SUPPORTED if image format (image channel order and data type) for image are not supported by device associated with queue.


@@ -17,10 +17,14 @@ use mesa_rust::pipe::transfer::*;
 use mesa_rust_gen::*;
 use mesa_rust_util::math::*;
 use mesa_rust_util::properties::Properties;
+use mesa_rust_util::ptr::AllocSize;
+use mesa_rust_util::ptr::TrackedPointers;
 use rusticl_opencl_gen::*;
+use std::alloc;
+use std::alloc::Layout;
 use std::cmp;
-use std::collections::hash_map::Entry;
+use std::collections::btree_map::Entry;
 use std::collections::HashMap;
 use std::convert::TryInto;
 use std::mem;
@@ -31,84 +35,45 @@ use std::ptr;
 use std::sync::Arc;
 use std::sync::Mutex;
 
-struct MappingTransfer {
-    tx: PipeTransfer,
-    shadow: Option<PipeResource>,
-    pending: u32,
-}
-
-impl MappingTransfer {
-    fn new(tx: PipeTransfer, shadow: Option<PipeResource>) -> Self {
-        MappingTransfer {
-            tx: tx,
-            shadow: shadow,
-            pending: 1,
-        }
-    }
-}
-
-struct Mappings {
-    tx: HashMap<&'static Device, MappingTransfer>,
-    maps: HashMap<usize, u32>,
-}
-
-impl Mappings {
-    fn new() -> Mutex<Self> {
-        Mutex::new(Mappings {
-            tx: HashMap::new(),
-            maps: HashMap::new(),
-        })
-    }
-
-    fn contains_ptr(&self, ptr: *mut c_void) -> bool {
-        let ptr = ptr as usize;
-        self.maps.contains_key(&ptr)
-    }
-
-    fn mark_pending(&mut self, dev: &Device) {
-        self.tx.get_mut(dev).unwrap().pending += 1;
-    }
-
-    fn unmark_pending(&mut self, dev: &Device) {
-        if let Some(tx) = self.tx.get_mut(dev) {
-            tx.pending -= 1;
-        }
-    }
-
-    fn increase_ref(&mut self, dev: &Device, ptr: *mut c_void) -> bool {
-        let ptr = ptr as usize;
-        let res = self.maps.is_empty();
-        *self.maps.entry(ptr).or_default() += 1;
-        self.unmark_pending(dev);
-        res
-    }
-
-    fn decrease_ref(&mut self, ptr: *mut c_void, dev: &Device) -> (bool, Option<&PipeResource>) {
-        let ptr = ptr as usize;
-        if let Some(r) = self.maps.get_mut(&ptr) {
-            *r -= 1;
-
-            if *r == 0 {
-                self.maps.remove(&ptr);
-            }
-
-            if self.maps.is_empty() {
-                let shadow = self.tx.get(dev).and_then(|tx| tx.shadow.as_ref());
-                return (true, shadow);
-            }
-        }
-
-        (false, None)
-    }
-
-    fn clean_up_tx(&mut self, dev: &Device, ctx: &PipeContext) {
-        if self.maps.is_empty() {
-            if let Some(tx) = self.tx.get(&dev) {
-                if tx.pending == 0 {
-                    self.tx.remove(dev).unwrap().tx.with_ctx(ctx);
-                }
-            }
-        }
-    }
+struct Mapping<T> {
+    layout: Layout,
+    writes: bool,
+    ptr: Option<MutMemoryPtr>,
+    count: u32,
+    inner: T,
+}
+
+impl<T> Drop for Mapping<T> {
+    fn drop(&mut self) {
+        if let Some(ptr) = &self.ptr {
+            unsafe {
+                alloc::dealloc(ptr.as_ptr().cast(), self.layout);
+            }
+        }
+    }
+}
+
+impl<T> AllocSize<usize> for Mapping<T> {
+    fn size(&self) -> usize {
+        self.layout.size()
+    }
+}
+
+impl<T> Deref for Mapping<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+struct BufferMapping {
+    offset: usize,
+}
+
+struct ImageMapping {
+    origin: CLVec<usize>,
+    region: CLVec<usize>,
 }
 
 #[repr(transparent)]
@@ -133,6 +98,14 @@ impl ConstMemoryPtr {
     }
 }
 
+impl From<MutMemoryPtr> for ConstMemoryPtr {
+    fn from(value: MutMemoryPtr) -> Self {
+        Self {
+            ptr: value.ptr.cast(),
+        }
+    }
+}
+
 #[repr(transparent)]
 #[derive(Clone, Copy)]
 pub struct MutMemoryPtr {
@@ -172,6 +145,13 @@ impl Deref for Mem {
 }
 
 impl Mem {
+    pub fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
+        match self {
+            Self::Buffer(b) => b.is_mapped_ptr(ptr),
+            Self::Image(i) => i.is_mapped_ptr(ptr),
+        }
+    }
+
     pub fn unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
         match self {
             Self::Buffer(b) => b.unmap(q, ctx, ptr),
@@ -219,12 +199,12 @@ pub struct MemBase {
     pub cbs: Mutex<Vec<MemCB>>,
     pub gl_obj: Option<GLObject>,
     res: Option<HashMap<&'static Device, Arc<PipeResource>>>,
-    maps: Mutex<Mappings>,
 }
 
 pub struct Buffer {
     base: MemBase,
     pub offset: usize,
+    maps: Mutex<TrackedPointers<usize, Mapping<BufferMapping>>>,
 }
 
 pub struct Image {
@@ -233,6 +213,7 @@ pub struct Image {
     pub pipe_format: pipe_format,
     pub image_desc: cl_image_desc,
     pub image_elem_size: u8,
+    maps: Mutex<TrackedPointers<usize, Mapping<ImageMapping>>>,
 }
 
 impl Deref for Buffer {
@@ -436,9 +417,9 @@ impl MemBase {
                 gl_obj: None,
                 cbs: Mutex::new(Vec::new()),
                 res: Some(buffer),
-                maps: Mappings::new(),
             },
             offset: 0,
+            maps: Mutex::new(TrackedPointers::new()),
         }))
     }
@@ -467,9 +448,9 @@ impl MemBase {
                 gl_obj: None,
                 cbs: Mutex::new(Vec::new()),
                 res: None,
-                maps: Mappings::new(),
             },
             offset: offset,
+            maps: Mutex::new(TrackedPointers::new()),
         })
     }
@@ -550,12 +531,12 @@ impl MemBase {
                 gl_obj: None,
                 cbs: Mutex::new(Vec::new()),
                 res: texture,
-                maps: Mappings::new(),
             },
             image_format: *image_format,
            pipe_format: pipe_format,
            image_desc: api_image_desc,
            image_elem_size: image_elem_size,
+            maps: Mutex::new(TrackedPointers::new()),
         }))
     }
@@ -656,13 +637,13 @@ impl MemBase {
             }),
             cbs: Mutex::new(Vec::new()),
             res: Some(texture),
-            maps: Mappings::new(),
         };
 
         Ok(if rusticl_type == RusticlTypes::Buffer {
             Arc::new(Buffer {
                 base: base,
                 offset: gl_mem_props.offset as usize,
+                maps: Mutex::new(TrackedPointers::new()),
             })
             .into_cl()
         } else {
@@ -683,6 +664,7 @@ impl MemBase {
                     ..Default::default()
                 },
                 image_elem_size: gl_mem_props.pixel_size,
+                maps: Mutex::new(TrackedPointers::new()),
             })
             .into_cl()
         })
@@ -729,8 +711,47 @@ impl MemBase {
         self.host_ptr as *mut c_void
     }
 
-    pub fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
-        self.maps.lock().unwrap().contains_ptr(ptr)
+    fn is_pure_user_memory(&self, d: &Device) -> CLResult<bool> {
+        let r = self.get_res_of_dev(d)?;
+        Ok(r.is_user)
+    }
+
+    fn map<T>(
+        &self,
+        offset: usize,
+        layout: Layout,
+        writes: bool,
+        maps: &Mutex<TrackedPointers<usize, Mapping<T>>>,
+        inner: T,
+    ) -> CLResult<MutMemoryPtr> {
+        let host_ptr = self.host_ptr();
+        let ptr = unsafe {
+            let ptr = if !host_ptr.is_null() {
+                host_ptr.add(offset)
+            } else {
+                alloc::alloc(layout).cast()
+            };
+
+            MutMemoryPtr::from_ptr(ptr)
+        };
+
+        match maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
+            Entry::Occupied(mut e) => {
+                debug_assert!(!host_ptr.is_null());
+                e.get_mut().count += 1;
+            }
+            Entry::Vacant(e) => {
+                e.insert(Mapping {
+                    layout: layout,
+                    writes: writes,
+                    ptr: host_ptr.is_null().then_some(ptr),
+                    count: 1,
+                    inner: inner,
+                });
+            }
+        }
+
+        Ok(ptr)
     }
 }
@@ -740,10 +761,6 @@ impl Drop for MemBase {
         for cb in cbs.into_iter().rev() {
             cb.call(self);
         }
-
-        for (d, tx) in self.maps.get_mut().unwrap().tx.drain() {
-            d.helper_ctx().unmap(tx.tx);
-        }
     }
 }
@@ -895,25 +912,20 @@ impl Buffer {
         Ok(())
     }
 
-    pub fn map(&self, dev: &'static Device, offset: usize) -> CLResult<MutMemoryPtr> {
-        let ptr = if self.has_user_shadow_buffer(dev)? {
-            self.host_ptr()
-        } else {
-            let mut lock = self.maps.lock().unwrap();
-            if let Entry::Vacant(e) = lock.tx.entry(dev) {
-                let (tx, res) = self.tx_raw_async(dev, RWFlags::RW)?;
-                e.insert(MappingTransfer::new(tx, res));
-            } else {
-                lock.mark_pending(dev);
-            }
-
-            lock.tx.get(dev).unwrap().tx.ptr()
-        };
-
-        let ptr = unsafe { ptr.add(offset) };
-        // SAFETY: it's required that applications do not cause data races
-        Ok(unsafe { MutMemoryPtr::from_ptr(ptr) })
+    fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
+        self.maps.lock().unwrap().contains_key(ptr as usize)
+    }
+
+    pub fn map(&self, size: usize, offset: usize, writes: bool) -> CLResult<MutMemoryPtr> {
+        let layout =
+            unsafe { Layout::from_size_align_unchecked(size, size_of::<[cl_ulong; 16]>()) };
+        self.base.map(
+            offset,
+            layout,
+            writes,
+            &self.maps,
+            BufferMapping { offset: offset },
+        )
     }
 
     pub fn read(
@@ -968,35 +980,18 @@ impl Buffer {
         Ok(())
     }
 
-    // TODO: only sync on map when the memory is not mapped with discard
-    pub fn sync_shadow(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
-        let ptr = ptr.as_ptr();
-        let mut lock = self.maps.lock().unwrap();
-        if !lock.increase_ref(q.device, ptr) {
+    pub fn sync_map(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
+        // no need to update
+        if self.is_pure_user_memory(q.device)? {
             return Ok(());
         }
 
-        if self.has_user_shadow_buffer(q.device)? {
-            self.read(
-                q,
-                ctx,
-                0,
-                // SAFETY: it's required that applications do not cause data races
-                unsafe { MutMemoryPtr::from_ptr(self.host_ptr()) },
-                self.size,
-            )
-        } else {
-            if let Some(shadow) = lock.tx.get(&q.device).and_then(|tx| tx.shadow.as_ref()) {
-                let res = self.get_res_of_dev(q.device)?;
-                let bx = create_pipe_box(
-                    [self.offset, 0, 0].into(),
-                    [self.size, 1, 1].into(),
-                    CL_MEM_OBJECT_BUFFER,
-                )?;
-                ctx.resource_copy_region(res, shadow, &[0; 3], &bx);
-            }
-            Ok(())
-        }
+        let maps = self.maps.lock().unwrap();
+        let Some(mapping) = maps.find_alloc_precise(ptr.as_ptr() as usize) else {
+            return Err(CL_INVALID_VALUE);
+        };
+
+        self.read(q, ctx, mapping.offset, ptr, mapping.size())
     }
 
     fn tx<'a>(
@@ -1022,69 +1017,21 @@ impl Buffer {
             .with_ctx(ctx))
     }
 
-    fn tx_raw_async(
-        &self,
-        dev: &Device,
-        rw: RWFlags,
-    ) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
-        let r = self.get_res_of_dev(dev)?;
-        let offset = self.offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
-        let size = self.size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
-        let ctx = dev.helper_ctx();
-
-        let tx = if can_map_directly(dev, r) {
-            ctx.buffer_map_directly(r, offset, size, rw)
-        } else {
-            None
-        };
-
-        if let Some(tx) = tx {
-            Ok((tx, None))
-        } else {
-            let shadow = dev
-                .screen()
-                .resource_create_buffer(size as u32, ResourceType::Staging, 0)
-                .ok_or(CL_OUT_OF_RESOURCES)?;
-            let tx = ctx
-                .buffer_map_coherent(&shadow, 0, size, rw)
-                .ok_or(CL_OUT_OF_RESOURCES)?;
-            Ok((tx, Some(shadow)))
-        }
-    }
-
     // TODO: only sync on unmap when the memory is not mapped for writing
     pub fn unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
-        let ptr = ptr.as_ptr();
-        let mut lock = self.maps.lock().unwrap();
-        if !lock.contains_ptr(ptr) {
-            return Ok(());
-        }
-
-        let (needs_sync, shadow) = lock.decrease_ref(ptr, q.device);
-        if needs_sync {
-            if let Some(shadow) = shadow {
-                let res = self.get_res_of_dev(q.device)?;
-                let offset = self.offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
-                let bx = create_pipe_box(
-                    CLVec::default(),
-                    [self.size, 1, 1].into(),
-                    CL_MEM_OBJECT_BUFFER,
-                )?;
-                ctx.resource_copy_region(shadow, res, &[offset, 0, 0], &bx);
-            } else if self.has_user_shadow_buffer(q.device)? {
-                self.write(
-                    q,
-                    ctx,
-                    0,
-                    // SAFETY: it's required that applications do not cause data races
-                    unsafe { ConstMemoryPtr::from_ptr(self.host_ptr()) },
-                    self.size,
-                )?;
-            }
-        }
-
-        lock.clean_up_tx(q.device, ctx);
+        let mapping = match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
+            Entry::Vacant(_) => return Err(CL_INVALID_VALUE),
+            Entry::Occupied(mut entry) => {
+                entry.get_mut().count -= 1;
+                (entry.get().count == 0).then(|| entry.remove())
+            }
+        };
+
+        if let Some(mapping) = mapping {
+            if mapping.writes && !self.is_pure_user_memory(q.device)? {
+                self.write(q, ctx, mapping.offset, ptr.into(), mapping.size())?;
+            }
+        };
 
         Ok(())
     }
@@ -1345,61 +1292,45 @@ impl Image {
         Ok(())
     }
 
+    fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
+        self.maps.lock().unwrap().contains_key(ptr as usize)
+    }
+
     pub fn is_parent_buffer(&self) -> bool {
         matches!(self.parent, Some(Mem::Buffer(_)))
     }
 
     pub fn map(
         &self,
-        dev: &'static Device,
-        origin: &CLVec<usize>,
+        origin: CLVec<usize>,
+        region: CLVec<usize>,
         row_pitch: &mut usize,
         slice_pitch: &mut usize,
-    ) -> CLResult<*mut c_void> {
-        // we might have a host_ptr shadow buffer or image created from buffer
-        let ptr = if self.has_user_shadow_buffer(dev)? {
-            *row_pitch = self.image_desc.image_row_pitch;
-            *slice_pitch = self.image_desc.image_slice_pitch;
-            self.host_ptr()
-        } else if let Some(Mem::Buffer(buffer)) = &self.parent {
-            *row_pitch = self.image_desc.image_row_pitch;
-            *slice_pitch = self.image_desc.image_slice_pitch;
-            buffer.map(dev, 0)?.as_ptr()
-        } else {
-            let mut lock = self.maps.lock().unwrap();
-            if let Entry::Vacant(e) = lock.tx.entry(dev) {
-                let bx = self.image_desc.bx()?;
-                let (tx, res) = self.tx_raw_async(dev, &bx, RWFlags::RW)?;
-                e.insert(MappingTransfer::new(tx, res));
-            } else {
-                lock.mark_pending(dev);
-            }
-
-            let tx = &lock.tx.get(dev).unwrap().tx;
-
-            if self.image_desc.dims() > 1 {
-                *row_pitch = tx.row_pitch() as usize;
-            }
-            if self.image_desc.dims() > 2 || self.image_desc.is_array() {
-                *slice_pitch = tx.slice_pitch();
-            }
-
-            tx.ptr()
-        };
-
-        let ptr = unsafe {
-            ptr.add(
-                *origin
-                    * [
-                        self.image_format.pixel_size().unwrap().into(),
-                        *row_pitch,
-                        *slice_pitch,
-                    ],
-            )
-        };
-
-        Ok(ptr)
+        writes: bool,
+    ) -> CLResult<MutMemoryPtr> {
+        let pixel_size = self.image_format.pixel_size().unwrap() as usize;
+
+        *row_pitch = self.image_desc.row_pitch()? as usize;
+        *slice_pitch = self.image_desc.slice_pitch();
+
+        let (offset, size) =
+            CLVec::calc_offset_size(origin, region, [pixel_size, *row_pitch, *slice_pitch]);
+
+        let layout;
+        unsafe {
+            layout = Layout::from_size_align_unchecked(size, size_of::<[u32; 4]>());
+        }
+
+        self.base.map(
+            offset,
+            layout,
+            writes,
+            &self.maps,
+            ImageMapping {
+                origin: origin,
+                region: region,
+            },
+        )
     }
 
     pub fn pipe_image_host_access(&self) -> u16 {
@@ -1466,32 +1397,29 @@ impl Image {
     }
 
     // TODO: only sync on map when the memory is not mapped with discard
-    pub fn sync_shadow(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
-        let ptr = ptr.as_ptr();
-        let mut lock = self.maps.lock().unwrap();
-        if !lock.increase_ref(q.device, ptr) {
+    pub fn sync_map(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
+        // no need to update
+        if self.is_pure_user_memory(q.device)? {
             return Ok(());
         }
 
-        if self.has_user_shadow_buffer(q.device)? {
-            self.read(
-                // SAFETY: it's required that applications do not cause data races
-                unsafe { MutMemoryPtr::from_ptr(self.host_ptr()) },
-                q,
-                ctx,
-                &self.image_desc.size(),
-                &CLVec::default(),
-                self.image_desc.image_row_pitch,
-                self.image_desc.image_slice_pitch,
-            )
-        } else {
-            if let Some(shadow) = lock.tx.get(q.device).and_then(|tx| tx.shadow.as_ref()) {
-                let res = self.get_res_of_dev(q.device)?;
-                let bx = self.image_desc.bx()?;
-                ctx.resource_copy_region(res, shadow, &[0, 0, 0], &bx);
-            }
-            Ok(())
-        }
+        let maps = self.maps.lock().unwrap();
+        let Some(mapping) = maps.find_alloc_precise(ptr.as_ptr() as usize) else {
+            return Err(CL_INVALID_VALUE);
+        };
+
+        let row_pitch = self.image_desc.row_pitch()? as usize;
+        let slice_pitch = self.image_desc.slice_pitch();
+
+        self.read(
+            ptr,
+            q,
+            ctx,
+            &mapping.region,
+            &mapping.origin,
+            row_pitch,
+            slice_pitch,
+        )
     }
 
     fn tx_image<'a>(
@@ -1508,74 +1436,33 @@ impl Image {
             .with_ctx(ctx))
     }
 
-    fn tx_raw_async(
-        &self,
-        dev: &Device,
-        bx: &pipe_box,
-        rw: RWFlags,
-    ) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
-        let r = self.get_res_of_dev(dev)?;
-        let ctx = dev.helper_ctx();
-
-        let tx = if can_map_directly(dev, r) {
-            ctx.texture_map_directly(r, bx, rw)
-        } else {
-            None
-        };
-
-        if let Some(tx) = tx {
-            Ok((tx, None))
-        } else {
-            let shadow = dev
-                .screen()
-                .resource_create_texture(
-                    r.width(),
-                    r.height(),
-                    r.depth(),
-                    r.array_size(),
-                    cl_mem_type_to_texture_target(self.image_desc.image_type),
-                    self.pipe_format,
-                    ResourceType::Staging,
-                    false,
-                )
-                .ok_or(CL_OUT_OF_RESOURCES)?;
-            let tx = ctx
-                .texture_map_coherent(&shadow, bx, rw)
-                .ok_or(CL_OUT_OF_RESOURCES)?;
-            Ok((tx, Some(shadow)))
-        }
-    }
-
     // TODO: only sync on unmap when the memory is not mapped for writing
     pub fn unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
-        let ptr = ptr.as_ptr();
-        let mut lock = self.maps.lock().unwrap();
-        if !lock.contains_ptr(ptr) {
-            return Ok(());
-        }
-
-        let (needs_sync, shadow) = lock.decrease_ref(ptr, q.device);
-        if needs_sync {
-            if let Some(shadow) = shadow {
-                let res = self.get_res_of_dev(q.device)?;
-                let bx = self.image_desc.bx()?;
-                ctx.resource_copy_region(shadow, res, &[0, 0, 0], &bx);
-            } else if self.has_user_shadow_buffer(q.device)? {
+        let mapping = match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
+            Entry::Vacant(_) => return Err(CL_INVALID_VALUE),
+            Entry::Occupied(mut entry) => {
+                entry.get_mut().count -= 1;
+                (entry.get().count == 0).then(|| entry.remove())
+            }
+        };
+
+        let row_pitch = self.image_desc.row_pitch()? as usize;
+        let slice_pitch = self.image_desc.slice_pitch();
+
+        if let Some(mapping) = mapping {
+            if mapping.writes && !self.is_pure_user_memory(q.device)? {
                 self.write(
-                    // SAFETY: it's required that applications do not cause data races
-                    unsafe { ConstMemoryPtr::from_ptr(self.host_ptr()) },
+                    ptr.into(),
                     q,
                     ctx,
-                    &self.image_desc.size(),
-                    self.image_desc.image_row_pitch,
-                    self.image_desc.image_slice_pitch,
-                    &CLVec::default(),
+                    &mapping.region,
+                    row_pitch,
+                    slice_pitch,
+                    &mapping.origin,
                 )?;
             }
         }
 
-        lock.clean_up_tx(q.device, ctx);
-
         Ok(())
     }