d3d12: Video Process - Implement get_processor_fence and async queuing

Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23969>
This commit is contained in:
Sil Vilerino
2023-06-30 15:37:19 -04:00
committed by Marge Bot
parent c875028b02
commit 5a8d8dad9a
2 changed files with 210 additions and 43 deletions

View File

@@ -40,6 +40,25 @@ d3d12_video_processor_begin_frame(struct pipe_video_codec * codec,
"fenceValue: %d\n",
pD3D12Proc->m_fenceValue);
///
/// Wait here to make sure the next in flight resource set is empty before using it
///
uint64_t fenceValueToWaitOn = static_cast<uint64_t>(std::max(static_cast<int64_t>(0l), static_cast<int64_t>(pD3D12Proc->m_fenceValue) - static_cast<int64_t>(D3D12_VIDEO_PROC_ASYNC_DEPTH) ));
debug_printf("[d3d12_video_processor] d3d12_video_processor_begin_frame Waiting for completion of in flight resource sets with previous work with fenceValue: %" PRIu64 "\n",
fenceValueToWaitOn);
ASSERTED bool wait_res = d3d12_video_processor_sync_completion(codec, fenceValueToWaitOn, OS_TIMEOUT_INFINITE);
assert(wait_res);
HRESULT hr = pD3D12Proc->m_spCommandList->Reset(pD3D12Proc->m_spCommandAllocators[d3d12_video_processor_pool_current_index(pD3D12Proc)].Get());
if (FAILED(hr)) {
debug_printf(
"[d3d12_video_processor] resetting ID3D12GraphicsCommandList failed with HR %x\n",
hr);
assert(false);
}
// Setup process frame arguments for output/target texture.
struct d3d12_video_buffer *pOutputVideoBuffer = (struct d3d12_video_buffer *) target;
@@ -159,6 +178,10 @@ d3d12_video_processor_end_frame(struct pipe_video_codec * codec,
std::swap(BarrierDesc.Transition.StateBefore, BarrierDesc.Transition.StateAfter);
pD3D12Proc->m_spCommandList->ResourceBarrier(static_cast<uint32_t>(barrier_transitions.size()), barrier_transitions.data());
pD3D12Proc->m_PendingFences[d3d12_video_processor_pool_current_index(pD3D12Proc)].value = pD3D12Proc->m_fenceValue;
pD3D12Proc->m_PendingFences[d3d12_video_processor_pool_current_index(pD3D12Proc)].cmdqueue_fence = pD3D12Proc->m_spFence.Get();
*picture->fence = (pipe_fence_handle*) &pD3D12Proc->m_PendingFences[d3d12_video_processor_pool_current_index(pD3D12Proc)];
}
void
@@ -168,6 +191,9 @@ d3d12_video_processor_process_frame(struct pipe_video_codec *codec,
{
struct d3d12_video_processor * pD3D12Proc = (struct d3d12_video_processor *) codec;
// begin_frame gets only called once so wouldn't update process_properties->src_surface_fence correctly
pD3D12Proc->input_surface_fence = (struct d3d12_fence*) process_properties->src_surface_fence;
// Get the underlying resources from the pipe_video_buffers
struct d3d12_video_buffer *pInputVideoBuffer = (struct d3d12_video_buffer *) input_texture;
@@ -265,10 +291,17 @@ d3d12_video_processor_destroy(struct pipe_video_codec * codec)
if (codec == nullptr) {
return;
}
d3d12_video_processor_flush(codec); // Flush pending work before destroying.
// Flush pending work before destroying.
struct d3d12_video_processor *pD3D12Proc = (struct d3d12_video_processor *) codec;
uint64_t curBatchFence = pD3D12Proc->m_fenceValue;
if (pD3D12Proc->m_needsGPUFlush)
{
d3d12_video_processor_flush(codec);
d3d12_video_processor_sync_completion(codec, curBatchFence, OS_TIMEOUT_INFINITE);
}
// Call dtor to make ComPtr work
struct d3d12_video_processor * pD3D12Proc = (struct d3d12_video_processor *) codec;
delete pD3D12Proc;
}
@@ -292,10 +325,6 @@ d3d12_video_processor_flush(struct pipe_video_codec * codec)
// Make the resources permanently resident for video use
d3d12_promote_to_permanent_residency(pD3D12Proc->m_pD3D12Screen, pD3D12Proc->m_OutputArguments.buffer->texture);
// Synchronize against the resources that are going to be read/written to
d3d12_resource_wait_idle(d3d12_context(pD3D12Proc->base.context),
pD3D12Proc->m_OutputArguments.buffer->texture,
true /*wantToWrite*/);
for(auto curInput : pD3D12Proc->m_InputBuffers)
{
@@ -303,10 +332,6 @@ d3d12_video_processor_flush(struct pipe_video_codec * codec)
curInput->texture);
// Make the resources permanently resident for video use
d3d12_promote_to_permanent_residency(pD3D12Proc->m_pD3D12Screen, curInput->texture);
// Synchronize against the resources that are going to be read/written to
d3d12_resource_wait_idle(d3d12_context(pD3D12Proc->base.context),
curInput->texture,
false /*wantToWrite*/);
}
HRESULT hr = pD3D12Proc->m_pD3D12Screen->dev->GetDeviceRemovedReason();
@@ -333,29 +358,20 @@ d3d12_video_processor_flush(struct pipe_video_codec * codec)
goto flush_fail;
}
// Flush any work batched in the d3d12_screen and Wait on the m_spCommandQueue
struct pipe_fence_handle *completion_fence = NULL;
pD3D12Proc->base.context->flush(pD3D12Proc->base.context, &completion_fence, PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
struct d3d12_fence *casted_completion_fence = d3d12_fence(completion_fence);
pD3D12Proc->m_spCommandQueue->Wait(casted_completion_fence->cmdqueue_fence, casted_completion_fence->value);
pD3D12Proc->m_pD3D12Screen->base.fence_reference(&pD3D12Proc->m_pD3D12Screen->base, &completion_fence, NULL);
struct d3d12_fence *input_surface_fence = pD3D12Proc->input_surface_fence;
if (input_surface_fence)
pD3D12Proc->m_spCommandQueue->Wait(input_surface_fence->cmdqueue_fence, input_surface_fence->value);
ID3D12CommandList *ppCommandLists[1] = { pD3D12Proc->m_spCommandList.Get() };
pD3D12Proc->m_spCommandQueue->ExecuteCommandLists(1, ppCommandLists);
pD3D12Proc->m_spCommandQueue->Signal(pD3D12Proc->m_spFence.Get(), pD3D12Proc->m_fenceValue);
pD3D12Proc->m_spFence->SetEventOnCompletion(pD3D12Proc->m_fenceValue, nullptr);
debug_printf("[d3d12_video_processor] d3d12_video_processor_flush - ExecuteCommandLists finished on signal with "
"fenceValue: %d\n",
pD3D12Proc->m_fenceValue);
hr = pD3D12Proc->m_spCommandAllocator->Reset();
if (FAILED(hr)) {
debug_printf(
"[d3d12_video_processor] d3d12_video_processor_flush - resetting ID3D12CommandAllocator failed with HR %x\n",
hr);
goto flush_fail;
}
hr = pD3D12Proc->m_spCommandList->Reset(pD3D12Proc->m_spCommandAllocator.Get());
if (FAILED(hr)) {
debug_printf(
"[d3d12_video_processor] d3d12_video_processor_flush - resetting ID3D12GraphicsCommandList failed with HR %x\n",
hr);
goto flush_fail;
}
// Validate device was not removed
hr = pD3D12Proc->m_pD3D12Screen->dev->GetDeviceRemovedReason();
@@ -395,6 +411,7 @@ d3d12_video_processor_create(struct pipe_context *context, const struct pipe_vid
// Not using new doesn't call ctor and the initializations in the class declaration are lost
struct d3d12_video_processor *pD3D12Proc = new d3d12_video_processor;
pD3D12Proc->m_PendingFences.resize(D3D12_VIDEO_PROC_ASYNC_DEPTH);
pD3D12Proc->base = *codec;
pD3D12Proc->base.context = context;
@@ -405,6 +422,7 @@ d3d12_video_processor_create(struct pipe_context *context, const struct pipe_vid
pD3D12Proc->base.process_frame = d3d12_video_processor_process_frame;
pD3D12Proc->base.end_frame = d3d12_video_processor_end_frame;
pD3D12Proc->base.flush = d3d12_video_processor_flush;
pD3D12Proc->base.get_processor_fence = d3d12_video_processor_get_processor_fence;
///
@@ -651,22 +669,32 @@ d3d12_video_processor_create_command_objects(struct d3d12_video_processor *pD3D1
return false;
}
hr = pD3D12Proc->m_pD3D12Screen->dev->CreateCommandAllocator(
D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS,
IID_PPV_ARGS(pD3D12Proc->m_spCommandAllocator.GetAddressOf()));
pD3D12Proc->m_spCommandAllocators.resize(D3D12_VIDEO_PROC_ASYNC_DEPTH);
for (uint32_t i = 0; i < pD3D12Proc->m_spCommandAllocators.size() ; i++) {
hr = pD3D12Proc->m_pD3D12Screen->dev->CreateCommandAllocator(
D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS,
IID_PPV_ARGS(pD3D12Proc->m_spCommandAllocators[i].GetAddressOf()));
if (FAILED(hr)) {
debug_printf("[d3d12_video_processor] d3d12_video_processor_create_command_objects - Call to "
"CreateCommandAllocator failed with HR %x\n",
hr);
if (FAILED(hr)) {
debug_printf("[d3d12_video_processor] d3d12_video_processor_create_command_objects - Call to "
"CreateCommandAllocator failed with HR %x\n",
hr);
return false;
}
}
ComPtr<ID3D12Device4> spD3D12Device4;
if (FAILED(pD3D12Proc->m_pD3D12Screen->dev->QueryInterface(
IID_PPV_ARGS(spD3D12Device4.GetAddressOf())))) {
debug_printf(
"[d3d12_video_processor] d3d12_video_processor_create_processor - D3D12 Device has no ID3D12Device4 support\n");
return false;
}
hr = pD3D12Proc->m_pD3D12Screen->dev->CreateCommandList(0,
D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS,
pD3D12Proc->m_spCommandAllocator.Get(),
nullptr,
IID_PPV_ARGS(pD3D12Proc->m_spCommandList.GetAddressOf()));
hr = spD3D12Device4->CreateCommandList1(0,
D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS,
D3D12_COMMAND_LIST_FLAG_NONE,
IID_PPV_ARGS(pD3D12Proc->m_spCommandList.GetAddressOf()));
if (FAILED(hr)) {
debug_printf("[d3d12_video_processor] d3d12_video_processor_create_command_objects - Call to CreateCommandList "
@@ -711,3 +739,123 @@ d3d12_video_processor_convert_pipe_rotation(enum pipe_video_vpp_orientation orie
return result;
}
// Returns the pool slot associated with the processor's current fence value:
// m_fenceValue wrapped into the range [0, D3D12_VIDEO_PROC_ASYNC_DEPTH).
// Callers use the result to index the per-frame pools (m_spCommandAllocators,
// m_PendingFences).
uint64_t
d3d12_video_processor_pool_current_index(struct d3d12_video_processor *pD3D12Proc)
{
   const auto poolSlot = pD3D12Proc->m_fenceValue % D3D12_VIDEO_PROC_ASYNC_DEPTH;
   return poolSlot;
}
// Blocks the calling thread until the processor fence (m_spFence) reaches
// fenceValueToWaitOn, or until the wait gives up.
//
// codec              - the d3d12_video_processor, passed as its pipe_video_codec base.
// fenceValueToWaitOn - fence value that has to be reached.
// timeout_ns         - forwarded unchanged to d3d12_fence_wait_event.
//
// Returns true when the fence had already reached the value; otherwise returns
// whatever d3d12_fence_wait_event reports. Returns false (after asserting) when
// the completion event could not be registered on the fence.
bool
d3d12_video_processor_ensure_fence_finished(struct pipe_video_codec *codec,
                                            uint64_t fenceValueToWaitOn,
                                            uint64_t timeout_ns)
{
   bool wait_result = true;
   struct d3d12_video_processor *pD3D12Proc = (struct d3d12_video_processor *) codec;
   HRESULT hr = S_OK;
   // Snapshot taken before any waiting; every log line below reports this value.
   uint64_t completedValue = pD3D12Proc->m_spFence->GetCompletedValue();

   debug_printf(
      "[d3d12_video_processor] d3d12_video_processor_ensure_fence_finished - Waiting for fence (with timeout_ns %" PRIu64
      ") to finish with "
      "fenceValue: %" PRIu64 " - Current Fence Completed Value %" PRIu64 "\n",
      timeout_ns,
      fenceValueToWaitOn,
      completedValue);

   if (completedValue < fenceValueToWaitOn) {
      HANDLE event = {};
      int event_fd = 0;
      event = d3d12_fence_create_event(&event_fd);

      hr = pD3D12Proc->m_spFence->SetEventOnCompletion(fenceValueToWaitOn, event);
      if (FAILED(hr)) {
         // The event was created above and nobody else owns it: release it
         // here, otherwise this error path leaks it.
         d3d12_fence_close_event(event, event_fd);
         debug_printf("[d3d12_video_processor] d3d12_video_processor_ensure_fence_finished - SetEventOnCompletion for "
                      "fenceValue %" PRIu64 " failed with HR %x\n",
                      fenceValueToWaitOn,
                      hr);
         goto ensure_fence_finished_fail;
      }

      wait_result = d3d12_fence_wait_event(event, event_fd, timeout_ns);
      d3d12_fence_close_event(event, event_fd);

      debug_printf("[d3d12_video_processor] d3d12_video_processor_ensure_fence_finished - Waiting on fence to be done with "
                   "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
                   fenceValueToWaitOn,
                   completedValue);
   } else {
      debug_printf("[d3d12_video_processor] d3d12_video_processor_ensure_fence_finished - Fence already done with "
                   "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
                   fenceValueToWaitOn,
                   completedValue);
   }
   return wait_result;

ensure_fence_finished_fail:
   debug_printf("[d3d12_video_processor] d3d12_video_processor_ensure_fence_finished failed for fenceValue: %" PRIu64 "\n",
                fenceValueToWaitOn);
   assert(false);
   return false;
}
bool
d3d12_video_processor_sync_completion(struct pipe_video_codec *codec, uint64_t fenceValueToWaitOn, uint64_t timeout_ns)
{
struct d3d12_video_processor *pD3D12Proc = (struct d3d12_video_processor *) codec;
assert(pD3D12Proc);
assert(pD3D12Proc->m_spD3D12VideoDevice);
assert(pD3D12Proc->m_spCommandQueue);
HRESULT hr = S_OK;
ASSERTED bool wait_result = d3d12_video_processor_ensure_fence_finished(codec, fenceValueToWaitOn, timeout_ns);
assert(wait_result);
hr =
pD3D12Proc->m_spCommandAllocators[fenceValueToWaitOn % D3D12_VIDEO_PROC_ASYNC_DEPTH]->Reset();
if (FAILED(hr)) {
debug_printf("m_spCommandAllocator->Reset() failed with %x.\n", hr);
goto sync_with_token_fail;
}
// Validate device was not removed
hr = pD3D12Proc->m_pD3D12Screen->dev->GetDeviceRemovedReason();
if (hr != S_OK) {
debug_printf("[d3d12_video_processor] d3d12_video_processor_sync_completion"
" - D3D12Device was removed AFTER d3d12_video_processor_ensure_fence_finished "
"execution with HR %x, but wasn't before.\n",
hr);
goto sync_with_token_fail;
}
debug_printf(
"[d3d12_video_processor] d3d12_video_processor_sync_completion - GPU execution finalized for fenceValue: %" PRIu64
"\n",
fenceValueToWaitOn);
return wait_result;
sync_with_token_fail:
debug_printf("[d3d12_video_processor] d3d12_video_processor_sync_completion failed for fenceValue: %" PRIu64 "\n",
fenceValueToWaitOn);
assert(false);
return false;
}
int d3d12_video_processor_get_processor_fence(struct pipe_video_codec *codec,
struct pipe_fence_handle *fence,
uint64_t timeout)
{
struct d3d12_fence *fenceValueToWaitOn = (struct d3d12_fence *) fence;
assert(fenceValueToWaitOn);
ASSERTED bool wait_res = d3d12_video_processor_sync_completion(codec, fenceValueToWaitOn->value, timeout);
// Return semantics based on p_video_codec interface
// ret == 0 -> work in progress
// ret != 0 -> work completed
return wait_res ? 1 : 0;
}

View File

@@ -109,7 +109,8 @@ struct d3d12_video_processor
std::vector<D3D12_VIDEO_PROCESS_INPUT_STREAM_DESC> m_inputStreamDescs;
ComPtr<ID3D12VideoProcessor1> m_spVideoProcessor;
ComPtr<ID3D12CommandQueue> m_spCommandQueue;
ComPtr<ID3D12CommandAllocator> m_spCommandAllocator;
std::vector<ComPtr<ID3D12CommandAllocator>> m_spCommandAllocators;
std::vector<struct d3d12_fence> m_PendingFences;
ComPtr<ID3D12VideoProcessCommandList1> m_spCommandList;
std::vector<D3D12_RESOURCE_BARRIER> m_transitionsBeforeCloseCmdList;
@@ -123,6 +124,8 @@ struct d3d12_video_processor
bool m_needsGPUFlush = false;
D3D12_FEATURE_DATA_VIDEO_PROCESS_MAX_INPUT_STREAMS m_vpMaxInputStreams = { };
struct d3d12_fence* input_surface_fence = NULL;
};
struct pipe_video_codec *
@@ -141,6 +144,22 @@ d3d12_video_processor_create_command_objects(struct d3d12_video_processor *pD3D1
D3D12_VIDEO_PROCESS_ORIENTATION
d3d12_video_processor_convert_pipe_rotation(enum pipe_video_vpp_orientation orientation);
bool
d3d12_video_processor_ensure_fence_finished(struct pipe_video_codec *codec, uint64_t fenceValueToWaitOn, uint64_t timeout_ns);
bool
d3d12_video_processor_sync_completion(struct pipe_video_codec *codec, uint64_t fenceValueToWaitOn, uint64_t timeout_ns);
uint64_t
d3d12_video_processor_pool_current_index(struct d3d12_video_processor *codec);
int d3d12_video_processor_get_processor_fence(struct pipe_video_codec *codec,
struct pipe_fence_handle *fence,
uint64_t timeout);
// We need enough depth so that the next item in the pipeline doesn't ask for a fence value we lost
const uint64_t D3D12_VIDEO_PROC_ASYNC_DEPTH = 36;
///
/// d3d12_video_processor functions ends
///