d3d12: Video Process - Implement get_processor_fence and async queuing
Reviewed-by: Jesse Natalie <jenatali@microsoft.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23969>
This commit is contained in:
@@ -40,6 +40,25 @@ d3d12_video_processor_begin_frame(struct pipe_video_codec * codec,
|
||||
"fenceValue: %d\n",
|
||||
pD3D12Proc->m_fenceValue);
|
||||
|
||||
///
|
||||
/// Wait here to make sure the next in flight resource set is empty before using it
|
||||
///
|
||||
uint64_t fenceValueToWaitOn = static_cast<uint64_t>(std::max(static_cast<int64_t>(0l), static_cast<int64_t>(pD3D12Proc->m_fenceValue) - static_cast<int64_t>(D3D12_VIDEO_PROC_ASYNC_DEPTH) ));
|
||||
|
||||
debug_printf("[d3d12_video_processor] d3d12_video_processor_begin_frame Waiting for completion of in flight resource sets with previous work with fenceValue: %" PRIu64 "\n",
|
||||
fenceValueToWaitOn);
|
||||
|
||||
ASSERTED bool wait_res = d3d12_video_processor_sync_completion(codec, fenceValueToWaitOn, OS_TIMEOUT_INFINITE);
|
||||
assert(wait_res);
|
||||
|
||||
HRESULT hr = pD3D12Proc->m_spCommandList->Reset(pD3D12Proc->m_spCommandAllocators[d3d12_video_processor_pool_current_index(pD3D12Proc)].Get());
|
||||
if (FAILED(hr)) {
|
||||
debug_printf(
|
||||
"[d3d12_video_processor] resetting ID3D12GraphicsCommandList failed with HR %x\n",
|
||||
hr);
|
||||
assert(false);
|
||||
}
|
||||
|
||||
// Setup process frame arguments for output/target texture.
|
||||
struct d3d12_video_buffer *pOutputVideoBuffer = (struct d3d12_video_buffer *) target;
|
||||
|
||||
@@ -159,6 +178,10 @@ d3d12_video_processor_end_frame(struct pipe_video_codec * codec,
|
||||
std::swap(BarrierDesc.Transition.StateBefore, BarrierDesc.Transition.StateAfter);
|
||||
|
||||
pD3D12Proc->m_spCommandList->ResourceBarrier(static_cast<uint32_t>(barrier_transitions.size()), barrier_transitions.data());
|
||||
|
||||
pD3D12Proc->m_PendingFences[d3d12_video_processor_pool_current_index(pD3D12Proc)].value = pD3D12Proc->m_fenceValue;
|
||||
pD3D12Proc->m_PendingFences[d3d12_video_processor_pool_current_index(pD3D12Proc)].cmdqueue_fence = pD3D12Proc->m_spFence.Get();
|
||||
*picture->fence = (pipe_fence_handle*) &pD3D12Proc->m_PendingFences[d3d12_video_processor_pool_current_index(pD3D12Proc)];
|
||||
}
|
||||
|
||||
void
|
||||
@@ -168,6 +191,9 @@ d3d12_video_processor_process_frame(struct pipe_video_codec *codec,
|
||||
{
|
||||
struct d3d12_video_processor * pD3D12Proc = (struct d3d12_video_processor *) codec;
|
||||
|
||||
// begin_frame gets only called once so wouldn't update process_properties->src_surface_fence correctly
|
||||
pD3D12Proc->input_surface_fence = (struct d3d12_fence*) process_properties->src_surface_fence;
|
||||
|
||||
// Get the underlying resources from the pipe_video_buffers
|
||||
struct d3d12_video_buffer *pInputVideoBuffer = (struct d3d12_video_buffer *) input_texture;
|
||||
|
||||
@@ -265,10 +291,17 @@ d3d12_video_processor_destroy(struct pipe_video_codec * codec)
|
||||
if (codec == nullptr) {
|
||||
return;
|
||||
}
|
||||
d3d12_video_processor_flush(codec); // Flush pending work before destroying.
|
||||
// Flush pending work before destroying.
|
||||
struct d3d12_video_processor *pD3D12Proc = (struct d3d12_video_processor *) codec;
|
||||
|
||||
uint64_t curBatchFence = pD3D12Proc->m_fenceValue;
|
||||
if (pD3D12Proc->m_needsGPUFlush)
|
||||
{
|
||||
d3d12_video_processor_flush(codec);
|
||||
d3d12_video_processor_sync_completion(codec, curBatchFence, OS_TIMEOUT_INFINITE);
|
||||
}
|
||||
|
||||
// Call dtor to make ComPtr work
|
||||
struct d3d12_video_processor * pD3D12Proc = (struct d3d12_video_processor *) codec;
|
||||
delete pD3D12Proc;
|
||||
}
|
||||
|
||||
@@ -292,10 +325,6 @@ d3d12_video_processor_flush(struct pipe_video_codec * codec)
|
||||
|
||||
// Make the resources permanently resident for video use
|
||||
d3d12_promote_to_permanent_residency(pD3D12Proc->m_pD3D12Screen, pD3D12Proc->m_OutputArguments.buffer->texture);
|
||||
// Synchronize against the resources that are going to be read/written to
|
||||
d3d12_resource_wait_idle(d3d12_context(pD3D12Proc->base.context),
|
||||
pD3D12Proc->m_OutputArguments.buffer->texture,
|
||||
true /*wantToWrite*/);
|
||||
|
||||
for(auto curInput : pD3D12Proc->m_InputBuffers)
|
||||
{
|
||||
@@ -303,10 +332,6 @@ d3d12_video_processor_flush(struct pipe_video_codec * codec)
|
||||
curInput->texture);
|
||||
// Make the resources permanently resident for video use
|
||||
d3d12_promote_to_permanent_residency(pD3D12Proc->m_pD3D12Screen, curInput->texture);
|
||||
// Synchronize against the resources that are going to be read/written to
|
||||
d3d12_resource_wait_idle(d3d12_context(pD3D12Proc->base.context),
|
||||
curInput->texture,
|
||||
false /*wantToWrite*/);
|
||||
}
|
||||
|
||||
HRESULT hr = pD3D12Proc->m_pD3D12Screen->dev->GetDeviceRemovedReason();
|
||||
@@ -333,29 +358,20 @@ d3d12_video_processor_flush(struct pipe_video_codec * codec)
|
||||
goto flush_fail;
|
||||
}
|
||||
|
||||
// Flush any work batched in the d3d12_screen and Wait on the m_spCommandQueue
|
||||
struct pipe_fence_handle *completion_fence = NULL;
|
||||
pD3D12Proc->base.context->flush(pD3D12Proc->base.context, &completion_fence, PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
|
||||
struct d3d12_fence *casted_completion_fence = d3d12_fence(completion_fence);
|
||||
pD3D12Proc->m_spCommandQueue->Wait(casted_completion_fence->cmdqueue_fence, casted_completion_fence->value);
|
||||
pD3D12Proc->m_pD3D12Screen->base.fence_reference(&pD3D12Proc->m_pD3D12Screen->base, &completion_fence, NULL);
|
||||
|
||||
struct d3d12_fence *input_surface_fence = pD3D12Proc->input_surface_fence;
|
||||
if (input_surface_fence)
|
||||
pD3D12Proc->m_spCommandQueue->Wait(input_surface_fence->cmdqueue_fence, input_surface_fence->value);
|
||||
|
||||
ID3D12CommandList *ppCommandLists[1] = { pD3D12Proc->m_spCommandList.Get() };
|
||||
pD3D12Proc->m_spCommandQueue->ExecuteCommandLists(1, ppCommandLists);
|
||||
pD3D12Proc->m_spCommandQueue->Signal(pD3D12Proc->m_spFence.Get(), pD3D12Proc->m_fenceValue);
|
||||
pD3D12Proc->m_spFence->SetEventOnCompletion(pD3D12Proc->m_fenceValue, nullptr);
|
||||
debug_printf("[d3d12_video_processor] d3d12_video_processor_flush - ExecuteCommandLists finished on signal with "
|
||||
"fenceValue: %d\n",
|
||||
pD3D12Proc->m_fenceValue);
|
||||
|
||||
hr = pD3D12Proc->m_spCommandAllocator->Reset();
|
||||
if (FAILED(hr)) {
|
||||
debug_printf(
|
||||
"[d3d12_video_processor] d3d12_video_processor_flush - resetting ID3D12CommandAllocator failed with HR %x\n",
|
||||
hr);
|
||||
goto flush_fail;
|
||||
}
|
||||
|
||||
hr = pD3D12Proc->m_spCommandList->Reset(pD3D12Proc->m_spCommandAllocator.Get());
|
||||
if (FAILED(hr)) {
|
||||
debug_printf(
|
||||
"[d3d12_video_processor] d3d12_video_processor_flush - resetting ID3D12GraphicsCommandList failed with HR %x\n",
|
||||
hr);
|
||||
goto flush_fail;
|
||||
}
|
||||
|
||||
// Validate device was not removed
|
||||
hr = pD3D12Proc->m_pD3D12Screen->dev->GetDeviceRemovedReason();
|
||||
@@ -395,6 +411,7 @@ d3d12_video_processor_create(struct pipe_context *context, const struct pipe_vid
|
||||
// Not using new doesn't call ctor and the initializations in the class declaration are lost
|
||||
struct d3d12_video_processor *pD3D12Proc = new d3d12_video_processor;
|
||||
|
||||
pD3D12Proc->m_PendingFences.resize(D3D12_VIDEO_PROC_ASYNC_DEPTH);
|
||||
pD3D12Proc->base = *codec;
|
||||
|
||||
pD3D12Proc->base.context = context;
|
||||
@@ -405,6 +422,7 @@ d3d12_video_processor_create(struct pipe_context *context, const struct pipe_vid
|
||||
pD3D12Proc->base.process_frame = d3d12_video_processor_process_frame;
|
||||
pD3D12Proc->base.end_frame = d3d12_video_processor_end_frame;
|
||||
pD3D12Proc->base.flush = d3d12_video_processor_flush;
|
||||
pD3D12Proc->base.get_processor_fence = d3d12_video_processor_get_processor_fence;
|
||||
|
||||
///
|
||||
|
||||
@@ -651,22 +669,32 @@ d3d12_video_processor_create_command_objects(struct d3d12_video_processor *pD3D1
|
||||
return false;
|
||||
}
|
||||
|
||||
hr = pD3D12Proc->m_pD3D12Screen->dev->CreateCommandAllocator(
|
||||
D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS,
|
||||
IID_PPV_ARGS(pD3D12Proc->m_spCommandAllocator.GetAddressOf()));
|
||||
pD3D12Proc->m_spCommandAllocators.resize(D3D12_VIDEO_PROC_ASYNC_DEPTH);
|
||||
for (uint32_t i = 0; i < pD3D12Proc->m_spCommandAllocators.size() ; i++) {
|
||||
hr = pD3D12Proc->m_pD3D12Screen->dev->CreateCommandAllocator(
|
||||
D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS,
|
||||
IID_PPV_ARGS(pD3D12Proc->m_spCommandAllocators[i].GetAddressOf()));
|
||||
|
||||
if (FAILED(hr)) {
|
||||
debug_printf("[d3d12_video_processor] d3d12_video_processor_create_command_objects - Call to "
|
||||
"CreateCommandAllocator failed with HR %x\n",
|
||||
hr);
|
||||
if (FAILED(hr)) {
|
||||
debug_printf("[d3d12_video_processor] d3d12_video_processor_create_command_objects - Call to "
|
||||
"CreateCommandAllocator failed with HR %x\n",
|
||||
hr);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
ComPtr<ID3D12Device4> spD3D12Device4;
|
||||
if (FAILED(pD3D12Proc->m_pD3D12Screen->dev->QueryInterface(
|
||||
IID_PPV_ARGS(spD3D12Device4.GetAddressOf())))) {
|
||||
debug_printf(
|
||||
"[d3d12_video_processor] d3d12_video_processor_create_processor - D3D12 Device has no ID3D12Device4 support\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
hr = pD3D12Proc->m_pD3D12Screen->dev->CreateCommandList(0,
|
||||
D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS,
|
||||
pD3D12Proc->m_spCommandAllocator.Get(),
|
||||
nullptr,
|
||||
IID_PPV_ARGS(pD3D12Proc->m_spCommandList.GetAddressOf()));
|
||||
hr = spD3D12Device4->CreateCommandList1(0,
|
||||
D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS,
|
||||
D3D12_COMMAND_LIST_FLAG_NONE,
|
||||
IID_PPV_ARGS(pD3D12Proc->m_spCommandList.GetAddressOf()));
|
||||
|
||||
if (FAILED(hr)) {
|
||||
debug_printf("[d3d12_video_processor] d3d12_video_processor_create_command_objects - Call to CreateCommandList "
|
||||
@@ -711,3 +739,123 @@ d3d12_video_processor_convert_pipe_rotation(enum pipe_video_vpp_orientation orie
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
d3d12_video_processor_pool_current_index(struct d3d12_video_processor *pD3D12Proc)
|
||||
{
|
||||
return pD3D12Proc->m_fenceValue % D3D12_VIDEO_PROC_ASYNC_DEPTH;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
d3d12_video_processor_ensure_fence_finished(struct pipe_video_codec *codec,
|
||||
uint64_t fenceValueToWaitOn,
|
||||
uint64_t timeout_ns)
|
||||
{
|
||||
bool wait_result = true;
|
||||
struct d3d12_video_processor *pD3D12Proc = (struct d3d12_video_processor *) codec;
|
||||
HRESULT hr = S_OK;
|
||||
uint64_t completedValue = pD3D12Proc->m_spFence->GetCompletedValue();
|
||||
|
||||
debug_printf(
|
||||
"[d3d12_video_processor] d3d12_video_processor_ensure_fence_finished - Waiting for fence (with timeout_ns %" PRIu64
|
||||
") to finish with "
|
||||
"fenceValue: %" PRIu64 " - Current Fence Completed Value %" PRIu64 "\n",
|
||||
timeout_ns,
|
||||
fenceValueToWaitOn,
|
||||
completedValue);
|
||||
|
||||
if (completedValue < fenceValueToWaitOn) {
|
||||
|
||||
HANDLE event = {};
|
||||
int event_fd = 0;
|
||||
event = d3d12_fence_create_event(&event_fd);
|
||||
|
||||
hr = pD3D12Proc->m_spFence->SetEventOnCompletion(fenceValueToWaitOn, event);
|
||||
if (FAILED(hr)) {
|
||||
debug_printf("[d3d12_video_processor] d3d12_video_processor_ensure_fence_finished - SetEventOnCompletion for "
|
||||
"fenceValue %" PRIu64 " failed with HR %x\n",
|
||||
fenceValueToWaitOn,
|
||||
hr);
|
||||
goto ensure_fence_finished_fail;
|
||||
}
|
||||
|
||||
wait_result = d3d12_fence_wait_event(event, event_fd, timeout_ns);
|
||||
d3d12_fence_close_event(event, event_fd);
|
||||
|
||||
debug_printf("[d3d12_video_processor] d3d12_video_processor_ensure_fence_finished - Waiting on fence to be done with "
|
||||
"fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
|
||||
fenceValueToWaitOn,
|
||||
completedValue);
|
||||
} else {
|
||||
debug_printf("[d3d12_video_processor] d3d12_video_processor_ensure_fence_finished - Fence already done with "
|
||||
"fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
|
||||
fenceValueToWaitOn,
|
||||
completedValue);
|
||||
}
|
||||
return wait_result;
|
||||
|
||||
ensure_fence_finished_fail:
|
||||
debug_printf("[d3d12_video_processor] d3d12_video_processor_sync_completion failed for fenceValue: %" PRIu64 "\n",
|
||||
fenceValueToWaitOn);
|
||||
assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
d3d12_video_processor_sync_completion(struct pipe_video_codec *codec, uint64_t fenceValueToWaitOn, uint64_t timeout_ns)
|
||||
{
|
||||
struct d3d12_video_processor *pD3D12Proc = (struct d3d12_video_processor *) codec;
|
||||
assert(pD3D12Proc);
|
||||
assert(pD3D12Proc->m_spD3D12VideoDevice);
|
||||
assert(pD3D12Proc->m_spCommandQueue);
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
ASSERTED bool wait_result = d3d12_video_processor_ensure_fence_finished(codec, fenceValueToWaitOn, timeout_ns);
|
||||
assert(wait_result);
|
||||
|
||||
hr =
|
||||
pD3D12Proc->m_spCommandAllocators[fenceValueToWaitOn % D3D12_VIDEO_PROC_ASYNC_DEPTH]->Reset();
|
||||
if (FAILED(hr)) {
|
||||
debug_printf("m_spCommandAllocator->Reset() failed with %x.\n", hr);
|
||||
goto sync_with_token_fail;
|
||||
}
|
||||
|
||||
// Validate device was not removed
|
||||
hr = pD3D12Proc->m_pD3D12Screen->dev->GetDeviceRemovedReason();
|
||||
if (hr != S_OK) {
|
||||
debug_printf("[d3d12_video_processor] d3d12_video_processor_sync_completion"
|
||||
" - D3D12Device was removed AFTER d3d12_video_processor_ensure_fence_finished "
|
||||
"execution with HR %x, but wasn't before.\n",
|
||||
hr);
|
||||
goto sync_with_token_fail;
|
||||
}
|
||||
|
||||
debug_printf(
|
||||
"[d3d12_video_processor] d3d12_video_processor_sync_completion - GPU execution finalized for fenceValue: %" PRIu64
|
||||
"\n",
|
||||
fenceValueToWaitOn);
|
||||
|
||||
return wait_result;
|
||||
|
||||
sync_with_token_fail:
|
||||
debug_printf("[d3d12_video_processor] d3d12_video_processor_sync_completion failed for fenceValue: %" PRIu64 "\n",
|
||||
fenceValueToWaitOn);
|
||||
assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
int d3d12_video_processor_get_processor_fence(struct pipe_video_codec *codec,
|
||||
struct pipe_fence_handle *fence,
|
||||
uint64_t timeout)
|
||||
{
|
||||
struct d3d12_fence *fenceValueToWaitOn = (struct d3d12_fence *) fence;
|
||||
assert(fenceValueToWaitOn);
|
||||
|
||||
ASSERTED bool wait_res = d3d12_video_processor_sync_completion(codec, fenceValueToWaitOn->value, timeout);
|
||||
|
||||
// Return semantics based on p_video_codec interface
|
||||
// ret == 0 -> work in progress
|
||||
// ret != 0 -> work completed
|
||||
return wait_res ? 1 : 0;
|
||||
}
|
||||
|
@@ -109,7 +109,8 @@ struct d3d12_video_processor
|
||||
std::vector<D3D12_VIDEO_PROCESS_INPUT_STREAM_DESC> m_inputStreamDescs;
|
||||
ComPtr<ID3D12VideoProcessor1> m_spVideoProcessor;
|
||||
ComPtr<ID3D12CommandQueue> m_spCommandQueue;
|
||||
ComPtr<ID3D12CommandAllocator> m_spCommandAllocator;
|
||||
std::vector<ComPtr<ID3D12CommandAllocator>> m_spCommandAllocators;
|
||||
std::vector<struct d3d12_fence> m_PendingFences;
|
||||
ComPtr<ID3D12VideoProcessCommandList1> m_spCommandList;
|
||||
|
||||
std::vector<D3D12_RESOURCE_BARRIER> m_transitionsBeforeCloseCmdList;
|
||||
@@ -123,6 +124,8 @@ struct d3d12_video_processor
|
||||
bool m_needsGPUFlush = false;
|
||||
|
||||
D3D12_FEATURE_DATA_VIDEO_PROCESS_MAX_INPUT_STREAMS m_vpMaxInputStreams = { };
|
||||
|
||||
struct d3d12_fence* input_surface_fence = NULL;
|
||||
};
|
||||
|
||||
struct pipe_video_codec *
|
||||
@@ -141,6 +144,22 @@ d3d12_video_processor_create_command_objects(struct d3d12_video_processor *pD3D1
|
||||
D3D12_VIDEO_PROCESS_ORIENTATION
|
||||
d3d12_video_processor_convert_pipe_rotation(enum pipe_video_vpp_orientation orientation);
|
||||
|
||||
bool
|
||||
d3d12_video_processor_ensure_fence_finished(struct pipe_video_codec *codec, uint64_t fenceValueToWaitOn, uint64_t timeout_ns);
|
||||
|
||||
bool
|
||||
d3d12_video_processor_sync_completion(struct pipe_video_codec *codec, uint64_t fenceValueToWaitOn, uint64_t timeout_ns);
|
||||
|
||||
uint64_t
|
||||
d3d12_video_processor_pool_current_index(struct d3d12_video_processor *codec);
|
||||
|
||||
int d3d12_video_processor_get_processor_fence(struct pipe_video_codec *codec,
|
||||
struct pipe_fence_handle *fence,
|
||||
uint64_t timeout);
|
||||
|
||||
// We need a depth large enough so the next item in the pipeline doesn't ask for a fence value we lost
|
||||
const uint64_t D3D12_VIDEO_PROC_ASYNC_DEPTH = 36;
|
||||
|
||||
///
|
||||
/// d3d12_video_processor functions ends
|
||||
///
|
||||
|
Reference in New Issue
Block a user