d3d12: Implement get_feedback with additional metadata

Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26223>
This commit is contained in:
Sil Vilerino
2023-11-09 18:58:26 -05:00
committed by Marge Bot
parent 8c1ba75f68
commit e4ffb2473b
14 changed files with 477 additions and 187 deletions

View File

@@ -104,6 +104,12 @@ d3d12_video_encoder_flush(struct pipe_video_codec *codec)
assert(pD3D12Enc->m_spD3D12VideoDevice);
assert(pD3D12Enc->m_spEncodeCommandQueue);
if (pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result & PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED) {
debug_printf("WARNING: [d3d12_video_encoder] d3d12_video_encoder_flush - Frame submission %" PRIu64 " failed. Encoder lost, please recreate pipe_video_codec object \n", pD3D12Enc->m_fenceValue);
assert(false);
return;
}
// Flush any work batched (ie. shaders blit on input texture, etc or bitstream headers buffer_subdata batched upload)
// and Wait the m_spEncodeCommandQueue for GPU upload completion
// before recording EncodeFrame below.
@@ -168,6 +174,8 @@ d3d12_video_encoder_flush(struct pipe_video_codec *codec)
flush_fail:
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_flush failed for fenceValue: %" PRIu64 "\n", pD3D12Enc->m_fenceValue);
pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
pD3D12Enc->m_spEncodedFrameMetadata[pD3D12Enc->m_fenceValue % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
assert(false);
}
@@ -196,13 +204,13 @@ d3d12_video_encoder_ensure_fence_finished(struct pipe_video_codec *codec, uint64
goto ensure_fence_finished_fail;
}
d3d12_fence_wait_event(event, event_fd, timeout_ns);
d3d12_fence_close_event(event, event_fd);
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_ensure_fence_finished - Waiting on fence to be done with "
"fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
fenceValueToWaitOn,
completedValue);
d3d12_fence_wait_event(event, event_fd, timeout_ns);
d3d12_fence_close_event(event, event_fd);
} else {
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_ensure_fence_finished - Fence already done with "
"fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
@@ -213,6 +221,8 @@ d3d12_video_encoder_ensure_fence_finished(struct pipe_video_codec *codec, uint64
ensure_fence_finished_fail:
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_sync_completion failed for fenceValue: %" PRIu64 "\n", fenceValueToWaitOn);
pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
pD3D12Enc->m_spEncodedFrameMetadata[fenceValueToWaitOn % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
assert(false);
}
@@ -259,6 +269,8 @@ d3d12_video_encoder_sync_completion(struct pipe_video_codec *codec, uint64_t fen
sync_with_token_fail:
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_sync_completion failed for fenceValue: %" PRIu64 "\n", fenceValueToWaitOn);
pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
pD3D12Enc->m_spEncodedFrameMetadata[fenceValueToWaitOn % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
assert(false);
}
@@ -406,7 +418,7 @@ d3d12_video_encoder_reconfigure_encoder_objects(struct d3d12_video_encoder *pD3D
resourceAllocFlags,
pD3D12Enc->m_NodeMask);
}
d3d12_video_encoder_create_reference_picture_manager(pD3D12Enc);
d3d12_video_encoder_create_reference_picture_manager(pD3D12Enc, picture);
}
bool reCreatedEncoder = false;
@@ -538,7 +550,7 @@ d3d12_video_encoder_reconfigure_encoder_objects(struct d3d12_video_encoder *pD3D
}
void
d3d12_video_encoder_create_reference_picture_manager(struct d3d12_video_encoder *pD3D12Enc)
d3d12_video_encoder_create_reference_picture_manager(struct d3d12_video_encoder *pD3D12Enc, struct pipe_picture_desc * picture)
{
pD3D12Enc->m_upDPBManager.reset();
pD3D12Enc->m_upBitstreamBuilder.reset();
@@ -560,7 +572,8 @@ d3d12_video_encoder_create_reference_picture_manager(struct d3d12_video_encoder
d3d12_video_encoder_get_current_max_dpb_capacity(pD3D12Enc)
);
pD3D12Enc->m_upBitstreamBuilder = std::make_unique<d3d12_video_bitstream_builder_h264>();
struct pipe_h264_enc_picture_desc *pH264Pic = (struct pipe_h264_enc_picture_desc *) picture;
pD3D12Enc->m_upBitstreamBuilder = std::make_unique<d3d12_video_bitstream_builder_h264>(pH264Pic->insert_aud_nalu);
} break;
#endif
#if VIDEO_CODEC_H265ENC
@@ -845,7 +858,8 @@ d3d12_video_encoder_get_current_level_desc(struct d3d12_video_encoder *pD3D12Enc
void
d3d12_video_encoder_build_pre_encode_codec_headers(struct d3d12_video_encoder *pD3D12Enc,
bool &postEncodeHeadersNeeded,
uint64_t &preEncodeGeneratedHeadersByteSize)
uint64_t &preEncodeGeneratedHeadersByteSize,
std::vector<uint64_t> &pWrittenCodecUnitsSizes)
{
enum pipe_video_format codec = u_reduce_video_profile(pD3D12Enc->base.profile);
switch (codec) {
@@ -853,14 +867,14 @@ d3d12_video_encoder_build_pre_encode_codec_headers(struct d3d12_video_encoder *p
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
{
postEncodeHeadersNeeded = false;
preEncodeGeneratedHeadersByteSize = d3d12_video_encoder_build_codec_headers_h264(pD3D12Enc);
preEncodeGeneratedHeadersByteSize = d3d12_video_encoder_build_codec_headers_h264(pD3D12Enc, pWrittenCodecUnitsSizes);
} break;
#endif
#if VIDEO_CODEC_H265ENC
case PIPE_VIDEO_FORMAT_HEVC:
{
postEncodeHeadersNeeded = false;
preEncodeGeneratedHeadersByteSize = d3d12_video_encoder_build_codec_headers_hevc(pD3D12Enc);
preEncodeGeneratedHeadersByteSize = d3d12_video_encoder_build_codec_headers_hevc(pD3D12Enc, pWrittenCodecUnitsSizes);
} break;
#endif
#if VIDEO_CODEC_AV1ENC
@@ -869,6 +883,7 @@ d3d12_video_encoder_build_pre_encode_codec_headers(struct d3d12_video_encoder *p
pD3D12Enc->m_BitstreamHeadersBuffer.resize(0);
postEncodeHeadersNeeded = true;
preEncodeGeneratedHeadersByteSize = 0;
pWrittenCodecUnitsSizes.clear();
} break;
#endif
default:
@@ -1031,6 +1046,7 @@ static void
d3d12_video_encoder_disable_rc_maxframesize(struct D3D12EncodeRateControlState & rcState)
{
rcState.m_Flags &= ~D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE;
rcState.max_frame_size = 0;
switch (rcState.m_Mode) {
case D3D12_VIDEO_ENCODER_RATE_CONTROL_MODE_CBR:
{
@@ -1695,6 +1711,16 @@ d3d12_video_encoder_reconfigure_session(struct d3d12_video_encoder *pD3D12Enc,
debug_printf("d3d12_video_encoder_prepare_output_buffers failed!\n");
return false;
}
// Save frame size expectation snapshot from record time to resolve at get_feedback time (after execution)
uint64_t current_metadata_slot = (pD3D12Enc->m_fenceValue % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT);
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].expected_max_frame_size =
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size;
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].expected_max_slice_size =
(pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigMode == D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION) ?
pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigDesc.m_SlicesPartition_H264.MaxBytesPerSlice : 0;
return true;
}
@@ -1739,6 +1765,8 @@ d3d12_video_encoder_begin_frame(struct pipe_video_codec * codec,
}
pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].m_InputSurfaceFence = (struct d3d12_fence*) *picture->fence;
pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK;
pD3D12Enc->m_spEncodedFrameMetadata[pD3D12Enc->m_fenceValue % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK;
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_begin_frame finalized for fenceValue: %" PRIu64 "\n",
pD3D12Enc->m_fenceValue);
@@ -1747,6 +1775,8 @@ d3d12_video_encoder_begin_frame(struct pipe_video_codec * codec,
fail:
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_begin_frame failed for fenceValue: %" PRIu64 "\n",
pD3D12Enc->m_fenceValue);
pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
pD3D12Enc->m_spEncodedFrameMetadata[pD3D12Enc->m_fenceValue % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
assert(false);
}
@@ -1846,9 +1876,11 @@ d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec,
assert(pD3D12Enc->m_spEncodeCommandQueue);
assert(pD3D12Enc->m_pD3D12Screen);
// Since this can be queried out of order in get_feedback, we need to pass out the actual value of the fence
// and not the pointer to it (the fence value will keep increasing in the surfaces that have a pointer to it)
*feedback = (void*) pD3D12Enc->m_fenceValue;
if (pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result & PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED) {
debug_printf("WARNING: [d3d12_video_encoder] d3d12_video_encoder_encode_bitstream - Frame submission %" PRIu64 " failed. Encoder lost, please recreate pipe_video_codec object\n", pD3D12Enc->m_fenceValue);
assert(false);
return;
}
struct d3d12_video_buffer *pInputVideoBuffer = (struct d3d12_video_buffer *) source;
assert(pInputVideoBuffer);
@@ -1912,7 +1944,8 @@ d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec,
d3d12_video_encoder_build_pre_encode_codec_headers(pD3D12Enc,
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].postEncodeHeadersNeeded,
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize);
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize,
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes);
assert(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize == pD3D12Enc->m_BitstreamHeadersBuffer.size());
// Only upload headers now and leave prefix offset space gap in compressed bitstream if the codec builds headers before execution.
@@ -1930,8 +1963,13 @@ d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec,
(pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.CompressedBitstreamBufferAccessAlignment > 1)
&& ((pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize % pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.CompressedBitstreamBufferAccessAlignment) != 0)
) {
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize = ALIGN(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize, pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.CompressedBitstreamBufferAccessAlignment);
size_t new_size = ALIGN(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize, pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.CompressedBitstreamBufferAccessAlignment);
size_t align_padding = new_size - pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize;
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize = new_size;
pD3D12Enc->m_BitstreamHeadersBuffer.resize(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize, 0);
// Update last pWrittenCodecUnitsSizes with extra offset padding
if (pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes.size() > 0)
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes[pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes.size() - 1] += align_padding;
}
// Upload the CPU buffers with the bitstream headers to the compressed bitstream resource in the interval
@@ -1964,7 +2002,9 @@ d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec,
if (FAILED(hr)) {
debug_printf("CreateCommittedResource failed with HR %x\n", hr);
pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
assert(false);
return;
}
}
@@ -1976,6 +2016,13 @@ d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec,
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].comp_bit_destination = &pOutputBitstreamBuffer->base.b;
}
memset(&pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData,
0,
sizeof(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData));
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData.value = pD3D12Enc->m_fenceValue;
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData.cmdqueue_fence = pD3D12Enc->m_spFence.Get();
*feedback = (void*) &pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData;
std::vector<D3D12_RESOURCE_BARRIER> rgCurrentFrameStateTransitions = {
CD3DX12_RESOURCE_BARRIER::Transition(pInputVideoD3D12Res,
D3D12_RESOURCE_STATE_COMMON,
@@ -2218,15 +2265,49 @@ d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec,
}
void
d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec, void *feedback, unsigned *size, struct pipe_enc_feedback_metadata* metadata)
d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec,
void *feedback,
unsigned *size,
struct pipe_enc_feedback_metadata* pMetadata)
{
struct d3d12_video_encoder *pD3D12Enc = (struct d3d12_video_encoder *) codec;
assert(pD3D12Enc);
uint64_t requested_metadata_fence = ((uint64_t) feedback);
d3d12_video_encoder_sync_completion(codec, requested_metadata_fence, OS_TIMEOUT_INFINITE);
struct d3d12_fence *feedback_fence = (struct d3d12_fence *) feedback;
uint64_t requested_metadata_fence = feedback_fence->value;
struct pipe_enc_feedback_metadata opt_metadata;
memset(&opt_metadata, 0, sizeof(opt_metadata));
HRESULT hr = pD3D12Enc->m_pD3D12Screen->dev->GetDeviceRemovedReason();
if (hr != S_OK) {
opt_metadata.encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
debug_printf("Error: d3d12_video_encoder_get_feedback for Encode GPU command for fence %" PRIu64 " failed with GetDeviceRemovedReason: %x\n",
requested_metadata_fence,
hr);
assert(false);
return;
}
uint64_t current_metadata_slot = (requested_metadata_fence % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT);
opt_metadata.encode_result = pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].encode_result;
if (opt_metadata.encode_result & PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED) {
debug_printf("Error: d3d12_video_encoder_get_feedback for Encode GPU command for fence %" PRIu64 " failed on submission with encode_result: %x\n",
requested_metadata_fence,
opt_metadata.encode_result);
assert(false);
return;
}
d3d12_video_encoder_sync_completion(codec, requested_metadata_fence, OS_TIMEOUT_INFINITE);
opt_metadata.encode_result = pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].encode_result;
if (opt_metadata.encode_result & PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED) {
debug_printf("Error: d3d12_video_encoder_get_feedback for Encode GPU command for fence %" PRIu64 " failed on GPU fence wait with encode_result: %x\n",
requested_metadata_fence,
opt_metadata.encode_result);
assert(false);
return;
}
debug_printf("d3d12_video_encoder_get_feedback with feedback: %" PRIu64 ", resources slot %" PRIu64 " metadata resolved ID3D12Resource buffer %p metadata required size %" PRIu64 "\n",
requested_metadata_fence,
@@ -2242,11 +2323,33 @@ d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec, void *feedback,
requested_metadata_fence,
pD3D12Enc->m_fenceValue,
D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT);
*size = 0;
opt_metadata.encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
assert(false);
return;
}
// Extract encode metadata
D3D12_VIDEO_ENCODER_OUTPUT_METADATA encoderMetadata;
std::vector<D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA> pSubregionsMetadata;
d3d12_video_encoder_extract_encode_metadata(
pD3D12Enc,
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].spBuffer.Get(),
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].bufferSize,
encoderMetadata,
pSubregionsMetadata);
// Validate encoder output metadata
if ((encoderMetadata.EncodeErrorFlags != D3D12_VIDEO_ENCODER_ENCODE_ERROR_FLAG_NO_ERROR) || (encoderMetadata.EncodedBitstreamWrittenBytesCount == 0)) {
opt_metadata.encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED;
debug_printf("[d3d12_video_encoder] Encode GPU command for fence %" PRIu64 " failed - EncodeErrorFlags: %" PRIu64 "\n",
requested_metadata_fence,
encoderMetadata.EncodeErrorFlags);
assert(false);
return;
}
debug_printf("WrittenSubregionsCount: %" PRIu64" \n", encoderMetadata.WrittenSubregionsCount);
// Calculate the full bitstream size
if(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].postEncodeHeadersNeeded)
{
///
@@ -2268,35 +2371,49 @@ d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec, void *feedback,
/// If we wrote headers (if any) before encode execution, use that size to calculate feedback size of complete bitstream.
///
D3D12_VIDEO_ENCODER_OUTPUT_METADATA encoderMetadata;
std::vector<D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA> pSubregionsMetadata;
d3d12_video_encoder_extract_encode_metadata(
pD3D12Enc,
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].spBuffer.Get(),
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].bufferSize,
encoderMetadata,
pSubregionsMetadata);
// Read metadata from encoderMetadata
if (encoderMetadata.EncodeErrorFlags != D3D12_VIDEO_ENCODER_ENCODE_ERROR_FLAG_NO_ERROR) {
debug_printf("[d3d12_video_encoder] Encode GPU command for fence %" PRIu64 " failed - EncodeErrorFlags: %" PRIu64 "\n",
requested_metadata_fence,
encoderMetadata.EncodeErrorFlags);
*size = 0;
assert(false);
return;
}
assert(encoderMetadata.EncodedBitstreamWrittenBytesCount > 0u);
*size = static_cast<unsigned int>(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize + encoderMetadata.EncodedBitstreamWrittenBytesCount);
// Prepare codec unit metadata post execution with pre-execution headers generation
for (unsigned i = 0; i < pSubregionsMetadata.size();i++)
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes.push_back(pSubregionsMetadata[i].bSize);
}
debug_printf("[d3d12_video_encoder_get_feedback] Requested metadata for encoded frame at fence %" PRIu64 " is %d (feedback was requested at current fence %" PRIu64 ")\n",
requested_metadata_fence,
*size,
pD3D12Enc->m_fenceValue);
if (*size > pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].expected_max_frame_size)
opt_metadata.encode_result |= PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_MAX_FRAME_SIZE_OVERFLOW;
// Report codec unit metadata
opt_metadata.codec_unit_metadata_count = 0u;
memset(opt_metadata.codec_unit_metadata, 0, sizeof(opt_metadata.codec_unit_metadata));
uint64_t absolute_offset_acum = 0u;
debug_printf("Written: %" PRIu64" codec units \n", static_cast<uint64_t>(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes.size()));
for (uint32_t i = 0; i < pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes.size(); i++)
{
opt_metadata.codec_unit_metadata[opt_metadata.codec_unit_metadata_count].size = pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes[i];
opt_metadata.codec_unit_metadata[opt_metadata.codec_unit_metadata_count].offset = absolute_offset_acum;
absolute_offset_acum += opt_metadata.codec_unit_metadata[opt_metadata.codec_unit_metadata_count].size;
debug_printf("Codec unit %d: offset: %" PRIu64" - size: %" PRIu64" \n",
i,
opt_metadata.codec_unit_metadata[opt_metadata.codec_unit_metadata_count].offset,
opt_metadata.codec_unit_metadata[opt_metadata.codec_unit_metadata_count].size);
opt_metadata.codec_unit_metadata_count++;
}
opt_metadata.present_metadata = (PIPE_VIDEO_FEEDBACK_METADATA_TYPE_BITSTREAM_SIZE |
PIPE_VIDEO_FEEDBACK_METADATA_TYPE_ENCODE_RESULT |
PIPE_VIDEO_FEEDBACK_METADATA_TYPE_CODEC_UNIT_LOCATION |
PIPE_VIDEO_FEEDBACK_METADATA_TYPE_MAX_FRAME_SIZE_OVERFLOW);
if (pMetadata)
*pMetadata = opt_metadata;
assert(absolute_offset_acum == *size);
pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].bRead = true;
}
@@ -2420,6 +2537,12 @@ d3d12_video_encoder_end_frame(struct pipe_video_codec * codec,
debug_printf("[d3d12_video_encoder] d3d12_video_encoder_end_frame started for fenceValue: %" PRIu64 "\n",
pD3D12Enc->m_fenceValue);
if (pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result != PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK) {
debug_printf("WARNING: [d3d12_video_encoder] d3d12_video_encoder_end_frame - Frame submission %" PRIu64 " failed. Encoder lost, please recreate pipe_video_codec object\n", pD3D12Enc->m_fenceValue);
assert(false);
return;
}
// Signal finish of current frame encoding to the picture management tracker
pD3D12Enc->m_upDPBManager->end_frame();

View File

@@ -71,7 +71,10 @@ d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec,
* get encoder feedback
*/
void
d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec, void *feedback, unsigned *size, struct pipe_enc_feedback_metadata* metadata);
d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec,
void *feedback,
unsigned *size,
struct pipe_enc_feedback_metadata* pMetadata);
/**
* end encoding of the current frame
@@ -172,6 +175,7 @@ struct D3D12EncodeRateControlState
{
D3D12_VIDEO_ENCODER_RATE_CONTROL_MODE m_Mode = {};
D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAGS m_Flags = {};
uint64_t max_frame_size = 0;
DXGI_RATIONAL m_FrameRate = {};
union
{
@@ -300,6 +304,7 @@ struct EncodedBitstreamResolvedMetadata
* encoded in the GPU
*/
uint64_t preEncodeGeneratedHeadersByteSize = 0;
std::vector<uint64_t> pWrittenCodecUnitsSizes;
/*
* Indicates if the encoded frame needs header generation after GPU execution
@@ -345,6 +350,16 @@ struct EncodedBitstreamResolvedMetadata
* in between the GPU spStagingBitstream contents
*/
std::vector<uint8_t> m_StagingBitstreamConstruction;
/* Stores encode result for get_feedback readback in the D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT slots */
enum pipe_video_feedback_encode_result_flags encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK;
/* Expected max frame, slice sizes */
uint64_t expected_max_frame_size = 0;
uint64_t expected_max_slice_size = 0;
/* Pending fence data for this frame */
struct d3d12_fence m_FenceData;
};
struct d3d12_video_encoder
@@ -396,6 +411,9 @@ struct d3d12_video_encoder
ComPtr<ID3D12CommandAllocator> m_spCommandAllocator;
struct d3d12_fence* m_InputSurfaceFence = NULL;
/* Stores encode result for submission error control in the D3D12_VIDEO_ENC_ASYNC_DEPTH slots */
enum pipe_video_feedback_encode_result_flags encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK;
};
std::vector<InFlightEncodeResources> m_inflightResourcesPool;
@@ -432,7 +450,7 @@ d3d12_video_encoder_get_current_gop_desc(struct d3d12_video_encoder *pD3D12Enc);
uint32_t
d3d12_video_encoder_get_current_max_dpb_capacity(struct d3d12_video_encoder *pD3D12Enc);
void
d3d12_video_encoder_create_reference_picture_manager(struct d3d12_video_encoder *pD3D12Enc);
d3d12_video_encoder_create_reference_picture_manager(struct d3d12_video_encoder *pD3D12Enc, struct pipe_picture_desc * picture);
void
d3d12_video_encoder_update_picparams_tracking(struct d3d12_video_encoder *pD3D12Enc,
struct pipe_video_buffer * srcTexture,
@@ -453,7 +471,8 @@ d3d12_video_encoder_prepare_output_buffers(struct d3d12_video_encoder *pD3D12Enc
void
d3d12_video_encoder_build_pre_encode_codec_headers(struct d3d12_video_encoder *pD3D12Enc,
bool &postEncodeHeadersNeeded,
uint64_t &preEncodeGeneratedHeadersByteSize);
uint64_t &preEncodeGeneratedHeadersByteSize,
std::vector<uint64_t> &pWrittenCodecUnitsSizes);
void
d3d12_video_encoder_extract_encode_metadata(
struct d3d12_video_encoder * pD3D12Dec,

View File

@@ -28,6 +28,7 @@
#include "d3d12_screen.h"
#include "d3d12_format.h"
#include <cmath>
#include <numeric>
void
d3d12_video_encoder_update_current_rate_control_av1(struct d3d12_video_encoder *pD3D12Enc,
@@ -76,6 +77,7 @@ d3d12_video_encoder_update_current_rate_control_av1(struct d3d12_video_encoder *
picture->rc[0].vbv_buf_initial_size;
}
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rc[0].max_au_size;
if (picture->rc[0].max_au_size > 0) {
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |=
D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE;
@@ -163,6 +165,7 @@ d3d12_video_encoder_update_current_rate_control_av1(struct d3d12_video_encoder *
picture->rc[0].vbv_buf_initial_size;
}
#endif
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rc[0].max_au_size;
if (picture->rc[0].max_au_size > 0) {
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |=
D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE;
@@ -246,6 +249,7 @@ d3d12_video_encoder_update_current_rate_control_av1(struct d3d12_video_encoder *
picture->rc[0].vbv_buf_initial_size;
}
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rc[0].max_au_size;
if (picture->rc[0].max_au_size > 0) {
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |=
D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE;
@@ -2229,6 +2233,8 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc
static_cast<d3d12_video_bitstream_builder_av1 *>(pD3D12Enc->m_upBitstreamBuilder.get());
assert(pAV1BitstreamBuilder);
associatedMetadata.pWrittenCodecUnitsSizes.clear();
size_t writtenTemporalDelimBytes = 0;
if (picHdr.show_frame && associatedMetadata.m_CodecSpecificData.AV1HeadersInfo.temporal_delim_rendered) {
pAV1BitstreamBuilder->write_temporal_delimiter_obu(
@@ -2238,6 +2244,7 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc
);
assert(pD3D12Enc->m_BitstreamHeadersBuffer.size() == writtenTemporalDelimBytes);
debug_printf("Written OBU_TEMPORAL_DELIMITER bytes: %" PRIu64 "\n", writtenTemporalDelimBytes);
associatedMetadata.pWrittenCodecUnitsSizes.push_back(writtenTemporalDelimBytes);
}
size_t writtenSequenceBytes = 0;
@@ -2255,6 +2262,7 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc
pD3D12Enc->m_BitstreamHeadersBuffer.begin() + writtenTemporalDelimBytes, // placingPositionStart
writtenSequenceBytes // Bytes Written AFTER placingPositionStart arg above
);
associatedMetadata.pWrittenCodecUnitsSizes.push_back(writtenSequenceBytes);
assert(pD3D12Enc->m_BitstreamHeadersBuffer.size() == (writtenSequenceBytes + writtenTemporalDelimBytes));
debug_printf("Written OBU_SEQUENCE_HEADER bytes: %" PRIu64 "\n", writtenSequenceBytes);
}
@@ -2303,6 +2311,7 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc
);
debug_printf("Written OBU_FRAME bytes: %" PRIu64 "\n", writtenFrameBytes);
associatedMetadata.pWrittenCodecUnitsSizes.push_back(writtenFrameBytes);
assert(pD3D12Enc->m_BitstreamHeadersBuffer.size() ==
(writtenSequenceBytes + writtenTemporalDelimBytes + writtenFrameBytes));
@@ -2339,7 +2348,8 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc
1,
associatedMetadata.m_associatedEncodeConfig.m_encoderSliceConfigDesc.m_TilesConfig_AV1.TilesPartition,
associatedMetadata.m_associatedEncodeConfig.m_encoderSliceConfigDesc.m_TilesConfig_AV1.TilesGroups[0],
written_bytes_to_staging_bitstream_buffer);
written_bytes_to_staging_bitstream_buffer,
associatedMetadata.pWrittenCodecUnitsSizes);
writtenTileBytes += tile_group_obu_size;
comp_bitstream_offset += writtenTileBytes;
@@ -2377,6 +2387,7 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc
writtenTemporalDelimBytes, // placingPositionStart
writtenFrameBytes // Bytes Written AFTER placingPositionStart arg above
);
associatedMetadata.pWrittenCodecUnitsSizes.push_back(writtenFrameBytes);
debug_printf("Written OBU_FRAME_HEADER bytes: %" PRIu64 "\n", writtenFrameBytes);
@@ -2440,6 +2451,7 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc
staging_bitstream_buffer_offset);
writtenTileBytes += writtenTileObuPrefixBytes;
associatedMetadata.pWrittenCodecUnitsSizes.push_back(writtenTileObuPrefixBytes);
// Note: The buffer_subdata is queued in pD3D12Enc->base.context but doesn't execute immediately
pD3D12Enc->base.context->buffer_subdata(
@@ -2477,7 +2489,8 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc
1,
associatedMetadata.m_associatedEncodeConfig.m_encoderSliceConfigDesc.m_TilesConfig_AV1.TilesPartition,
currentTg,
written_bytes_to_staging_bitstream_buffer);
written_bytes_to_staging_bitstream_buffer,
associatedMetadata.pWrittenCodecUnitsSizes);
staging_bitstream_buffer_offset += written_bytes_to_staging_bitstream_buffer;
comp_bitstream_offset += tile_group_obu_size;
@@ -2560,6 +2573,7 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc
writtenTemporalDelimBytes // Bytes Written AFTER placingPositionStart arg above
);
}
associatedMetadata.pWrittenCodecUnitsSizes.push_back(writtenTemporalDelimBytes);
assert(writtenTemporalDelimBytes == (pD3D12Enc->m_BitstreamHeadersBuffer.size() - staging_buf_offset));
// Add current pending frame being processed in the loop
@@ -2584,6 +2598,8 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc
writtenShowExistingFrameBytes // Bytes Written AFTER placingPositionStart arg above
);
}
associatedMetadata.pWrittenCodecUnitsSizes.push_back(writtenShowExistingFrameBytes);
assert(writtenShowExistingFrameBytes ==
(pD3D12Enc->m_BitstreamHeadersBuffer.size() - staging_buf_offset - writtenTemporalDelimBytes));
@@ -2653,8 +2669,12 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc
assert((writtenSequenceBytes + writtenTemporalDelimBytes + writtenFrameBytes +
extra_show_existing_frame_payload_bytes) == pD3D12Enc->m_BitstreamHeadersBuffer.size());
return static_cast<unsigned int>(writtenSequenceBytes + writtenTemporalDelimBytes + writtenFrameBytes +
uint32_t total_bytes_written = static_cast<uint32_t>(writtenSequenceBytes + writtenTemporalDelimBytes + writtenFrameBytes +
writtenTileBytes + extra_show_existing_frame_payload_bytes);
assert(std::accumulate(associatedMetadata.pWrittenCodecUnitsSizes.begin(), associatedMetadata.pWrittenCodecUnitsSizes.end(), 0u) ==
static_cast<uint64_t>(total_bytes_written));
return total_bytes_written;
}
void
@@ -2670,7 +2690,8 @@ upload_tile_group_obu(struct d3d12_video_encoder *pD3D12Enc,
size_t TileSizeBytes, // Pass already +1'd from TileSizeBytesMinus1
const D3D12_VIDEO_ENCODER_AV1_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_TILES &TilesPartition,
const av1_tile_group_t &tileGroup,
size_t &written_bytes_to_staging_bitstream_buffer)
size_t &written_bytes_to_staging_bitstream_buffer,
std::vector<uint64_t> &pWrittenCodecUnitsSizes)
{
debug_printf("[Tile group start %d to end %d] Writing to comp_bit_destination %p starts at offset %" PRIu64 "\n",
tileGroup.tg_start,
@@ -2843,6 +2864,13 @@ upload_tile_group_obu(struct d3d12_video_encoder *pD3D12Enc,
comp_bit_destination_offset);
comp_bit_destination_offset += tile_size;
size_t cur_tile_reportable_size = tile_size;
if (TileIdx != tileGroup.tg_end)
cur_tile_reportable_size += TileSizeBytes; /* extra tile_size_bytes_minus1 in all tiles except last*/
if (TileIdx == 0)
cur_tile_reportable_size += bitstream_tile_group_obu_bytes; // part of the obu tile group header (make part of first tile)
pWrittenCodecUnitsSizes.push_back(cur_tile_reportable_size);
}
// Make sure we wrote the expected bytes that match the obu_size elements

View File

@@ -67,7 +67,8 @@ upload_tile_group_obu(struct d3d12_video_encoder *pD3D12Enc,
size_t TileSizeBytes, // Pass already +1'd from TileSizeBytesMinus1
const D3D12_VIDEO_ENCODER_AV1_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_TILES &TilesPartition,
const av1_tile_group_t &tileGroup,
size_t &written_bytes_to_staging_bitstream_buffer);
size_t &written_bytes_to_staging_bitstream_buffer,
std::vector<uint64_t> &pWrittenCodecUnitsSizes);
void

View File

@@ -28,6 +28,8 @@
#include "d3d12_format.h"
#include <cmath>
#include <algorithm>
#include <numeric>
void
d3d12_video_encoder_update_current_rate_control_h264(struct d3d12_video_encoder *pD3D12Enc,
@@ -72,6 +74,7 @@ d3d12_video_encoder_update_current_rate_control_h264(struct d3d12_video_encoder
picture->rate_ctrl[0].vbv_buf_initial_size;
}
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rate_ctrl[0].max_au_size;
if (picture->rate_ctrl[0].max_au_size > 0) {
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |=
D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE;
@@ -153,6 +156,7 @@ d3d12_video_encoder_update_current_rate_control_h264(struct d3d12_video_encoder
picture->rate_ctrl[0].vbv_buf_initial_size;
}
#endif
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rate_ctrl[0].max_au_size;
if (picture->rate_ctrl[0].max_au_size > 0) {
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |=
D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE;
@@ -228,6 +232,7 @@ d3d12_video_encoder_update_current_rate_control_h264(struct d3d12_video_encoder
picture->rate_ctrl[0].vbv_buf_initial_size;
}
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rate_ctrl[0].max_au_size;
if (picture->rate_ctrl[0].max_au_size > 0) {
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |=
D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE;
@@ -406,46 +411,20 @@ d3d12_video_encoder_negotiate_current_h264_slices_configuration(struct d3d12_vid
///
/// Try to see if we can accommodate the user's multi-slice request
///
if (picture->num_slice_descriptors > 1) {
/* Last slice can be less for rounding frame size and leave some error for mb rounding */
bool bUniformSizeSlices = true;
const double rounding_delta = 1.0;
for (uint32_t sliceIdx = 1; (sliceIdx < picture->num_slice_descriptors - 1) && bUniformSizeSlices; sliceIdx++) {
int64_t curSlice = picture->slices_descriptors[sliceIdx].num_macroblocks;
int64_t prevSlice = picture->slices_descriptors[sliceIdx - 1].num_macroblocks;
bUniformSizeSlices = bUniformSizeSlices && (std::abs(curSlice - prevSlice) <= rounding_delta);
}
if ((picture->slice_mode == PIPE_VIDEO_SLICE_MODE_BLOCKS) && (picture->num_slice_descriptors > 1)) {
/* Some apps send slices that are all the same size except for one slice, which may appear at any position in the descriptors */
/* Let's validate that there are at most 2 different slice sizes across all the descriptors */
std::vector<int> slice_sizes(picture->num_slice_descriptors);
for (uint32_t i = 0; i < picture->num_slice_descriptors; i++)
slice_sizes[i] = picture->slices_descriptors[i].num_macroblocks;
std::sort(slice_sizes.begin(), slice_sizes.end());
bool bUniformSizeSlices = (std::unique(slice_sizes.begin(), slice_sizes.end()) - slice_sizes.begin()) <= 2;
uint32_t mbPerScanline =
pD3D12Enc->m_currentEncodeConfig.m_currentResolution.Width / D3D12_VIDEO_H264_MB_IN_PIXELS;
bool bSliceAligned = ((picture->slices_descriptors[0].num_macroblocks % mbPerScanline) == 0);
if (!bUniformSizeSlices &&
d3d12_video_encoder_check_subregion_mode_support(
pD3D12Enc,
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME)) {
if (D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG) { // Check if fallback mode is enabled, or we should just fail
// without support
// Not supported to have custom slice sizes in D3D12 Video Encode fallback to uniform multi-slice
debug_printf(
"[d3d12_video_encoder_h264] WARNING: Requested slice control mode is not supported: All slices must "
"have the same number of macroblocks. Falling back to encoding uniform %d slices per frame.\n",
picture->num_slice_descriptors);
requestedSlicesMode =
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME;
requestedSlicesConfig.NumberOfSlicesPerFrame = picture->num_slice_descriptors;
debug_printf("[d3d12_video_encoder_h264] Using multi slice encoding mode: "
"D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME "
"with %d slices per frame.\n",
requestedSlicesConfig.NumberOfSlicesPerFrame);
} else {
debug_printf("[d3d12_video_encoder_h264] Requested slice control mode is not supported: All slices must "
"have the same number of macroblocks. To continue with uniform slices as a fallback, must "
"enable the OS environment variable D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG");
return false;
}
} else if (bUniformSizeSlices && bSliceAligned &&
if (bUniformSizeSlices && bSliceAligned &&
d3d12_video_encoder_check_subregion_mode_support(
pD3D12Enc,
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_ROWS_PER_SUBREGION)) {
@@ -469,23 +448,43 @@ d3d12_video_encoder_negotiate_current_h264_slices_configuration(struct d3d12_vid
"D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME "
"with %d slices per frame.\n",
requestedSlicesConfig.NumberOfSlicesPerFrame);
} else if (D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG) { // Check if fallback mode is enabled, or we should just fail
// without support
// Fallback to single slice encoding (assigned by default when initializing variables requestedSlicesMode,
// requestedSlicesConfig)
debug_printf(
"[d3d12_video_encoder_h264] WARNING: Slice mode for %d slices with bUniformSizeSlices: %d - bSliceAligned "
"%d not supported by the D3D12 driver, falling back to encoding a single slice per frame.\n",
picture->num_slice_descriptors,
bUniformSizeSlices,
bSliceAligned);
} else if (bUniformSizeSlices &&
d3d12_video_encoder_check_subregion_mode_support(
pD3D12Enc,
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_SQUARE_UNITS_PER_SUBREGION_ROW_UNALIGNED)) {
requestedSlicesMode =
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_SQUARE_UNITS_PER_SUBREGION_ROW_UNALIGNED;
requestedSlicesConfig.NumberOfCodingUnitsPerSlice = picture->slices_descriptors[0].num_macroblocks;
debug_printf("[d3d12_video_encoder_h264] Using multi slice encoding mode: "
"D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_SQUARE_UNITS_PER_SUBREGION_ROW_UNALIGNED "
"with %d NumberOfCodingUnitsPerSlice per frame.\n",
requestedSlicesConfig.NumberOfCodingUnitsPerSlice);
} else {
debug_printf("[d3d12_video_encoder_h264] Requested slice control mode is not supported: All slices must "
"have the same number of macroblocks. To continue with uniform slices as a fallback, must "
"enable the OS environment variable D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG");
"have the same number of macroblocks.\n");
return false;
}
} else if(picture->slice_mode == PIPE_VIDEO_SLICE_MODE_MAX_SLICE_SICE) {
if ((picture->max_slice_bytes > 0) &&
d3d12_video_encoder_check_subregion_mode_support(
pD3D12Enc,
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION )) {
requestedSlicesMode =
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION;
requestedSlicesConfig.MaxBytesPerSlice = picture->max_slice_bytes;
debug_printf("[d3d12_video_encoder_h264] Using multi slice encoding mode: "
"D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION "
"with %d MaxBytesPerSlice per frame.\n",
requestedSlicesConfig.MaxBytesPerSlice);
} else {
debug_printf("[d3d12_video_encoder_h264] Requested slice control mode is not supported: All slices must "
"have the same number of macroblocks.\n");
return false;
}
} else {
requestedSlicesMode = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_FULL_FRAME;
requestedSlicesConfig.NumberOfSlicesPerFrame = 1;
debug_printf("[d3d12_video_encoder_h264] Requested slice control mode is full frame. m_SlicesPartition_H264.NumberOfSlicesPerFrame = %d - m_encoderSliceConfigMode = %d \n",
requestedSlicesConfig.NumberOfSlicesPerFrame, requestedSlicesMode);
}
@@ -1016,8 +1015,12 @@ d3d12_video_encoder_update_current_encoder_config_state_h264(struct d3d12_video_
if (pD3D12Enc->m_currentEncodeCapabilities.m_MaxSlicesInOutput >
pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.MaxSubregionsNumber) {
debug_printf("[d3d12_video_encoder_h264] Desired number of subregions is not supported (higher than max "
"reported slice number in query caps)\n.");
debug_printf("[d3d12_video_encoder_h264] Desired number of subregions %d is not supported (higher than max "
"reported slice number %d in query caps) for current resolution (%d, %d)\n.",
pD3D12Enc->m_currentEncodeCapabilities.m_MaxSlicesInOutput,
pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.MaxSubregionsNumber,
pD3D12Enc->m_currentEncodeConfig.m_currentResolution.Width,
pD3D12Enc->m_currentEncodeConfig.m_currentResolution.Height);
return false;
}
return true;
@@ -1062,7 +1065,8 @@ d3d12_video_encoder_compare_slice_config_h264_hevc(
}
uint32_t
d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12Enc)
d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12Enc,
std::vector<uint64_t> &pWrittenCodecUnitsSizes)
{
D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA currentPicParams =
d3d12_video_encoder_get_current_picture_param_settings(pD3D12Enc);
@@ -1072,7 +1076,20 @@ d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12E
auto codecConfigDesc = d3d12_video_encoder_get_current_codec_config_desc(pD3D12Enc);
auto MaxDPBCapacity = d3d12_video_encoder_get_current_max_dpb_capacity(pD3D12Enc);
size_t writtenSPSBytesCount = 0;
d3d12_video_bitstream_builder_h264 *pH264BitstreamBuilder =
static_cast<d3d12_video_bitstream_builder_h264 *>(pD3D12Enc->m_upBitstreamBuilder.get());
assert(pH264BitstreamBuilder);
uint64_t writtenAUDBytesCount = 0;
pWrittenCodecUnitsSizes.clear();
if (pH264BitstreamBuilder->insert_aud_nalu_requested())
{
pH264BitstreamBuilder->write_aud(pD3D12Enc->m_BitstreamHeadersBuffer,
pD3D12Enc->m_BitstreamHeadersBuffer.begin(),
writtenAUDBytesCount);
pWrittenCodecUnitsSizes.push_back(writtenAUDBytesCount);
}
bool isFirstFrame = (pD3D12Enc->m_fenceValue == 1);
bool writeNewSPS = isFirstFrame // on first frame
|| ((pD3D12Enc->m_currentEncodeConfig.m_seqFlags & // also on resolution change
@@ -1080,12 +1097,9 @@ d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12E
// Also on input format dirty flag for new SPS, VUI etc
|| (pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags & d3d12_video_encoder_config_dirty_flag_sequence_info);
d3d12_video_bitstream_builder_h264 *pH264BitstreamBuilder =
static_cast<d3d12_video_bitstream_builder_h264 *>(pD3D12Enc->m_upBitstreamBuilder.get());
assert(pH264BitstreamBuilder);
uint32_t active_seq_parameter_set_id = pH264BitstreamBuilder->get_active_sps_id();
uint64_t writtenSPSBytesCount = 0;
if (writeNewSPS) {
// For every new SPS for reconfiguration, increase the active_sps_id
if (!isFirstFrame) {
@@ -1103,11 +1117,12 @@ d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12E
pD3D12Enc->m_currentEncodeConfig.m_currentResolution,
pD3D12Enc->m_currentEncodeConfig.m_FrameCroppingCodecConfig,
pD3D12Enc->m_BitstreamHeadersBuffer,
pD3D12Enc->m_BitstreamHeadersBuffer.begin(),
pD3D12Enc->m_BitstreamHeadersBuffer.begin() + writtenAUDBytesCount,
writtenSPSBytesCount);
pWrittenCodecUnitsSizes.push_back(writtenSPSBytesCount);
}
size_t writtenPPSBytesCount = 0;
uint64_t writtenPPSBytesCount = 0;
pH264BitstreamBuilder->build_pps(*profDesc.pH264Profile,
*codecConfigDesc.pH264Config,
*currentPicParams.pH264PicData,
@@ -1121,17 +1136,20 @@ d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12E
if ( (writtenPPSBytesCount != active_pps.size()) ||
memcmp(pD3D12Enc->m_StagingHeadersBuffer.data(), active_pps.data(), writtenPPSBytesCount)) {
active_pps = pD3D12Enc->m_StagingHeadersBuffer;
pD3D12Enc->m_BitstreamHeadersBuffer.resize(writtenSPSBytesCount + writtenPPSBytesCount);
memcpy(&pD3D12Enc->m_BitstreamHeadersBuffer.data()[writtenSPSBytesCount], pD3D12Enc->m_StagingHeadersBuffer.data(), writtenPPSBytesCount);
pD3D12Enc->m_BitstreamHeadersBuffer.resize(writtenAUDBytesCount + writtenSPSBytesCount + writtenPPSBytesCount);
memcpy(&pD3D12Enc->m_BitstreamHeadersBuffer.data()[writtenAUDBytesCount + writtenSPSBytesCount], pD3D12Enc->m_StagingHeadersBuffer.data(), writtenPPSBytesCount);
pWrittenCodecUnitsSizes.push_back(writtenPPSBytesCount);
} else {
writtenPPSBytesCount = 0;
debug_printf("Skipping PPS (same as active PPS) for fenceValue: %" PRIu64 "\n", pD3D12Enc->m_fenceValue);
}
// Shrink buffer to fit the headers
if (pD3D12Enc->m_BitstreamHeadersBuffer.size() > (writtenPPSBytesCount + writtenSPSBytesCount)) {
pD3D12Enc->m_BitstreamHeadersBuffer.resize(writtenPPSBytesCount + writtenSPSBytesCount);
if (pD3D12Enc->m_BitstreamHeadersBuffer.size() > (writtenAUDBytesCount + writtenSPSBytesCount + writtenPPSBytesCount)) {
pD3D12Enc->m_BitstreamHeadersBuffer.resize(writtenAUDBytesCount + writtenSPSBytesCount + writtenPPSBytesCount);
}
assert(std::accumulate(pWrittenCodecUnitsSizes.begin(), pWrittenCodecUnitsSizes.end(), 0u) ==
static_cast<uint64_t>(pD3D12Enc->m_BitstreamHeadersBuffer.size()));
return pD3D12Enc->m_BitstreamHeadersBuffer.size();
}

View File

@@ -57,7 +57,8 @@ d3d12_video_encoder_update_current_frame_pic_params_info_h264(struct d3d12_video
D3D12_VIDEO_ENCODER_FRAME_TYPE_H264
d3d12_video_encoder_convert_frame_type_h264(enum pipe_h2645_enc_picture_type picType);
uint32_t
d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12Enc);
d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12Enc,
std::vector<uint64_t> &pWrittenCodecUnitsSizes);
bool
d3d12_video_encoder_compare_slice_config_h264_hevc(
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE targetMode,

View File

@@ -28,6 +28,8 @@
#include "d3d12_format.h"
#include <cmath>
#include <algorithm>
#include <numeric>
void
d3d12_video_encoder_update_current_rate_control_hevc(struct d3d12_video_encoder *pD3D12Enc,
@@ -72,6 +74,7 @@ d3d12_video_encoder_update_current_rate_control_hevc(struct d3d12_video_encoder
picture->rc.vbv_buf_initial_size;
}
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rc.max_au_size;
if (picture->rc.max_au_size > 0) {
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |=
D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE;
@@ -152,6 +155,7 @@ d3d12_video_encoder_update_current_rate_control_hevc(struct d3d12_video_encoder
picture->rc.vbv_buf_initial_size;
}
#endif
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rc.max_au_size;
if (picture->rc.max_au_size > 0) {
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |=
D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE;
@@ -229,6 +233,7 @@ d3d12_video_encoder_update_current_rate_control_hevc(struct d3d12_video_encoder
picture->rc.vbv_buf_initial_size;
}
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rc.max_au_size;
if (picture->rc.max_au_size > 0) {
pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |=
D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE;
@@ -410,15 +415,14 @@ d3d12_video_encoder_negotiate_current_hevc_slices_configuration(struct d3d12_vid
///
/// Try to see if we can accommodate the user's multi-slice request
///
if (picture->num_slice_descriptors > 1) {
/* Last slice can be less for rounding frame size and leave some error for mb rounding */
bool bUniformSizeSlices = true;
const double rounding_delta = 1.0;
for (uint32_t sliceIdx = 1; (sliceIdx < picture->num_slice_descriptors - 1) && bUniformSizeSlices; sliceIdx++) {
int64_t curSlice = picture->slices_descriptors[sliceIdx].num_ctu_in_slice;
int64_t prevSlice = picture->slices_descriptors[sliceIdx - 1].num_ctu_in_slice;
bUniformSizeSlices = bUniformSizeSlices && (std::abs(curSlice - prevSlice) <= rounding_delta);
}
if ((picture->slice_mode == PIPE_VIDEO_SLICE_MODE_BLOCKS) && (picture->num_slice_descriptors > 1)) {
/* Some apps send slices that are all the same size except for one slice, which may appear at any position in the descriptors */
/* Let's validate that there are at most 2 different slice sizes across all the descriptors */
std::vector<int> slice_sizes(picture->num_slice_descriptors);
for (uint32_t i = 0; i < picture->num_slice_descriptors; i++)
slice_sizes[i] = picture->slices_descriptors[i].num_ctu_in_slice;
std::sort(slice_sizes.begin(), slice_sizes.end());
bool bUniformSizeSlices = (std::unique(slice_sizes.begin(), slice_sizes.end()) - slice_sizes.begin()) <= 2;
uint32_t subregion_block_pixel_size = pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.SubregionBlockPixelsSize;
uint32_t num_subregions_per_scanline =
@@ -444,18 +448,10 @@ d3d12_video_encoder_negotiate_current_hevc_slices_configuration(struct d3d12_vid
bool bSliceAligned = ((num_subregions_per_slice % num_subregions_per_scanline) == 0);
if (!bUniformSizeSlices &&
d3d12_video_encoder_check_subregion_mode_support(
pD3D12Enc,
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME)) {
if (D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG) { // Check if fallback mode is enabled, or we should just fail
// without support
// Not supported to have custom slice sizes in D3D12 Video Encode fallback to uniform multi-slice
debug_printf(
"[d3d12_video_encoder_hevc] WARNING: Requested slice control mode is not supported: All slices must "
"have the same number of macroblocks. Falling back to encoding uniform %d slices per frame.\n",
picture->num_slice_descriptors);
if (bUniformSizeSlices &&
d3d12_video_encoder_check_subregion_mode_support(
pD3D12Enc,
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME)) {
requestedSlicesMode =
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME;
requestedSlicesConfig.NumberOfSlicesPerFrame = picture->num_slice_descriptors;
@@ -463,12 +459,18 @@ d3d12_video_encoder_negotiate_current_hevc_slices_configuration(struct d3d12_vid
"D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME "
"with %d slices per frame.\n",
requestedSlicesConfig.NumberOfSlicesPerFrame);
} else {
debug_printf("[d3d12_video_encoder_hevc] Requested slice control mode is not supported: All slices must "
"have the same number of macroblocks. To continue with uniform slices as a fallback, must "
"enable the OS environment variable D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG");
return false;
}
} else if (bUniformSizeSlices &&
d3d12_video_encoder_check_subregion_mode_support(
pD3D12Enc,
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_SQUARE_UNITS_PER_SUBREGION_ROW_UNALIGNED)) {
requestedSlicesMode =
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_SQUARE_UNITS_PER_SUBREGION_ROW_UNALIGNED;
requestedSlicesConfig.NumberOfCodingUnitsPerSlice = num_subregions_per_slice;
debug_printf("[d3d12_video_encoder_hevc] Using multi slice encoding mode: "
"D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_SQUARE_UNITS_PER_SUBREGION_ROW_UNALIGNED "
"with %d NumberOfCodingUnitsPerSlice per frame.\n",
requestedSlicesConfig.NumberOfCodingUnitsPerSlice);
} else if (bUniformSizeSlices && bSliceAligned &&
d3d12_video_encoder_check_subregion_mode_support(
pD3D12Enc,
@@ -483,33 +485,33 @@ d3d12_video_encoder_negotiate_current_hevc_slices_configuration(struct d3d12_vid
"%d subregion block rows (%d pix scanlines) per slice.\n",
requestedSlicesConfig.NumberOfRowsPerSlice,
pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.SubregionBlockPixelsSize);
} else if (bUniformSizeSlices &&
d3d12_video_encoder_check_subregion_mode_support(
pD3D12Enc,
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME)) {
requestedSlicesMode =
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME;
requestedSlicesConfig.NumberOfSlicesPerFrame = picture->num_slice_descriptors;
debug_printf("[d3d12_video_encoder_hevc] Using multi slice encoding mode: "
"D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME "
"with %d slices per frame.\n",
requestedSlicesConfig.NumberOfSlicesPerFrame);
} else if (D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG) { // Check if fallback mode is enabled, or we should just fail
// without support
// Fallback to single slice encoding (assigned by default when initializing variables requestedSlicesMode,
// requestedSlicesConfig)
debug_printf(
"[d3d12_video_encoder_hevc] WARNING: Slice mode for %d slices with bUniformSizeSlices: %d - bSliceAligned "
"%d not supported by the D3D12 driver, falling back to encoding a single slice per frame.\n",
picture->num_slice_descriptors,
bUniformSizeSlices,
bSliceAligned);
} else {
debug_printf("[d3d12_video_encoder_hevc] Requested slice control mode is not supported: All slices must "
"have the same number of macroblocks. To continue with uniform slices as a fallback, must "
"enable the OS environment variable D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG\n");
"have the same number of macroblocks.\n");
return false;
}
} else if(picture->slice_mode == PIPE_VIDEO_SLICE_MODE_MAX_SLICE_SICE) {
if ((picture->max_slice_bytes > 0) &&
d3d12_video_encoder_check_subregion_mode_support(
pD3D12Enc,
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION )) {
requestedSlicesMode =
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION;
requestedSlicesConfig.MaxBytesPerSlice = picture->max_slice_bytes;
debug_printf("[d3d12_video_encoder_hevc] Using multi slice encoding mode: "
"D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION "
"with %d MaxBytesPerSlice per frame.\n",
requestedSlicesConfig.MaxBytesPerSlice);
} else {
debug_printf("[d3d12_video_encoder_hevc] Requested slice control mode is not supported: All slices must "
"have the same number of macroblocks.\n");
return false;
}
} else {
requestedSlicesMode = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_FULL_FRAME;
requestedSlicesConfig.NumberOfSlicesPerFrame = 1;
debug_printf("[d3d12_video_encoder_hevc] Requested slice control mode is full frame. m_SlicesPartition_H264.NumberOfSlicesPerFrame = %d - m_encoderSliceConfigMode = %d \n",
requestedSlicesConfig.NumberOfSlicesPerFrame, requestedSlicesMode);
}
if (!d3d12_video_encoder_isequal_slice_config_hevc(
@@ -878,6 +880,9 @@ d3d12_video_encoder_update_current_encoder_config_state_hevc(struct d3d12_video_
// Will call for d3d12 driver support based on the initial requested features, then
// try to fallback if any of them is not supported and return the negotiated d3d12 settings
D3D12_FEATURE_DATA_VIDEO_ENCODER_SUPPORT1 capEncoderSupportData1 = {};
// Get max number of slices per frame supported
pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigMode =
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME;
if (!d3d12_video_encoder_negotiate_requested_features_and_d3d12_driver_caps(pD3D12Enc, capEncoderSupportData1)) {
debug_printf("[d3d12_video_encoder_hevc] After negotiating caps, D3D12_FEATURE_VIDEO_ENCODER_SUPPORT1 "
"arguments are not supported - "
@@ -887,6 +892,12 @@ d3d12_video_encoder_update_current_encoder_config_state_hevc(struct d3d12_video_
return false;
}
// Set slices config (configure before calling d3d12_video_encoder_calculate_max_slices_count_in_output)
if(!d3d12_video_encoder_negotiate_current_hevc_slices_configuration(pD3D12Enc, hevcPic)) {
debug_printf("d3d12_video_encoder_negotiate_current_hevc_slices_configuration failed!\n");
return false;
}
///
// Calculate current settings based on the returned values from the caps query
//
@@ -898,12 +909,6 @@ d3d12_video_encoder_update_current_encoder_config_state_hevc(struct d3d12_video_
pD3D12Enc->m_currentEncodeConfig.m_currentResolution,
pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.SubregionBlockPixelsSize);
// Set slices config
if(!d3d12_video_encoder_negotiate_current_hevc_slices_configuration(pD3D12Enc, hevcPic)) {
debug_printf("d3d12_video_encoder_negotiate_current_hevc_slices_configuration failed!\n");
return false;
}
// Set GOP config
if(!d3d12_video_encoder_update_hevc_gop_configuration(pD3D12Enc, hevcPic)) {
debug_printf("d3d12_video_encoder_update_hevc_gop_configuration failed!\n");
@@ -956,8 +961,12 @@ d3d12_video_encoder_update_current_encoder_config_state_hevc(struct d3d12_video_
if (pD3D12Enc->m_currentEncodeCapabilities.m_MaxSlicesInOutput >
pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.MaxSubregionsNumber) {
debug_printf("[d3d12_video_encoder_hevc] Desired number of subregions is not supported (higher than max "
"reported slice number in query caps)\n.");
debug_printf("[d3d12_video_encoder_hevc] Desired number of subregions %d is not supported (higher than max "
"reported slice number %d in query caps) for current resolution (%d, %d)\n.",
pD3D12Enc->m_currentEncodeCapabilities.m_MaxSlicesInOutput,
pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.MaxSubregionsNumber,
pD3D12Enc->m_currentEncodeConfig.m_currentResolution.Width,
pD3D12Enc->m_currentEncodeConfig.m_currentResolution.Height);
return false;
}
return true;
@@ -997,7 +1006,8 @@ d3d12_video_encoder_isequal_slice_config_hevc(
}
uint32_t
d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12Enc)
d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12Enc,
std::vector<uint64_t> &pWrittenCodecUnitsSizes)
{
D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA currentPicParams =
d3d12_video_encoder_get_current_picture_param_settings(pD3D12Enc);
@@ -1007,8 +1017,7 @@ d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12E
auto codecConfigDesc = d3d12_video_encoder_get_current_codec_config_desc(pD3D12Enc);
auto MaxDPBCapacity = d3d12_video_encoder_get_current_max_dpb_capacity(pD3D12Enc);
size_t writtenSPSBytesCount = 0;
size_t writtenVPSBytesCount = 0;
pWrittenCodecUnitsSizes.clear();
bool isFirstFrame = (pD3D12Enc->m_fenceValue == 1);
bool writeNewSPS = isFirstFrame // on first frame
|| ((pD3D12Enc->m_currentEncodeConfig.m_seqFlags & // also on resolution change
@@ -1025,6 +1034,7 @@ d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12E
bool writeNewVPS = isFirstFrame;
uint64_t writtenVPSBytesCount = 0;
if (writeNewVPS) {
bool gopHasBFrames = (pD3D12Enc->m_currentEncodeConfig.m_encoderGOPConfigDesc.m_HEVCGroupOfPictures.PPicturePeriod > 1);
pHEVCBitstreamBuilder->build_vps(*profDesc.pHEVCProfile,
@@ -1036,8 +1046,11 @@ d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12E
pD3D12Enc->m_BitstreamHeadersBuffer,
pD3D12Enc->m_BitstreamHeadersBuffer.begin(),
writtenVPSBytesCount);
pWrittenCodecUnitsSizes.push_back(writtenVPSBytesCount);
}
uint64_t writtenSPSBytesCount = 0;
if (writeNewSPS) {
// For every new SPS for reconfiguration, increase the active_sps_id
if (!isFirstFrame) {
@@ -1058,10 +1071,11 @@ d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12E
pD3D12Enc->m_BitstreamHeadersBuffer,
pD3D12Enc->m_BitstreamHeadersBuffer.begin() + writtenVPSBytesCount,
writtenSPSBytesCount);
pWrittenCodecUnitsSizes.push_back(writtenSPSBytesCount);
}
size_t writtenPPSBytesCount = 0;
pHEVCBitstreamBuilder->build_pps(pHEVCBitstreamBuilder->get_latest_sps(),
currentPicParams.pHEVCPicData->slice_pic_parameter_set_id,
*codecConfigDesc.pHEVCConfig,
@@ -1076,6 +1090,7 @@ d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12E
active_pps = pD3D12Enc->m_StagingHeadersBuffer;
pD3D12Enc->m_BitstreamHeadersBuffer.resize(writtenSPSBytesCount + writtenVPSBytesCount + writtenPPSBytesCount);
memcpy(&pD3D12Enc->m_BitstreamHeadersBuffer.data()[(writtenSPSBytesCount + writtenVPSBytesCount)], pD3D12Enc->m_StagingHeadersBuffer.data(), writtenPPSBytesCount);
pWrittenCodecUnitsSizes.push_back(writtenPPSBytesCount);
} else {
writtenPPSBytesCount = 0;
debug_printf("Skipping PPS (same as active PPS) for fenceValue: %" PRIu64 "\n", pD3D12Enc->m_fenceValue);
@@ -1086,5 +1101,7 @@ d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12E
pD3D12Enc->m_BitstreamHeadersBuffer.resize(writtenPPSBytesCount + writtenSPSBytesCount + writtenVPSBytesCount);
}
assert(std::accumulate(pWrittenCodecUnitsSizes.begin(), pWrittenCodecUnitsSizes.end(), 0u) ==
static_cast<uint64_t>(pD3D12Enc->m_BitstreamHeadersBuffer.size()));
return pD3D12Enc->m_BitstreamHeadersBuffer.size();
}

View File

@@ -57,7 +57,8 @@ d3d12_video_encoder_update_current_frame_pic_params_info_hevc(struct d3d12_video
D3D12_VIDEO_ENCODER_FRAME_TYPE_HEVC
d3d12_video_encoder_convert_frame_type_hevc(enum pipe_h2645_enc_picture_type picType);
uint32_t
d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12Enc);
d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12Enc,
std::vector<uint64_t> &pWrittenCodecUnitsSizes);
bool
d3d12_video_encoder_isequal_slice_config_hevc(
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE targetMode,

View File

@@ -25,6 +25,10 @@
#include <cmath>
// Constructor overload letting the caller request AUD (Access Unit Delimiter)
// NALU insertion. The flag is only stored here; it is later queried through
// insert_aud_nalu_requested() when the codec headers are built.
d3d12_video_bitstream_builder_h264::d3d12_video_bitstream_builder_h264(bool insert_aud_nalu)
   : m_insert_aud_nalu(insert_aud_nalu)
{ }
inline H264_SPEC_PROFILES
Convert12ToSpecH264Profiles(D3D12_VIDEO_ENCODER_PROFILE_H264 profile12)
{
@@ -192,6 +196,14 @@ d3d12_video_bitstream_builder_h264::write_end_of_sequence_nalu(std::vector<uint8
m_h264Encoder.write_end_of_sequence_nalu(headerBitstream, placingPositionStart, writtenBytes);
}
// Writes an Access Unit Delimiter (AUD) NALU into headerBitstream starting at
// placingPositionStart, reporting the number of emitted bytes in writtenBytes.
// Thin wrapper forwarding to the NALU writer's write_access_unit_delimiter_nalu().
void
d3d12_video_bitstream_builder_h264::write_aud(std::vector<uint8_t> & headerBitstream,
                                              std::vector<uint8_t>::iterator placingPositionStart,
                                              size_t & writtenBytes)
{
   m_h264Encoder.write_access_unit_delimiter_nalu(headerBitstream, placingPositionStart, writtenBytes);
}
void
d3d12_video_bitstream_builder_h264::build_pps(const D3D12_VIDEO_ENCODER_PROFILE_H264 & profile,
const D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264 & codecConfig,

View File

@@ -31,7 +31,7 @@ class d3d12_video_bitstream_builder_h264 : public d3d12_video_bitstream_builder_
{
public:
d3d12_video_bitstream_builder_h264() {};
d3d12_video_bitstream_builder_h264(bool insert_aud_nalu = false);
~d3d12_video_bitstream_builder_h264() {};
void build_sps(const struct pipe_h264_enc_seq_param & seqData,
@@ -64,6 +64,10 @@ class d3d12_video_bitstream_builder_h264 : public d3d12_video_bitstream_builder_
std::vector<uint8_t>::iterator placingPositionStart,
size_t & writtenBytes);
void write_aud(std::vector<uint8_t> & headerBitstream,
std::vector<uint8_t>::iterator placingPositionStart,
size_t & writtenBytes);
void print_pps(const H264_PPS &pps);
void print_sps(const H264_SPS &sps);
@@ -92,11 +96,14 @@ class d3d12_video_bitstream_builder_h264 : public d3d12_video_bitstream_builder_
debug_printf("[d3d12_video_bitstream_builder_h264] Setting new active PPS ID: %d ", m_activePPSIndex);
};
bool insert_aud_nalu_requested() { return m_insert_aud_nalu; }
private:
d3d12_video_nalu_writer_h264 m_h264Encoder;
std::vector<uint8_t> m_activePPS;
uint32_t m_activeSPSIndex = 0;
uint32_t m_activePPSIndex = 0;
bool m_insert_aud_nalu = false;
};
#endif

View File

@@ -483,3 +483,45 @@ d3d12_video_nalu_writer_h264::write_end_of_sequence_nalu(std::vector<uint8_t> &
writtenBytes = naluByteSize;
}
// Builds an access_unit_delimiter_rbsp() payload, wraps it into a NALU and
// deep-copies the result into headerBitstream at placingPositionStart.
// writtenBytes receives the total number of NALU bytes written.
void
d3d12_video_nalu_writer_h264::write_access_unit_delimiter_nalu(std::vector<uint8_t> & headerBitstream,
                                                               std::vector<uint8_t>::iterator placingPositionStart,
                                                               size_t & writtenBytes)
{
   d3d12_video_encoder_bitstream rbsp, nalu;

   if (!rbsp.create_bitstream(8)) {
      debug_printf("rbsp.create_bitstream(8) failed.\n");
      assert(false);
   }

   if (!nalu.create_bitstream(2 * MAX_COMPRESSED_PPS)) {
      debug_printf("nalu.create_bitstream(2 * MAX_COMPRESSED_PPS) failed.\n");
      assert(false);
   }

   // primary_pic_type = 2: per H.264 spec Table 7-5, I/P/B slice types may all
   // be present in the access unit that follows this delimiter.
   rbsp.set_start_code_prevention(true);
   rbsp.put_bits(3, 2/*primary_pic_type*/);
   rbsp_trailing(&rbsp);
   rbsp.flush();

   if (wrap_rbsp_into_nalu(&nalu, &rbsp, NAL_REFIDC_NONREF, NAL_TYPE_ACCESS_UNIT_DELIMITER) <= 0u) {
      debug_printf(
         "wrap_rbsp_into_nalu(&nalu, &rbsp, NAL_REFIDC_NONREF, NAL_TYPE_ACCESS_UNIT_DELIMITER) didn't write any bytes.\n");
      assert(false);
   }

   // Deep-copy the finished NALU into the caller's vector before nalu goes out
   // of scope here — its destructor frees the underlying bitstream buffer.
   uint8_t *pNaluBuffer = nalu.get_bitstream_buffer();
   size_t naluSize = nalu.get_byte_count();
   size_t dstOffset = static_cast<size_t>(std::distance(headerBitstream.begin(), placingPositionStart));
   if ((dstOffset + naluSize) > headerBitstream.size())
      headerBitstream.resize(dstOffset + naluSize);
   std::copy(pNaluBuffer, pNaluBuffer + naluSize, headerBitstream.data() + dstOffset);
   writtenBytes = naluSize;
}

View File

@@ -43,7 +43,7 @@ enum H264_NALU_TYPE
NAL_TYPE_SEI = 6,
NAL_TYPE_SPS = 7,
NAL_TYPE_PPS = 8,
NAL_TYPE_ACCESS_UNIT_DEMILITER = 9,
NAL_TYPE_ACCESS_UNIT_DELIMITER = 9,
NAL_TYPE_END_OF_SEQUENCE = 10,
NAL_TYPE_END_OF_STREAM = 11,
NAL_TYPE_FILLER_DATA = 12,
@@ -185,6 +185,10 @@ class d3d12_video_nalu_writer_h264
std::vector<uint8_t>::iterator placingPositionStart,
size_t & writtenBytes);
void write_access_unit_delimiter_nalu(std::vector<uint8_t> & headerBitstream,
std::vector<uint8_t>::iterator placingPositionStart,
size_t & writtenBytes);
private:
// Writes from structure into bitstream with RBSP trailing but WITHOUT NAL unit wrap (eg. nal_idc_type, etc)
uint32_t write_sps_bytes(d3d12_video_encoder_bitstream *pBitstream, H264_SPS *pSPS);

View File

@@ -515,10 +515,27 @@ d3d12_video_encode_supported_slice_structures(const D3D12_VIDEO_ENCODER_CODEC &c
supportedSliceStructuresBitMask |= PIPE_VIDEO_CAP_SLICE_STRUCTURE_POWER_OF_TWO_ROWS;
}
/* Needs more work in VA frontend to support VAEncMiscParameterMaxSliceSize
and the driver potentially reporting back status in VACodedBufferSegment */
capDataSubregionLayout.SubregionMode =
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_SQUARE_UNITS_PER_SUBREGION_ROW_UNALIGNED;
hr = pD3D12VideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE,
&capDataSubregionLayout,
sizeof(capDataSubregionLayout));
if (FAILED(hr)) {
debug_printf("CheckFeatureSupport failed with HR %x\n", hr);
} else if (capDataSubregionLayout.IsSupported) {
/* This would be setting K rows per subregions in this D3D12 mode */
supportedSliceStructuresBitMask |= PIPE_VIDEO_CAP_SLICE_STRUCTURE_EQUAL_MULTI_ROWS;
/* Assuming height/blocksize >= max_supported_slices, which is reported
in PIPE_VIDEO_CAP_ENC_MAX_SLICES_PER_FRAME and should be checked by the client*/
/* This would be setting 1 row per subregion in this D3D12 mode */
supportedSliceStructuresBitMask |= PIPE_VIDEO_CAP_SLICE_STRUCTURE_EQUAL_ROWS;
/* This is ok, would be setting K rows per subregions in this D3D12 mode (and rounding the last one) */
supportedSliceStructuresBitMask |= PIPE_VIDEO_CAP_SLICE_STRUCTURE_POWER_OF_TWO_ROWS;
/* This is ok, would be setting K MBs per subregions in this D3D12 mode*/
supportedSliceStructuresBitMask |= PIPE_VIDEO_CAP_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS;
}
/*capDataSubregionLayout.SubregionMode = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION;
capDataSubregionLayout.SubregionMode = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION;
hr = pD3D12VideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE,
&capDataSubregionLayout,
sizeof(capDataSubregionLayout));
@@ -526,7 +543,7 @@ d3d12_video_encode_supported_slice_structures(const D3D12_VIDEO_ENCODER_CODEC &c
debug_printf("CheckFeatureSupport failed with HR %x\n", hr);
} else if (capDataSubregionLayout.IsSupported) {
supportedSliceStructuresBitMask |= PIPE_VIDEO_CAP_SLICE_STRUCTURE_MAX_SLICE_SIZE;
}*/
}
return supportedSliceStructuresBitMask;
}
@@ -1666,6 +1683,11 @@ d3d12_screen_get_video_param_encode(struct pipe_screen *pscreen,
return 1;
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_ENC_SUPPORTS_FEEDBACK_METADATA:
return (PIPE_VIDEO_FEEDBACK_METADATA_TYPE_BITSTREAM_SIZE |
PIPE_VIDEO_FEEDBACK_METADATA_TYPE_ENCODE_RESULT |
PIPE_VIDEO_FEEDBACK_METADATA_TYPE_CODEC_UNIT_LOCATION |
PIPE_VIDEO_FEEDBACK_METADATA_TYPE_MAX_FRAME_SIZE_OVERFLOW);
case PIPE_VIDEO_CAP_MAX_WIDTH:
case PIPE_VIDEO_CAP_MAX_HEIGHT:
case PIPE_VIDEO_CAP_MIN_WIDTH:

View File

@@ -67,14 +67,6 @@ GetDesc(ID3D12VideoDecoderHeap *heap)
*/
const bool D3D12_VIDEO_ENC_CBR_FORCE_VBV_EQUAL_BITRATE = debug_get_bool_option("D3D12_VIDEO_ENC_CBR_FORCE_VBV_EQUAL_BITRATE", false);
// Allow encoder to continue the encoding session when a slice mode
// is requested but not supported.
//
// If setting this OS Env variable to true, the encoder will try to adjust to the closest slice
// setting available and encode using that configuration anyway
//
const bool D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG = debug_get_bool_option("D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG", false);
const bool D3D12_VIDEO_ENC_ASYNC = debug_get_bool_option("D3D12_VIDEO_ENC_ASYNC", true);
/**
@@ -179,5 +171,8 @@ d3d12_video_encoder_convert_12tusize_to_pixel_size_hevc(const D3D12_VIDEO_ENCODE
DEFINE_ENUM_FLAG_OPERATORS(pipe_enc_feature);
DEFINE_ENUM_FLAG_OPERATORS(pipe_h265_enc_pred_direction);
DEFINE_ENUM_FLAG_OPERATORS(codec_unit_location_flags);
DEFINE_ENUM_FLAG_OPERATORS(pipe_video_feedback_encode_result_flags);
DEFINE_ENUM_FLAG_OPERATORS(pipe_video_feedback_metadata_type);
#endif