diff --git a/src/gallium/drivers/d3d12/d3d12_video_enc.cpp b/src/gallium/drivers/d3d12/d3d12_video_enc.cpp index 5d71d6e7ef5..d1f6f1f0853 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_enc.cpp +++ b/src/gallium/drivers/d3d12/d3d12_video_enc.cpp @@ -104,6 +104,12 @@ d3d12_video_encoder_flush(struct pipe_video_codec *codec) assert(pD3D12Enc->m_spD3D12VideoDevice); assert(pD3D12Enc->m_spEncodeCommandQueue); + if (pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result & PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED) { + debug_printf("WARNING: [d3d12_video_encoder] d3d12_video_encoder_flush - Frame submission %" PRIu64 " failed. Encoder lost, please recreate pipe_video_codec object \n", pD3D12Enc->m_fenceValue); + assert(false); + return; + } + // Flush any work batched (ie. shaders blit on input texture, etc or bitstream headers buffer_subdata batched upload) // and Wait the m_spEncodeCommandQueue for GPU upload completion // before recording EncodeFrame below. @@ -168,6 +174,8 @@ d3d12_video_encoder_flush(struct pipe_video_codec *codec) flush_fail: debug_printf("[d3d12_video_encoder] d3d12_video_encoder_flush failed for fenceValue: %" PRIu64 "\n", pD3D12Enc->m_fenceValue); + pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; + pD3D12Enc->m_spEncodedFrameMetadata[pD3D12Enc->m_fenceValue % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; assert(false); } @@ -196,13 +204,13 @@ d3d12_video_encoder_ensure_fence_finished(struct pipe_video_codec *codec, uint64 goto ensure_fence_finished_fail; } - d3d12_fence_wait_event(event, event_fd, timeout_ns); - d3d12_fence_close_event(event, event_fd); - debug_printf("[d3d12_video_encoder] d3d12_video_encoder_ensure_fence_finished - Waiting on fence to be done with " "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n", fenceValueToWaitOn, completedValue); + + d3d12_fence_wait_event(event, event_fd, timeout_ns); + d3d12_fence_close_event(event, event_fd); } else { debug_printf("[d3d12_video_encoder] d3d12_video_encoder_ensure_fence_finished - Fence already done with " "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n", @@ -213,6 +221,8 @@ d3d12_video_encoder_ensure_fence_finished(struct pipe_video_codec *codec, uint64 ensure_fence_finished_fail: debug_printf("[d3d12_video_encoder] d3d12_video_encoder_sync_completion failed for fenceValue: %" PRIu64 "\n", fenceValueToWaitOn); + pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; + pD3D12Enc->m_spEncodedFrameMetadata[fenceValueToWaitOn % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; assert(false); } @@ -259,6 +269,8 @@ d3d12_video_encoder_sync_completion(struct pipe_video_codec *codec, uint64_t fen sync_with_token_fail: debug_printf("[d3d12_video_encoder] d3d12_video_encoder_sync_completion failed for fenceValue: %" PRIu64 "\n", fenceValueToWaitOn); + pD3D12Enc->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_ENC_ASYNC_DEPTH].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; + pD3D12Enc->m_spEncodedFrameMetadata[fenceValueToWaitOn % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; assert(false); } @@ -406,7 +418,7 @@ d3d12_video_encoder_reconfigure_encoder_objects(struct d3d12_video_encoder *pD3D resourceAllocFlags, pD3D12Enc->m_NodeMask); } - d3d12_video_encoder_create_reference_picture_manager(pD3D12Enc); + d3d12_video_encoder_create_reference_picture_manager(pD3D12Enc, picture); } bool reCreatedEncoder = false; @@ -538,7 +550,7 @@ d3d12_video_encoder_reconfigure_encoder_objects(struct d3d12_video_encoder *pD3D } void -d3d12_video_encoder_create_reference_picture_manager(struct d3d12_video_encoder *pD3D12Enc) +d3d12_video_encoder_create_reference_picture_manager(struct d3d12_video_encoder *pD3D12Enc, struct pipe_picture_desc * picture) { pD3D12Enc->m_upDPBManager.reset(); pD3D12Enc->m_upBitstreamBuilder.reset(); @@ -560,7 +572,8 @@ d3d12_video_encoder_create_reference_picture_manager(struct d3d12_video_encoder d3d12_video_encoder_get_current_max_dpb_capacity(pD3D12Enc) ); - pD3D12Enc->m_upBitstreamBuilder = std::make_unique(); + struct pipe_h264_enc_picture_desc *pH264Pic = (struct pipe_h264_enc_picture_desc *) picture; + pD3D12Enc->m_upBitstreamBuilder = std::make_unique(pH264Pic->insert_aud_nalu); } break; #endif #if VIDEO_CODEC_H265ENC @@ -845,7 +858,8 @@ d3d12_video_encoder_get_current_level_desc(struct d3d12_video_encoder *pD3D12Enc void d3d12_video_encoder_build_pre_encode_codec_headers(struct d3d12_video_encoder *pD3D12Enc, bool &postEncodeHeadersNeeded, - uint64_t &preEncodeGeneratedHeadersByteSize) + uint64_t &preEncodeGeneratedHeadersByteSize, + std::vector &pWrittenCodecUnitsSizes) { enum pipe_video_format codec = u_reduce_video_profile(pD3D12Enc->base.profile); switch (codec) { @@ -853,14 +867,14 @@ d3d12_video_encoder_build_pre_encode_codec_headers(struct d3d12_video_encoder *p case PIPE_VIDEO_FORMAT_MPEG4_AVC: { postEncodeHeadersNeeded = false; - preEncodeGeneratedHeadersByteSize = d3d12_video_encoder_build_codec_headers_h264(pD3D12Enc); + preEncodeGeneratedHeadersByteSize = d3d12_video_encoder_build_codec_headers_h264(pD3D12Enc, pWrittenCodecUnitsSizes); } break; #endif #if VIDEO_CODEC_H265ENC case PIPE_VIDEO_FORMAT_HEVC: { postEncodeHeadersNeeded = false; - preEncodeGeneratedHeadersByteSize = d3d12_video_encoder_build_codec_headers_hevc(pD3D12Enc); + preEncodeGeneratedHeadersByteSize = d3d12_video_encoder_build_codec_headers_hevc(pD3D12Enc, pWrittenCodecUnitsSizes); } break; #endif #if VIDEO_CODEC_AV1ENC @@ -869,6 +883,7 @@ d3d12_video_encoder_build_pre_encode_codec_headers(struct d3d12_video_encoder *p pD3D12Enc->m_BitstreamHeadersBuffer.resize(0); postEncodeHeadersNeeded = true; preEncodeGeneratedHeadersByteSize = 0; + pWrittenCodecUnitsSizes.clear(); } break; #endif default: @@ -1031,6 +1046,7 @@ static void d3d12_video_encoder_disable_rc_maxframesize(struct D3D12EncodeRateControlState & rcState) { rcState.m_Flags &= ~D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE; + rcState.max_frame_size = 0; switch (rcState.m_Mode) { case D3D12_VIDEO_ENCODER_RATE_CONTROL_MODE_CBR: { @@ -1695,6 +1711,16 @@ d3d12_video_encoder_reconfigure_session(struct d3d12_video_encoder *pD3D12Enc, debug_printf("d3d12_video_encoder_prepare_output_buffers failed!\n"); return false; } + + // Save frame size expectation snapshot from record time to resolve at get_feedback time (after execution) + uint64_t current_metadata_slot = (pD3D12Enc->m_fenceValue % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT); + pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].expected_max_frame_size = + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size; + + pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].expected_max_slice_size = + (pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigMode == D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION) ? + pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigDesc.m_SlicesPartition_H264.MaxBytesPerSlice : 0; + return true; } @@ -1739,6 +1765,8 @@ d3d12_video_encoder_begin_frame(struct pipe_video_codec * codec, } pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].m_InputSurfaceFence = (struct d3d12_fence*) *picture->fence; + pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK; + pD3D12Enc->m_spEncodedFrameMetadata[pD3D12Enc->m_fenceValue % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK; debug_printf("[d3d12_video_encoder] d3d12_video_encoder_begin_frame finalized for fenceValue: %" PRIu64 "\n", pD3D12Enc->m_fenceValue); @@ -1747,6 +1775,8 @@ d3d12_video_encoder_begin_frame(struct pipe_video_codec * codec, fail: debug_printf("[d3d12_video_encoder] d3d12_video_encoder_begin_frame failed for fenceValue: %" PRIu64 "\n", pD3D12Enc->m_fenceValue); + pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; + pD3D12Enc->m_spEncodedFrameMetadata[pD3D12Enc->m_fenceValue % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; assert(false); } @@ -1846,9 +1876,11 @@ d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec, assert(pD3D12Enc->m_spEncodeCommandQueue); assert(pD3D12Enc->m_pD3D12Screen); - // Since this can be queried out of order in get_feedback, we need to pass out the actual value of the fence - // and not the pointer to it (the fence value will keep increasing in the surfaces that have a pointer to it) - *feedback = (void*) pD3D12Enc->m_fenceValue; + if (pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result & PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED) { + debug_printf("WARNING: [d3d12_video_encoder] d3d12_video_encoder_encode_bitstream - Frame submission %" PRIu64 " failed. Encoder lost, please recreate pipe_video_codec object\n", pD3D12Enc->m_fenceValue); + assert(false); + return; + } struct d3d12_video_buffer *pInputVideoBuffer = (struct d3d12_video_buffer *) source; assert(pInputVideoBuffer); @@ -1912,7 +1944,8 @@ d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec, d3d12_video_encoder_build_pre_encode_codec_headers(pD3D12Enc, pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].postEncodeHeadersNeeded, - pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize); + pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize, + pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes); assert(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize == pD3D12Enc->m_BitstreamHeadersBuffer.size()); // Only upload headers now and leave prefix offset space gap in compressed bitstream if the codec builds headers before execution. @@ -1930,8 +1963,13 @@ d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec, (pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.CompressedBitstreamBufferAccessAlignment > 1) && ((pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize % pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.CompressedBitstreamBufferAccessAlignment) != 0) ) { - pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize = ALIGN(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize, pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.CompressedBitstreamBufferAccessAlignment); + size_t new_size = ALIGN(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize, pD3D12Enc->m_currentEncodeCapabilities.m_ResourceRequirementsCaps.CompressedBitstreamBufferAccessAlignment); + size_t align_padding = new_size - pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize; + pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize = new_size; pD3D12Enc->m_BitstreamHeadersBuffer.resize(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize, 0); + // Update last pWrittenCodecUnitsSizes with extra offset padding + if (pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes.size() > 0) + pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes[pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes.size() - 1] += align_padding; } // Upload the CPU buffers with the bitstream headers to the compressed bitstream resource in the interval @@ -1964,7 +2002,9 @@ d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec, if (FAILED(hr)) { debug_printf("CreateCommittedResource failed with HR %x\n", hr); + pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; assert(false); + return; } } @@ -1976,6 +2016,13 @@ d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec, pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].comp_bit_destination = &pOutputBitstreamBuffer->base.b; } + memset(&pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData, + 0, + sizeof(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData)); + pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData.value = pD3D12Enc->m_fenceValue; + pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData.cmdqueue_fence = pD3D12Enc->m_spFence.Get(); + *feedback = (void*) &pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].m_FenceData; + std::vector rgCurrentFrameStateTransitions = { CD3DX12_RESOURCE_BARRIER::Transition(pInputVideoD3D12Res, D3D12_RESOURCE_STATE_COMMON, @@ -2218,15 +2265,49 @@ d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec, } void -d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec, void *feedback, unsigned *size, struct pipe_enc_feedback_metadata* metadata) +d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec, + void *feedback, + unsigned *size, + struct pipe_enc_feedback_metadata* pMetadata) { struct d3d12_video_encoder *pD3D12Enc = (struct d3d12_video_encoder *) codec; assert(pD3D12Enc); - uint64_t requested_metadata_fence = ((uint64_t) feedback); - d3d12_video_encoder_sync_completion(codec, requested_metadata_fence, OS_TIMEOUT_INFINITE); + struct d3d12_fence *feedback_fence = (struct d3d12_fence *) feedback; + uint64_t requested_metadata_fence = feedback_fence->value; + + struct pipe_enc_feedback_metadata opt_metadata; + memset(&opt_metadata, 0, sizeof(opt_metadata)); + HRESULT hr = pD3D12Enc->m_pD3D12Screen->dev->GetDeviceRemovedReason(); + if (hr != S_OK) { + opt_metadata.encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; + debug_printf("Error: d3d12_video_encoder_get_feedback for Encode GPU command for fence %" PRIu64 " failed with GetDeviceRemovedReason: %x\n", + requested_metadata_fence, + hr); + assert(false); + return; + } uint64_t current_metadata_slot = (requested_metadata_fence % D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT); + opt_metadata.encode_result = pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].encode_result; + if (opt_metadata.encode_result & PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED) { + debug_printf("Error: d3d12_video_encoder_get_feedback for Encode GPU command for fence %" PRIu64 " failed on submission with encode_result: %x\n", + requested_metadata_fence, + opt_metadata.encode_result); + assert(false); + return; + } + + d3d12_video_encoder_sync_completion(codec, requested_metadata_fence, OS_TIMEOUT_INFINITE); + + opt_metadata.encode_result = pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].encode_result; + if (opt_metadata.encode_result & PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED) { + debug_printf("Error: d3d12_video_encoder_get_feedback for Encode GPU command for fence %" PRIu64 " failed on GPU fence wait with encode_result: %x\n", + requested_metadata_fence, + opt_metadata.encode_result); + assert(false); + return; + } debug_printf("d3d12_video_encoder_get_feedback with feedback: %" PRIu64 ", resources slot %" PRIu64 " metadata resolved ID3D12Resource buffer %p metadata required size %" PRIu64 "\n", requested_metadata_fence, @@ -2242,11 +2323,33 @@ d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec, void *feedback, requested_metadata_fence, pD3D12Enc->m_fenceValue, D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT); - *size = 0; + opt_metadata.encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; assert(false); return; } + // Extract encode metadata + D3D12_VIDEO_ENCODER_OUTPUT_METADATA encoderMetadata; + std::vector pSubregionsMetadata; + d3d12_video_encoder_extract_encode_metadata( + pD3D12Enc, + pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].spBuffer.Get(), + pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].bufferSize, + encoderMetadata, + pSubregionsMetadata); + + // Validate encoder output metadata + if ((encoderMetadata.EncodeErrorFlags != D3D12_VIDEO_ENCODER_ENCODE_ERROR_FLAG_NO_ERROR) || (encoderMetadata.EncodedBitstreamWrittenBytesCount == 0)) { + opt_metadata.encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_FAILED; + debug_printf("[d3d12_video_encoder] Encode GPU command for fence %" PRIu64 " failed - EncodeErrorFlags: %" PRIu64 "\n", + requested_metadata_fence, + encoderMetadata.EncodeErrorFlags); + assert(false); + return; + } + debug_printf("WrittenSubregionsCount: %" PRIu64" \n", encoderMetadata.WrittenSubregionsCount); + + // Calculate the full bitstream size if(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].postEncodeHeadersNeeded) { /// @@ -2268,35 +2371,49 @@ d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec, void *feedback, /// If we wrote headers (if any) before encode execution, use that size to calculate feedback size of complete bitstream. /// - D3D12_VIDEO_ENCODER_OUTPUT_METADATA encoderMetadata; - std::vector pSubregionsMetadata; - d3d12_video_encoder_extract_encode_metadata( - pD3D12Enc, - pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].spBuffer.Get(), - pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].bufferSize, - encoderMetadata, - pSubregionsMetadata); - - // Read metadata from encoderMetadata - if (encoderMetadata.EncodeErrorFlags != D3D12_VIDEO_ENCODER_ENCODE_ERROR_FLAG_NO_ERROR) { - debug_printf("[d3d12_video_encoder] Encode GPU command for fence %" PRIu64 " failed - EncodeErrorFlags: %" PRIu64 "\n", - requested_metadata_fence, - encoderMetadata.EncodeErrorFlags); - *size = 0; - assert(false); - return; - } - - assert(encoderMetadata.EncodedBitstreamWrittenBytesCount > 0u); - *size = static_cast(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].preEncodeGeneratedHeadersByteSize + encoderMetadata.EncodedBitstreamWrittenBytesCount); + + // Prepare codec unit metadata post execution with pre-execution headers generation + for (unsigned i = 0; i < pSubregionsMetadata.size();i++) + pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes.push_back(pSubregionsMetadata[i].bSize); } - + debug_printf("[d3d12_video_encoder_get_feedback] Requested metadata for encoded frame at fence %" PRIu64 " is %d (feedback was requested at current fence %" PRIu64 ")\n", requested_metadata_fence, *size, pD3D12Enc->m_fenceValue); + + if (*size > pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].expected_max_frame_size) + opt_metadata.encode_result |= PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_MAX_FRAME_SIZE_OVERFLOW; + + // Report codec unit metadata + opt_metadata.codec_unit_metadata_count = 0u; + memset(opt_metadata.codec_unit_metadata, 0, sizeof(opt_metadata.codec_unit_metadata)); + uint64_t absolute_offset_acum = 0u; + debug_printf("Written: %" PRIu64" codec units \n", static_cast(pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes.size())); + for (uint32_t i = 0; i < pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes.size(); i++) + { + opt_metadata.codec_unit_metadata[opt_metadata.codec_unit_metadata_count].size = pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].pWrittenCodecUnitsSizes[i]; + opt_metadata.codec_unit_metadata[opt_metadata.codec_unit_metadata_count].offset = absolute_offset_acum; + absolute_offset_acum += opt_metadata.codec_unit_metadata[opt_metadata.codec_unit_metadata_count].size; + debug_printf("Codec unit %d: offset: %" PRIu64" - size: %" PRIu64" \n", + i, + opt_metadata.codec_unit_metadata[opt_metadata.codec_unit_metadata_count].offset, + opt_metadata.codec_unit_metadata[opt_metadata.codec_unit_metadata_count].size); + + opt_metadata.codec_unit_metadata_count++; + } + + opt_metadata.present_metadata = (PIPE_VIDEO_FEEDBACK_METADATA_TYPE_BITSTREAM_SIZE | + PIPE_VIDEO_FEEDBACK_METADATA_TYPE_ENCODE_RESULT | + PIPE_VIDEO_FEEDBACK_METADATA_TYPE_CODEC_UNIT_LOCATION | + PIPE_VIDEO_FEEDBACK_METADATA_TYPE_MAX_FRAME_SIZE_OVERFLOW); + + if (pMetadata) + *pMetadata = opt_metadata; + + assert(absolute_offset_acum == *size); pD3D12Enc->m_spEncodedFrameMetadata[current_metadata_slot].bRead = true; } @@ -2420,6 +2537,12 @@ d3d12_video_encoder_end_frame(struct pipe_video_codec * codec, debug_printf("[d3d12_video_encoder] d3d12_video_encoder_end_frame started for fenceValue: %" PRIu64 "\n", pD3D12Enc->m_fenceValue); + if (pD3D12Enc->m_inflightResourcesPool[d3d12_video_encoder_pool_current_index(pD3D12Enc)].encode_result != PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK) { + debug_printf("WARNING: [d3d12_video_encoder] d3d12_video_encoder_end_frame - Frame submission %" PRIu64 " failed. Encoder lost, please recreate pipe_video_codec object\n", pD3D12Enc->m_fenceValue); + assert(false); + return; + } + // Signal finish of current frame encoding to the picture management tracker pD3D12Enc->m_upDPBManager->end_frame(); diff --git a/src/gallium/drivers/d3d12/d3d12_video_enc.h b/src/gallium/drivers/d3d12/d3d12_video_enc.h index 2974dab0ee1..b39eab8d478 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_enc.h +++ b/src/gallium/drivers/d3d12/d3d12_video_enc.h @@ -71,7 +71,10 @@ d3d12_video_encoder_encode_bitstream(struct pipe_video_codec * codec, * get encoder feedback */ void -d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec, void *feedback, unsigned *size, struct pipe_enc_feedback_metadata* metadata); +d3d12_video_encoder_get_feedback(struct pipe_video_codec *codec, + void *feedback, + unsigned *size, + struct pipe_enc_feedback_metadata* pMetadata); /** * end encoding of the current frame @@ -172,6 +175,7 @@ struct D3D12EncodeRateControlState { D3D12_VIDEO_ENCODER_RATE_CONTROL_MODE m_Mode = {}; D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAGS m_Flags = {}; + uint64_t max_frame_size = 0; DXGI_RATIONAL m_FrameRate = {}; union { @@ -300,6 +304,7 @@ struct EncodedBitstreamResolvedMetadata * encoded in the GPU */ uint64_t preEncodeGeneratedHeadersByteSize = 0; + std::vector pWrittenCodecUnitsSizes; /* * Indicates if the encoded frame needs header generation after GPU execution @@ -345,6 +350,16 @@ struct EncodedBitstreamResolvedMetadata * in between the GPU spStagingBitstream contents */ std::vector m_StagingBitstreamConstruction; + + /* Stores encode result for get_feedback readback in the D3D12_VIDEO_ENC_METADATA_BUFFERS_COUNT slots */ + enum pipe_video_feedback_encode_result_flags encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK; + + /* Expected max frame, slice sizes */ + uint64_t expected_max_frame_size = 0; + uint64_t expected_max_slice_size = 0; + + /* Pending fence data for this frame */ + struct d3d12_fence m_FenceData; }; struct d3d12_video_encoder @@ -396,6 +411,9 @@ struct d3d12_video_encoder ComPtr m_spCommandAllocator; struct d3d12_fence* m_InputSurfaceFence = NULL; + + /* Stores encode result for submission error control in the D3D12_VIDEO_ENC_ASYNC_DEPTH slots */ + enum pipe_video_feedback_encode_result_flags encode_result = PIPE_VIDEO_FEEDBACK_METADATA_ENCODE_FLAG_OK; }; std::vector m_inflightResourcesPool; @@ -432,7 +450,7 @@ d3d12_video_encoder_get_current_gop_desc(struct d3d12_video_encoder *pD3D12Enc); uint32_t d3d12_video_encoder_get_current_max_dpb_capacity(struct d3d12_video_encoder *pD3D12Enc); void -d3d12_video_encoder_create_reference_picture_manager(struct d3d12_video_encoder *pD3D12Enc); +d3d12_video_encoder_create_reference_picture_manager(struct d3d12_video_encoder *pD3D12Enc, struct pipe_picture_desc * picture); void d3d12_video_encoder_update_picparams_tracking(struct d3d12_video_encoder *pD3D12Enc, struct pipe_video_buffer * srcTexture, @@ -453,7 +471,8 @@ d3d12_video_encoder_prepare_output_buffers(struct d3d12_video_encoder *pD3D12Enc void d3d12_video_encoder_build_pre_encode_codec_headers(struct d3d12_video_encoder *pD3D12Enc, bool &postEncodeHeadersNeeded, - uint64_t &preEncodeGeneratedHeadersByteSize); + uint64_t &preEncodeGeneratedHeadersByteSize, + std::vector &pWrittenCodecUnitsSizes); void d3d12_video_encoder_extract_encode_metadata( struct d3d12_video_encoder * pD3D12Dec, diff --git a/src/gallium/drivers/d3d12/d3d12_video_enc_av1.cpp b/src/gallium/drivers/d3d12/d3d12_video_enc_av1.cpp index 2d6f4cd83b3..39102f9c50c 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_enc_av1.cpp +++ b/src/gallium/drivers/d3d12/d3d12_video_enc_av1.cpp @@ -28,6 +28,7 @@ #include "d3d12_screen.h" #include "d3d12_format.h" #include +#include void d3d12_video_encoder_update_current_rate_control_av1(struct d3d12_video_encoder *pD3D12Enc, @@ -76,6 +77,7 @@ d3d12_video_encoder_update_current_rate_control_av1(struct d3d12_video_encoder * picture->rc[0].vbv_buf_initial_size; } + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rc[0].max_au_size; if (picture->rc[0].max_au_size > 0) { pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |= D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE; @@ -163,6 +165,7 @@ d3d12_video_encoder_update_current_rate_control_av1(struct d3d12_video_encoder * picture->rc[0].vbv_buf_initial_size; } #endif + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rc[0].max_au_size; if (picture->rc[0].max_au_size > 0) { pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |= D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE; @@ -246,6 +249,7 @@ d3d12_video_encoder_update_current_rate_control_av1(struct d3d12_video_encoder * picture->rc[0].vbv_buf_initial_size; } + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rc[0].max_au_size; if (picture->rc[0].max_au_size > 0) { pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |= D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE; @@ -2229,6 +2233,8 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc static_cast(pD3D12Enc->m_upBitstreamBuilder.get()); assert(pAV1BitstreamBuilder); + associatedMetadata.pWrittenCodecUnitsSizes.clear(); + size_t writtenTemporalDelimBytes = 0; if (picHdr.show_frame && associatedMetadata.m_CodecSpecificData.AV1HeadersInfo.temporal_delim_rendered) { pAV1BitstreamBuilder->write_temporal_delimiter_obu( @@ -2238,6 +2244,7 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc ); assert(pD3D12Enc->m_BitstreamHeadersBuffer.size() == writtenTemporalDelimBytes); debug_printf("Written OBU_TEMPORAL_DELIMITER bytes: %" PRIu64 "\n", writtenTemporalDelimBytes); + associatedMetadata.pWrittenCodecUnitsSizes.push_back(writtenTemporalDelimBytes); } size_t writtenSequenceBytes = 0; @@ -2255,6 +2262,7 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc pD3D12Enc->m_BitstreamHeadersBuffer.begin() + writtenTemporalDelimBytes, // placingPositionStart writtenSequenceBytes // Bytes Written AFTER placingPositionStart arg above ); + associatedMetadata.pWrittenCodecUnitsSizes.push_back(writtenSequenceBytes); assert(pD3D12Enc->m_BitstreamHeadersBuffer.size() == (writtenSequenceBytes + writtenTemporalDelimBytes)); debug_printf("Written OBU_SEQUENCE_HEADER bytes: %" PRIu64 "\n", writtenSequenceBytes); } @@ -2303,6 +2311,7 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc ); debug_printf("Written OBU_FRAME bytes: %" PRIu64 "\n", writtenFrameBytes); + associatedMetadata.pWrittenCodecUnitsSizes.push_back(writtenFrameBytes); assert(pD3D12Enc->m_BitstreamHeadersBuffer.size() == (writtenSequenceBytes + writtenTemporalDelimBytes + writtenFrameBytes)); @@ -2339,7 +2348,8 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc 1, associatedMetadata.m_associatedEncodeConfig.m_encoderSliceConfigDesc.m_TilesConfig_AV1.TilesPartition, associatedMetadata.m_associatedEncodeConfig.m_encoderSliceConfigDesc.m_TilesConfig_AV1.TilesGroups[0], - written_bytes_to_staging_bitstream_buffer); + written_bytes_to_staging_bitstream_buffer, + associatedMetadata.pWrittenCodecUnitsSizes); writtenTileBytes += tile_group_obu_size; comp_bitstream_offset += writtenTileBytes; @@ -2377,6 +2387,7 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc writtenTemporalDelimBytes, // placingPositionStart writtenFrameBytes // Bytes Written AFTER placingPositionStart arg above ); + associatedMetadata.pWrittenCodecUnitsSizes.push_back(writtenFrameBytes); debug_printf("Written OBU_FRAME_HEADER bytes: %" PRIu64 "\n", writtenFrameBytes); @@ -2440,6 +2451,7 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc staging_bitstream_buffer_offset); writtenTileBytes += writtenTileObuPrefixBytes; + associatedMetadata.pWrittenCodecUnitsSizes.push_back(writtenTileObuPrefixBytes); // Note: The buffer_subdata is queued in pD3D12Enc->base.context but doesn't execute immediately pD3D12Enc->base.context->buffer_subdata( @@ -2477,7 +2489,8 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc 1, associatedMetadata.m_associatedEncodeConfig.m_encoderSliceConfigDesc.m_TilesConfig_AV1.TilesPartition, currentTg, - written_bytes_to_staging_bitstream_buffer); + written_bytes_to_staging_bitstream_buffer, + associatedMetadata.pWrittenCodecUnitsSizes); staging_bitstream_buffer_offset += written_bytes_to_staging_bitstream_buffer; comp_bitstream_offset += tile_group_obu_size; @@ -2560,6 +2573,7 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc writtenTemporalDelimBytes // Bytes Written AFTER placingPositionStart arg above ); } + associatedMetadata.pWrittenCodecUnitsSizes.push_back(writtenTemporalDelimBytes); assert(writtenTemporalDelimBytes == (pD3D12Enc->m_BitstreamHeadersBuffer.size() - staging_buf_offset)); // Add current pending frame being processed in the loop @@ -2584,6 +2598,8 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc writtenShowExistingFrameBytes // Bytes Written AFTER placingPositionStart arg above ); } + associatedMetadata.pWrittenCodecUnitsSizes.push_back(writtenShowExistingFrameBytes); + assert(writtenShowExistingFrameBytes == (pD3D12Enc->m_BitstreamHeadersBuffer.size() - staging_buf_offset - writtenTemporalDelimBytes)); @@ -2653,8 +2669,12 @@ d3d12_video_encoder_build_post_encode_codec_bitstream_av1(struct d3d12_video_enc assert((writtenSequenceBytes + writtenTemporalDelimBytes + writtenFrameBytes + extra_show_existing_frame_payload_bytes) == pD3D12Enc->m_BitstreamHeadersBuffer.size()); - return static_cast(writtenSequenceBytes + writtenTemporalDelimBytes + writtenFrameBytes + + + uint32_t total_bytes_written = static_cast(writtenSequenceBytes + writtenTemporalDelimBytes + writtenFrameBytes + writtenTileBytes + extra_show_existing_frame_payload_bytes); + assert(std::accumulate(associatedMetadata.pWrittenCodecUnitsSizes.begin(), associatedMetadata.pWrittenCodecUnitsSizes.end(), 0u) == + static_cast(total_bytes_written)); + return total_bytes_written; } void @@ -2670,7 +2690,8 @@ upload_tile_group_obu(struct d3d12_video_encoder *pD3D12Enc, size_t TileSizeBytes, // Pass already +1'd from TileSizeBytesMinus1 const D3D12_VIDEO_ENCODER_AV1_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_TILES &TilesPartition, const av1_tile_group_t &tileGroup, - size_t &written_bytes_to_staging_bitstream_buffer) + size_t &written_bytes_to_staging_bitstream_buffer, + std::vector &pWrittenCodecUnitsSizes) { debug_printf("[Tile group start %d to end %d] Writing to comp_bit_destination %p starts at offset %" PRIu64 "\n", tileGroup.tg_start, @@ -2843,6 +2864,13 @@ upload_tile_group_obu(struct d3d12_video_encoder *pD3D12Enc, comp_bit_destination_offset); comp_bit_destination_offset += tile_size; + + size_t cur_tile_reportable_size = tile_size; + if (TileIdx != tileGroup.tg_end) + cur_tile_reportable_size += TileSizeBytes; /* extra tile_size_bytes_minus1 in all tiles except last*/ + if (TileIdx == 0) + cur_tile_reportable_size += bitstream_tile_group_obu_bytes; // part of the obu tile group header (make part of first tile) + pWrittenCodecUnitsSizes.push_back(cur_tile_reportable_size); } // Make sure we wrote the expected bytes that match the obu_size elements diff --git a/src/gallium/drivers/d3d12/d3d12_video_enc_av1.h b/src/gallium/drivers/d3d12/d3d12_video_enc_av1.h index 0aa002356f2..73760b97444 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_enc_av1.h +++ b/src/gallium/drivers/d3d12/d3d12_video_enc_av1.h @@ -67,7 +67,8 @@ upload_tile_group_obu(struct d3d12_video_encoder *pD3D12Enc, size_t TileSizeBytes, // Pass already +1'd from TileSizeBytesMinus1 const D3D12_VIDEO_ENCODER_AV1_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_TILES &TilesPartition, const av1_tile_group_t &tileGroup, - size_t &written_bytes_to_staging_bitstream_buffer); + size_t &written_bytes_to_staging_bitstream_buffer, + std::vector &pWrittenCodecUnitsSizes); void diff --git a/src/gallium/drivers/d3d12/d3d12_video_enc_h264.cpp b/src/gallium/drivers/d3d12/d3d12_video_enc_h264.cpp index 33d6cc7830e..31b72bc3790 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_enc_h264.cpp +++ b/src/gallium/drivers/d3d12/d3d12_video_enc_h264.cpp @@ -28,6 +28,8 @@ #include "d3d12_format.h" #include +#include +#include void d3d12_video_encoder_update_current_rate_control_h264(struct d3d12_video_encoder *pD3D12Enc, @@ -72,6 +74,7 @@ d3d12_video_encoder_update_current_rate_control_h264(struct d3d12_video_encoder picture->rate_ctrl[0].vbv_buf_initial_size; } + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rate_ctrl[0].max_au_size; if (picture->rate_ctrl[0].max_au_size > 0) { pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |= D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE; @@ -153,6 +156,7 @@ d3d12_video_encoder_update_current_rate_control_h264(struct d3d12_video_encoder picture->rate_ctrl[0].vbv_buf_initial_size; } #endif + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rate_ctrl[0].max_au_size; if (picture->rate_ctrl[0].max_au_size > 0) { pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |= D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE; @@ -228,6 +232,7 @@ d3d12_video_encoder_update_current_rate_control_h264(struct d3d12_video_encoder picture->rate_ctrl[0].vbv_buf_initial_size; } + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rate_ctrl[0].max_au_size; if (picture->rate_ctrl[0].max_au_size > 0) { pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |= D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE; @@ -406,46 +411,20 @@ d3d12_video_encoder_negotiate_current_h264_slices_configuration(struct d3d12_vid /// /// Try to see if can accomodate for multi-slice request by user /// - if (picture->num_slice_descriptors > 1) { - /* Last slice can be less for rounding frame size and leave some error for mb rounding */ - bool bUniformSizeSlices = true; - const double rounding_delta = 1.0; - for (uint32_t sliceIdx = 1; (sliceIdx < picture->num_slice_descriptors - 1) && bUniformSizeSlices; sliceIdx++) { - int64_t curSlice = picture->slices_descriptors[sliceIdx].num_macroblocks; - int64_t prevSlice = picture->slices_descriptors[sliceIdx - 1].num_macroblocks; - bUniformSizeSlices = bUniformSizeSlices && (std::abs(curSlice - prevSlice) <= rounding_delta); - } + if ((picture->slice_mode == PIPE_VIDEO_SLICE_MODE_BLOCKS) && (picture->num_slice_descriptors > 1)) { + /* Some apps send all same size slices minus 1 slice in any position in the descriptors */ + /* Lets validate that there are at most 2 different slice sizes in all the descriptors */ + std::vector slice_sizes(picture->num_slice_descriptors); + for (uint32_t i = 0; i < picture->num_slice_descriptors; i++) + slice_sizes[i] = picture->slices_descriptors[i].num_macroblocks; + std::sort(slice_sizes.begin(), slice_sizes.end()); + bool bUniformSizeSlices = (std::unique(slice_sizes.begin(), slice_sizes.end()) - slice_sizes.begin()) <= 2; uint32_t mbPerScanline = pD3D12Enc->m_currentEncodeConfig.m_currentResolution.Width / D3D12_VIDEO_H264_MB_IN_PIXELS; bool bSliceAligned = ((picture->slices_descriptors[0].num_macroblocks % mbPerScanline) == 0); - if (!bUniformSizeSlices && - d3d12_video_encoder_check_subregion_mode_support( - pD3D12Enc, - D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME)) { - - if (D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG) { // Check if fallback mode is enabled, or we should just fail - // without support - // Not supported to have custom slice sizes in D3D12 Video Encode fallback to uniform multi-slice - debug_printf( - "[d3d12_video_encoder_h264] WARNING: Requested slice control mode is not supported: All slices must " - "have the same number of macroblocks. Falling back to encoding uniform %d slices per frame.\n", - picture->num_slice_descriptors); - requestedSlicesMode = - D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME; - requestedSlicesConfig.NumberOfSlicesPerFrame = picture->num_slice_descriptors; - debug_printf("[d3d12_video_encoder_h264] Using multi slice encoding mode: " - "D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME " - "with %d slices per frame.\n", - requestedSlicesConfig.NumberOfSlicesPerFrame); - } else { - debug_printf("[d3d12_video_encoder_h264] Requested slice control mode is not supported: All slices must " - "have the same number of macroblocks. To continue with uniform slices as a fallback, must " - "enable the OS environment variable D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG"); - return false; - } - } else if (bUniformSizeSlices && bSliceAligned && + if (bUniformSizeSlices && bSliceAligned && d3d12_video_encoder_check_subregion_mode_support( pD3D12Enc, D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_ROWS_PER_SUBREGION)) { @@ -469,23 +448,43 @@ d3d12_video_encoder_negotiate_current_h264_slices_configuration(struct d3d12_vid "D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME " "with %d slices per frame.\n", requestedSlicesConfig.NumberOfSlicesPerFrame); - } else if (D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG) { // Check if fallback mode is enabled, or we should just fail - // without support - // Fallback to single slice encoding (assigned by default when initializing variables requestedSlicesMode, - // requestedSlicesConfig) - debug_printf( - "[d3d12_video_encoder_h264] WARNING: Slice mode for %d slices with bUniformSizeSlices: %d - bSliceAligned " - "%d not supported by the D3D12 driver, falling back to encoding a single slice per frame.\n", - picture->num_slice_descriptors, - bUniformSizeSlices, - bSliceAligned); + } else if (bUniformSizeSlices && + d3d12_video_encoder_check_subregion_mode_support( + pD3D12Enc, + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_SQUARE_UNITS_PER_SUBREGION_ROW_UNALIGNED)) { + requestedSlicesMode = + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_SQUARE_UNITS_PER_SUBREGION_ROW_UNALIGNED; + requestedSlicesConfig.NumberOfCodingUnitsPerSlice = picture->slices_descriptors[0].num_macroblocks; + debug_printf("[d3d12_video_encoder_h264] Using multi slice encoding mode: " + "D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_SQUARE_UNITS_PER_SUBREGION_ROW_UNALIGNED " + "with %d NumberOfCodingUnitsPerSlice per frame.\n", + requestedSlicesConfig.NumberOfCodingUnitsPerSlice); + } else { debug_printf("[d3d12_video_encoder_h264] Requested slice control mode is not supported: All slices must " - "have the same number of macroblocks. To continue with uniform slices as a fallback, must " - "enable the OS environment variable D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG"); + "have the same number of macroblocks.\n"); + return false; + } + } else if(picture->slice_mode == PIPE_VIDEO_SLICE_MODE_MAX_SLICE_SICE) { + if ((picture->max_slice_bytes > 0) && + d3d12_video_encoder_check_subregion_mode_support( + pD3D12Enc, + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION )) { + requestedSlicesMode = + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION; + requestedSlicesConfig.MaxBytesPerSlice = picture->max_slice_bytes; + debug_printf("[d3d12_video_encoder_h264] Using multi slice encoding mode: " + "D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION " + "with %d MaxBytesPerSlice per frame.\n", + requestedSlicesConfig.MaxBytesPerSlice); + } else { + debug_printf("[d3d12_video_encoder_h264] Requested slice control mode is not supported: All slices must " + "have the same number of macroblocks.\n"); return false; } } else { + requestedSlicesMode = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_FULL_FRAME; + requestedSlicesConfig.NumberOfSlicesPerFrame = 1; debug_printf("[d3d12_video_encoder_h264] Requested slice control mode is full frame. m_SlicesPartition_H264.NumberOfSlicesPerFrame = %d - m_encoderSliceConfigMode = %d \n", requestedSlicesConfig.NumberOfSlicesPerFrame, requestedSlicesMode); } @@ -1016,8 +1015,12 @@ d3d12_video_encoder_update_current_encoder_config_state_h264(struct d3d12_video_ if (pD3D12Enc->m_currentEncodeCapabilities.m_MaxSlicesInOutput > pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.MaxSubregionsNumber) { - debug_printf("[d3d12_video_encoder_h264] Desired number of subregions is not supported (higher than max " - "reported slice number in query caps)\n."); + debug_printf("[d3d12_video_encoder_h264] Desired number of subregions %d is not supported (higher than max " + "reported slice number %d in query caps) for current resolution (%d, %d)\n.", + pD3D12Enc->m_currentEncodeCapabilities.m_MaxSlicesInOutput, + pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.MaxSubregionsNumber, + pD3D12Enc->m_currentEncodeConfig.m_currentResolution.Width, + pD3D12Enc->m_currentEncodeConfig.m_currentResolution.Height); return false; } return true; @@ -1062,7 +1065,8 @@ d3d12_video_encoder_compare_slice_config_h264_hevc( } uint32_t -d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12Enc) +d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12Enc, + std::vector &pWrittenCodecUnitsSizes) { D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA currentPicParams = d3d12_video_encoder_get_current_picture_param_settings(pD3D12Enc); @@ -1072,7 +1076,20 @@ d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12E auto codecConfigDesc = d3d12_video_encoder_get_current_codec_config_desc(pD3D12Enc); auto MaxDPBCapacity = d3d12_video_encoder_get_current_max_dpb_capacity(pD3D12Enc); - size_t writtenSPSBytesCount = 0; + d3d12_video_bitstream_builder_h264 *pH264BitstreamBuilder = + static_cast(pD3D12Enc->m_upBitstreamBuilder.get()); + assert(pH264BitstreamBuilder); + + uint64_t writtenAUDBytesCount = 0; + pWrittenCodecUnitsSizes.clear(); + if (pH264BitstreamBuilder->insert_aud_nalu_requested()) + { + pH264BitstreamBuilder->write_aud(pD3D12Enc->m_BitstreamHeadersBuffer, + pD3D12Enc->m_BitstreamHeadersBuffer.begin(), + writtenAUDBytesCount); + pWrittenCodecUnitsSizes.push_back(writtenAUDBytesCount); + } + bool isFirstFrame = (pD3D12Enc->m_fenceValue == 1); bool writeNewSPS = isFirstFrame // on first frame || ((pD3D12Enc->m_currentEncodeConfig.m_seqFlags & // also on resolution change @@ -1080,12 +1097,9 @@ d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12E // Also on input format dirty flag for new SPS, VUI etc || (pD3D12Enc->m_currentEncodeConfig.m_ConfigDirtyFlags & d3d12_video_encoder_config_dirty_flag_sequence_info); - d3d12_video_bitstream_builder_h264 *pH264BitstreamBuilder = - static_cast(pD3D12Enc->m_upBitstreamBuilder.get()); - assert(pH264BitstreamBuilder); - uint32_t active_seq_parameter_set_id = pH264BitstreamBuilder->get_active_sps_id(); + uint64_t writtenSPSBytesCount = 0; if (writeNewSPS) { // For every new SPS for reconfiguration, increase the active_sps_id if (!isFirstFrame) { @@ -1103,11 +1117,12 @@ d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12E pD3D12Enc->m_currentEncodeConfig.m_currentResolution, pD3D12Enc->m_currentEncodeConfig.m_FrameCroppingCodecConfig, pD3D12Enc->m_BitstreamHeadersBuffer, - pD3D12Enc->m_BitstreamHeadersBuffer.begin(), + pD3D12Enc->m_BitstreamHeadersBuffer.begin() + writtenAUDBytesCount, writtenSPSBytesCount); + pWrittenCodecUnitsSizes.push_back(writtenSPSBytesCount); } - size_t writtenPPSBytesCount = 0; + uint64_t writtenPPSBytesCount = 0; pH264BitstreamBuilder->build_pps(*profDesc.pH264Profile, *codecConfigDesc.pH264Config, *currentPicParams.pH264PicData, @@ -1121,17 +1136,20 @@ d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12E if ( (writtenPPSBytesCount != active_pps.size()) || memcmp(pD3D12Enc->m_StagingHeadersBuffer.data(), active_pps.data(), writtenPPSBytesCount)) { active_pps = pD3D12Enc->m_StagingHeadersBuffer; - pD3D12Enc->m_BitstreamHeadersBuffer.resize(writtenSPSBytesCount + writtenPPSBytesCount); - memcpy(&pD3D12Enc->m_BitstreamHeadersBuffer.data()[writtenSPSBytesCount], pD3D12Enc->m_StagingHeadersBuffer.data(), writtenPPSBytesCount); + pD3D12Enc->m_BitstreamHeadersBuffer.resize(writtenAUDBytesCount + writtenSPSBytesCount + writtenPPSBytesCount); + memcpy(&pD3D12Enc->m_BitstreamHeadersBuffer.data()[writtenAUDBytesCount + writtenSPSBytesCount], pD3D12Enc->m_StagingHeadersBuffer.data(), writtenPPSBytesCount); + pWrittenCodecUnitsSizes.push_back(writtenPPSBytesCount); } else { writtenPPSBytesCount = 0; debug_printf("Skipping PPS (same as active PPS) for fenceValue: %" PRIu64 "\n", pD3D12Enc->m_fenceValue); } // Shrink buffer to fit the headers - if (pD3D12Enc->m_BitstreamHeadersBuffer.size() > (writtenPPSBytesCount + writtenSPSBytesCount)) { - pD3D12Enc->m_BitstreamHeadersBuffer.resize(writtenPPSBytesCount + writtenSPSBytesCount); + if (pD3D12Enc->m_BitstreamHeadersBuffer.size() > (writtenAUDBytesCount + writtenSPSBytesCount + writtenPPSBytesCount)) { + pD3D12Enc->m_BitstreamHeadersBuffer.resize(writtenAUDBytesCount + writtenSPSBytesCount + writtenPPSBytesCount); } + assert(std::accumulate(pWrittenCodecUnitsSizes.begin(), pWrittenCodecUnitsSizes.end(), 0u) == + static_cast(pD3D12Enc->m_BitstreamHeadersBuffer.size())); return pD3D12Enc->m_BitstreamHeadersBuffer.size(); } diff --git a/src/gallium/drivers/d3d12/d3d12_video_enc_h264.h b/src/gallium/drivers/d3d12/d3d12_video_enc_h264.h index 3afc3a199e4..abd7df664a5 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_enc_h264.h +++ b/src/gallium/drivers/d3d12/d3d12_video_enc_h264.h @@ -57,7 +57,8 @@ d3d12_video_encoder_update_current_frame_pic_params_info_h264(struct d3d12_video D3D12_VIDEO_ENCODER_FRAME_TYPE_H264 d3d12_video_encoder_convert_frame_type_h264(enum pipe_h2645_enc_picture_type picType); uint32_t -d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12Enc); +d3d12_video_encoder_build_codec_headers_h264(struct d3d12_video_encoder *pD3D12Enc, + std::vector &pWrittenCodecUnitsSizes); bool d3d12_video_encoder_compare_slice_config_h264_hevc( D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE targetMode, diff --git a/src/gallium/drivers/d3d12/d3d12_video_enc_hevc.cpp b/src/gallium/drivers/d3d12/d3d12_video_enc_hevc.cpp index c20b67a1400..b225b069b07 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_enc_hevc.cpp +++ b/src/gallium/drivers/d3d12/d3d12_video_enc_hevc.cpp @@ -28,6 +28,8 @@ #include "d3d12_format.h" #include +#include +#include void d3d12_video_encoder_update_current_rate_control_hevc(struct d3d12_video_encoder *pD3D12Enc, @@ -72,6 +74,7 @@ d3d12_video_encoder_update_current_rate_control_hevc(struct d3d12_video_encoder picture->rc.vbv_buf_initial_size; } + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rc.max_au_size; if (picture->rc.max_au_size > 0) { pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |= D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE; @@ -152,6 +155,7 @@ d3d12_video_encoder_update_current_rate_control_hevc(struct d3d12_video_encoder picture->rc.vbv_buf_initial_size; } #endif + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rc.max_au_size; if (picture->rc.max_au_size > 0) { pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |= D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE; @@ -229,6 +233,7 @@ d3d12_video_encoder_update_current_rate_control_hevc(struct d3d12_video_encoder picture->rc.vbv_buf_initial_size; } + pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.max_frame_size = picture->rc.max_au_size; if (picture->rc.max_au_size > 0) { pD3D12Enc->m_currentEncodeConfig.m_encoderRateControlDesc.m_Flags |= D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE; @@ -410,15 +415,14 @@ d3d12_video_encoder_negotiate_current_hevc_slices_configuration(struct d3d12_vid /// /// Try to see if can accomodate for multi-slice request by user /// - if (picture->num_slice_descriptors > 1) { - /* Last slice can be less for rounding frame size and leave some error for mb rounding */ - bool bUniformSizeSlices = true; - const double rounding_delta = 1.0; - for (uint32_t sliceIdx = 1; (sliceIdx < picture->num_slice_descriptors - 1) && bUniformSizeSlices; sliceIdx++) { - int64_t curSlice = picture->slices_descriptors[sliceIdx].num_ctu_in_slice; - int64_t prevSlice = picture->slices_descriptors[sliceIdx - 1].num_ctu_in_slice; - bUniformSizeSlices = bUniformSizeSlices && (std::abs(curSlice - prevSlice) <= rounding_delta); - } + if ((picture->slice_mode == PIPE_VIDEO_SLICE_MODE_BLOCKS) && (picture->num_slice_descriptors > 1)) { + /* Some apps send all same size slices minus 1 slice in any position in the descriptors */ + /* Lets validate that there are at most 2 different slice sizes in all the descriptors */ + std::vector slice_sizes(picture->num_slice_descriptors); + for (uint32_t i = 0; i < picture->num_slice_descriptors; i++) + slice_sizes[i] = picture->slices_descriptors[i].num_ctu_in_slice; + std::sort(slice_sizes.begin(), slice_sizes.end()); + bool bUniformSizeSlices = (std::unique(slice_sizes.begin(), slice_sizes.end()) - slice_sizes.begin()) <= 2; uint32_t subregion_block_pixel_size = pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.SubregionBlockPixelsSize; uint32_t num_subregions_per_scanline = @@ -444,18 +448,10 @@ d3d12_video_encoder_negotiate_current_hevc_slices_configuration(struct d3d12_vid bool bSliceAligned = ((num_subregions_per_slice % num_subregions_per_scanline) == 0); - if (!bUniformSizeSlices && - d3d12_video_encoder_check_subregion_mode_support( - pD3D12Enc, - D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME)) { - - if (D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG) { // Check if fallback mode is enabled, or we should just fail - // without support - // Not supported to have custom slice sizes in D3D12 Video Encode fallback to uniform multi-slice - debug_printf( - "[d3d12_video_encoder_hevc] WARNING: Requested slice control mode is not supported: All slices must " - "have the same number of macroblocks. Falling back to encoding uniform %d slices per frame.\n", - picture->num_slice_descriptors); + if (bUniformSizeSlices && + d3d12_video_encoder_check_subregion_mode_support( + pD3D12Enc, + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME)) { requestedSlicesMode = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME; requestedSlicesConfig.NumberOfSlicesPerFrame = picture->num_slice_descriptors; @@ -463,12 +459,18 @@ d3d12_video_encoder_negotiate_current_hevc_slices_configuration(struct d3d12_vid "D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME " "with %d slices per frame.\n", requestedSlicesConfig.NumberOfSlicesPerFrame); - } else { - debug_printf("[d3d12_video_encoder_hevc] Requested slice control mode is not supported: All slices must " - "have the same number of macroblocks. To continue with uniform slices as a fallback, must " - "enable the OS environment variable D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG"); - return false; - } + } else if (bUniformSizeSlices && + d3d12_video_encoder_check_subregion_mode_support( + pD3D12Enc, + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_SQUARE_UNITS_PER_SUBREGION_ROW_UNALIGNED)) { + requestedSlicesMode = + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_SQUARE_UNITS_PER_SUBREGION_ROW_UNALIGNED; + requestedSlicesConfig.NumberOfCodingUnitsPerSlice = num_subregions_per_slice; + debug_printf("[d3d12_video_encoder_hevc] Using multi slice encoding mode: " + "D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_SQUARE_UNITS_PER_SUBREGION_ROW_UNALIGNED " + "with %d NumberOfCodingUnitsPerSlice per frame.\n", + requestedSlicesConfig.NumberOfCodingUnitsPerSlice); + } else if (bUniformSizeSlices && bSliceAligned && d3d12_video_encoder_check_subregion_mode_support( pD3D12Enc, @@ -483,33 +485,33 @@ d3d12_video_encoder_negotiate_current_hevc_slices_configuration(struct d3d12_vid "%d subregion block rows (%d pix scanlines) per slice.\n", requestedSlicesConfig.NumberOfRowsPerSlice, pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.SubregionBlockPixelsSize); - } else if (bUniformSizeSlices && - d3d12_video_encoder_check_subregion_mode_support( - pD3D12Enc, - D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME)) { - requestedSlicesMode = - D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME; - requestedSlicesConfig.NumberOfSlicesPerFrame = picture->num_slice_descriptors; - debug_printf("[d3d12_video_encoder_hevc] Using multi slice encoding mode: " - "D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME " - "with %d slices per frame.\n", - requestedSlicesConfig.NumberOfSlicesPerFrame); - } else if (D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG) { // Check if fallback mode is enabled, or we should just fail - // without support - // Fallback to single slice encoding (assigned by default when initializing variables requestedSlicesMode, - // requestedSlicesConfig) - debug_printf( - "[d3d12_video_encoder_hevc] WARNING: Slice mode for %d slices with bUniformSizeSlices: %d - bSliceAligned " - "%d not supported by the D3D12 driver, falling back to encoding a single slice per frame.\n", - picture->num_slice_descriptors, - bUniformSizeSlices, - bSliceAligned); } else { debug_printf("[d3d12_video_encoder_hevc] Requested slice control mode is not supported: All slices must " - "have the same number of macroblocks. To continue with uniform slices as a fallback, must " - "enable the OS environment variable D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG\n"); + "have the same number of macroblocks.\n"); return false; } + } else if(picture->slice_mode == PIPE_VIDEO_SLICE_MODE_MAX_SLICE_SICE) { + if ((picture->max_slice_bytes > 0) && + d3d12_video_encoder_check_subregion_mode_support( + pD3D12Enc, + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION )) { + requestedSlicesMode = + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION; + requestedSlicesConfig.MaxBytesPerSlice = picture->max_slice_bytes; + debug_printf("[d3d12_video_encoder_hevc] Using multi slice encoding mode: " + "D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION " + "with %d MaxBytesPerSlice per frame.\n", + requestedSlicesConfig.MaxBytesPerSlice); + } else { + debug_printf("[d3d12_video_encoder_hevc] Requested slice control mode is not supported: All slices must " + "have the same number of macroblocks.\n"); + return false; + } + } else { + requestedSlicesMode = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_FULL_FRAME; + requestedSlicesConfig.NumberOfSlicesPerFrame = 1; + debug_printf("[d3d12_video_encoder_hevc] Requested slice control mode is full frame. m_SlicesPartition_H264.NumberOfSlicesPerFrame = %d - m_encoderSliceConfigMode = %d \n", + requestedSlicesConfig.NumberOfSlicesPerFrame, requestedSlicesMode); } if (!d3d12_video_encoder_isequal_slice_config_hevc( @@ -878,6 +880,9 @@ d3d12_video_encoder_update_current_encoder_config_state_hevc(struct d3d12_video_ // Will call for d3d12 driver support based on the initial requested features, then // try to fallback if any of them is not supported and return the negotiated d3d12 settings D3D12_FEATURE_DATA_VIDEO_ENCODER_SUPPORT1 capEncoderSupportData1 = {}; + // Get max number of slices per frame supported + pD3D12Enc->m_currentEncodeConfig.m_encoderSliceConfigMode = + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_PARTITIONING_SUBREGIONS_PER_FRAME; if (!d3d12_video_encoder_negotiate_requested_features_and_d3d12_driver_caps(pD3D12Enc, capEncoderSupportData1)) { debug_printf("[d3d12_video_encoder_hevc] After negotiating caps, D3D12_FEATURE_VIDEO_ENCODER_SUPPORT1 " "arguments are not supported - " @@ -887,6 +892,12 @@ d3d12_video_encoder_update_current_encoder_config_state_hevc(struct d3d12_video_ return false; } + // Set slices config (configure before calling d3d12_video_encoder_calculate_max_slices_count_in_output) + if(!d3d12_video_encoder_negotiate_current_hevc_slices_configuration(pD3D12Enc, hevcPic)) { + debug_printf("d3d12_video_encoder_negotiate_current_hevc_slices_configuration failed!\n"); + return false; + } + /// // Calculate current settings based on the returned values from the caps query // @@ -898,12 +909,6 @@ d3d12_video_encoder_update_current_encoder_config_state_hevc(struct d3d12_video_ pD3D12Enc->m_currentEncodeConfig.m_currentResolution, pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.SubregionBlockPixelsSize); - // Set slices config - if(!d3d12_video_encoder_negotiate_current_hevc_slices_configuration(pD3D12Enc, hevcPic)) { - debug_printf("d3d12_video_encoder_negotiate_current_hevc_slices_configuration failed!\n"); - return false; - } - // Set GOP config if(!d3d12_video_encoder_update_hevc_gop_configuration(pD3D12Enc, hevcPic)) { debug_printf("d3d12_video_encoder_update_hevc_gop_configuration failed!\n"); @@ -956,8 +961,12 @@ d3d12_video_encoder_update_current_encoder_config_state_hevc(struct d3d12_video_ if (pD3D12Enc->m_currentEncodeCapabilities.m_MaxSlicesInOutput > pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.MaxSubregionsNumber) { - debug_printf("[d3d12_video_encoder_hevc] Desired number of subregions is not supported (higher than max " - "reported slice number in query caps)\n."); + debug_printf("[d3d12_video_encoder_hevc] Desired number of subregions %d is not supported (higher than max " + "reported slice number %d in query caps) for current resolution (%d, %d)\n.", + pD3D12Enc->m_currentEncodeCapabilities.m_MaxSlicesInOutput, + pD3D12Enc->m_currentEncodeCapabilities.m_currentResolutionSupportCaps.MaxSubregionsNumber, + pD3D12Enc->m_currentEncodeConfig.m_currentResolution.Width, + pD3D12Enc->m_currentEncodeConfig.m_currentResolution.Height); return false; } return true; @@ -997,7 +1006,8 @@ d3d12_video_encoder_isequal_slice_config_hevc( } uint32_t -d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12Enc) +d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12Enc, + std::vector &pWrittenCodecUnitsSizes) { D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA currentPicParams = d3d12_video_encoder_get_current_picture_param_settings(pD3D12Enc); @@ -1007,8 +1017,7 @@ d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12E auto codecConfigDesc = d3d12_video_encoder_get_current_codec_config_desc(pD3D12Enc); auto MaxDPBCapacity = d3d12_video_encoder_get_current_max_dpb_capacity(pD3D12Enc); - size_t writtenSPSBytesCount = 0; - size_t writtenVPSBytesCount = 0; + pWrittenCodecUnitsSizes.clear(); bool isFirstFrame = (pD3D12Enc->m_fenceValue == 1); bool writeNewSPS = isFirstFrame // on first frame || ((pD3D12Enc->m_currentEncodeConfig.m_seqFlags & // also on resolution change @@ -1025,6 +1034,7 @@ d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12E bool writeNewVPS = isFirstFrame; + uint64_t writtenVPSBytesCount = 0; if (writeNewVPS) { bool gopHasBFrames = (pD3D12Enc->m_currentEncodeConfig.m_encoderGOPConfigDesc.m_HEVCGroupOfPictures.PPicturePeriod > 1); pHEVCBitstreamBuilder->build_vps(*profDesc.pHEVCProfile, @@ -1036,8 +1046,11 @@ d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12E pD3D12Enc->m_BitstreamHeadersBuffer, pD3D12Enc->m_BitstreamHeadersBuffer.begin(), writtenVPSBytesCount); + + pWrittenCodecUnitsSizes.push_back(writtenVPSBytesCount); } + uint64_t writtenSPSBytesCount = 0; if (writeNewSPS) { // For every new SPS for reconfiguration, increase the active_sps_id if (!isFirstFrame) { @@ -1058,10 +1071,11 @@ d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12E pD3D12Enc->m_BitstreamHeadersBuffer, pD3D12Enc->m_BitstreamHeadersBuffer.begin() + writtenVPSBytesCount, writtenSPSBytesCount); + + pWrittenCodecUnitsSizes.push_back(writtenSPSBytesCount); } size_t writtenPPSBytesCount = 0; - pHEVCBitstreamBuilder->build_pps(pHEVCBitstreamBuilder->get_latest_sps(), currentPicParams.pHEVCPicData->slice_pic_parameter_set_id, *codecConfigDesc.pHEVCConfig, @@ -1076,6 +1090,7 @@ d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12E active_pps = pD3D12Enc->m_StagingHeadersBuffer; pD3D12Enc->m_BitstreamHeadersBuffer.resize(writtenSPSBytesCount + writtenVPSBytesCount + writtenPPSBytesCount); memcpy(&pD3D12Enc->m_BitstreamHeadersBuffer.data()[(writtenSPSBytesCount + writtenVPSBytesCount)], pD3D12Enc->m_StagingHeadersBuffer.data(), writtenPPSBytesCount); + pWrittenCodecUnitsSizes.push_back(writtenPPSBytesCount); } else { writtenPPSBytesCount = 0; debug_printf("Skipping PPS (same as active PPS) for fenceValue: %" PRIu64 "\n", pD3D12Enc->m_fenceValue); @@ -1086,5 +1101,7 @@ d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12E pD3D12Enc->m_BitstreamHeadersBuffer.resize(writtenPPSBytesCount + writtenSPSBytesCount + writtenVPSBytesCount); } + assert(std::accumulate(pWrittenCodecUnitsSizes.begin(), pWrittenCodecUnitsSizes.end(), 0u) == + static_cast(pD3D12Enc->m_BitstreamHeadersBuffer.size())); return pD3D12Enc->m_BitstreamHeadersBuffer.size(); } diff --git a/src/gallium/drivers/d3d12/d3d12_video_enc_hevc.h b/src/gallium/drivers/d3d12/d3d12_video_enc_hevc.h index 14141ead439..d81536b6f00 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_enc_hevc.h +++ b/src/gallium/drivers/d3d12/d3d12_video_enc_hevc.h @@ -57,7 +57,8 @@ d3d12_video_encoder_update_current_frame_pic_params_info_hevc(struct d3d12_video D3D12_VIDEO_ENCODER_FRAME_TYPE_HEVC d3d12_video_encoder_convert_frame_type_hevc(enum pipe_h2645_enc_picture_type picType); uint32_t -d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12Enc); +d3d12_video_encoder_build_codec_headers_hevc(struct d3d12_video_encoder *pD3D12Enc, + std::vector &pWrittenCodecUnitsSizes); bool d3d12_video_encoder_isequal_slice_config_hevc( D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE targetMode, diff --git a/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_h264.cpp b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_h264.cpp index d53664b057a..953f1c24117 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_h264.cpp +++ b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_h264.cpp @@ -25,6 +25,10 @@ #include +d3d12_video_bitstream_builder_h264::d3d12_video_bitstream_builder_h264(bool insert_aud_nalu) + : m_insert_aud_nalu(insert_aud_nalu) +{ } + inline H264_SPEC_PROFILES Convert12ToSpecH264Profiles(D3D12_VIDEO_ENCODER_PROFILE_H264 profile12) { @@ -192,6 +196,14 @@ d3d12_video_bitstream_builder_h264::write_end_of_sequence_nalu(std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes) +{ + m_h264Encoder.write_access_unit_delimiter_nalu(headerBitstream, placingPositionStart, writtenBytes); +} + void d3d12_video_bitstream_builder_h264::build_pps(const D3D12_VIDEO_ENCODER_PROFILE_H264 & profile, const D3D12_VIDEO_ENCODER_CODEC_CONFIGURATION_H264 & codecConfig, diff --git a/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_h264.h b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_h264.h index 7be98e18b93..d385e395b08 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_h264.h +++ b/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_h264.h @@ -31,7 +31,7 @@ class d3d12_video_bitstream_builder_h264 : public d3d12_video_bitstream_builder_ { public: - d3d12_video_bitstream_builder_h264() {}; + d3d12_video_bitstream_builder_h264(bool insert_aud_nalu = false); ~d3d12_video_bitstream_builder_h264() {}; void build_sps(const struct pipe_h264_enc_seq_param & seqData, @@ -64,6 +64,10 @@ class d3d12_video_bitstream_builder_h264 : public d3d12_video_bitstream_builder_ std::vector::iterator placingPositionStart, size_t & writtenBytes); + void write_aud(std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes); + void print_pps(const H264_PPS &pps); void print_sps(const H264_SPS &sps); @@ -92,11 +96,14 @@ class d3d12_video_bitstream_builder_h264 : public d3d12_video_bitstream_builder_ debug_printf("[d3d12_video_bitstream_builder_h264] Setting new active PPS ID: %d ", m_activePPSIndex); }; + bool insert_aud_nalu_requested() { return m_insert_aud_nalu; } + private: d3d12_video_nalu_writer_h264 m_h264Encoder; std::vector m_activePPS; uint32_t m_activeSPSIndex = 0; uint32_t m_activePPSIndex = 0; + bool m_insert_aud_nalu = false; }; #endif diff --git a/src/gallium/drivers/d3d12/d3d12_video_encoder_nalu_writer_h264.cpp b/src/gallium/drivers/d3d12/d3d12_video_encoder_nalu_writer_h264.cpp index 5e0f218de4e..8bae97ef163 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_encoder_nalu_writer_h264.cpp +++ b/src/gallium/drivers/d3d12/d3d12_video_encoder_nalu_writer_h264.cpp @@ -483,3 +483,45 @@ d3d12_video_nalu_writer_h264::write_end_of_sequence_nalu(std::vector & writtenBytes = naluByteSize; } + +void +d3d12_video_nalu_writer_h264::write_access_unit_delimiter_nalu(std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes) +{ + d3d12_video_encoder_bitstream rbsp, nalu; + if (!rbsp.create_bitstream(8)) { + debug_printf("rbsp.create_bitstream(8) failed.\n"); + assert(false); + } + + if (!nalu.create_bitstream(2 * MAX_COMPRESSED_PPS)) { + debug_printf("nalu.create_bitstream(2 * MAX_COMPRESSED_PPS) failed.\n"); + assert(false); + } + + rbsp.set_start_code_prevention(true); + rbsp.put_bits(3, 2/*primary_pic_type*/); + rbsp_trailing(&rbsp); + rbsp.flush(); + if (wrap_rbsp_into_nalu(&nalu, &rbsp, NAL_REFIDC_NONREF, NAL_TYPE_ACCESS_UNIT_DELIMITER) <= 0u) { + + debug_printf( + "wrap_rbsp_into_nalu(&nalu, &rbsp, NAL_REFIDC_NONREF, NAL_TYPE_ACCESS_UNIT_DELIMITER) didn't write any bytes.\n"); + assert(false); + } + + // Deep copy nalu into headerBitstream, nalu gets out of scope here and its destructor frees the nalu object buffer + // memory. + uint8_t *naluBytes = nalu.get_bitstream_buffer(); + size_t naluByteSize = nalu.get_byte_count(); + + auto startDstIndex = std::distance(headerBitstream.begin(), placingPositionStart); + if (headerBitstream.size() < (startDstIndex + naluByteSize)) { + headerBitstream.resize(startDstIndex + naluByteSize); + } + + std::copy_n(&naluBytes[0], naluByteSize, &headerBitstream.data()[startDstIndex]); + + writtenBytes = naluByteSize; +} diff --git a/src/gallium/drivers/d3d12/d3d12_video_encoder_nalu_writer_h264.h b/src/gallium/drivers/d3d12/d3d12_video_encoder_nalu_writer_h264.h index a20c17b4357..c69339707a0 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_encoder_nalu_writer_h264.h +++ b/src/gallium/drivers/d3d12/d3d12_video_encoder_nalu_writer_h264.h @@ -43,7 +43,7 @@ enum H264_NALU_TYPE NAL_TYPE_SEI = 6, NAL_TYPE_SPS = 7, NAL_TYPE_PPS = 8, - NAL_TYPE_ACCESS_UNIT_DEMILITER = 9, + NAL_TYPE_ACCESS_UNIT_DELIMITER = 9, NAL_TYPE_END_OF_SEQUENCE = 10, NAL_TYPE_END_OF_STREAM = 11, NAL_TYPE_FILLER_DATA = 12, @@ -185,6 +185,10 @@ class d3d12_video_nalu_writer_h264 std::vector::iterator placingPositionStart, size_t & writtenBytes); + void write_access_unit_delimiter_nalu(std::vector & headerBitstream, + std::vector::iterator placingPositionStart, + size_t & writtenBytes); + private: // Writes from structure into bitstream with RBSP trailing but WITHOUT NAL unit wrap (eg. nal_idc_type, etc) uint32_t write_sps_bytes(d3d12_video_encoder_bitstream *pBitstream, H264_SPS *pSPS); diff --git a/src/gallium/drivers/d3d12/d3d12_video_screen.cpp b/src/gallium/drivers/d3d12/d3d12_video_screen.cpp index 7776bff610a..7a1fd7ede92 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_screen.cpp +++ b/src/gallium/drivers/d3d12/d3d12_video_screen.cpp @@ -515,10 +515,27 @@ d3d12_video_encode_supported_slice_structures(const D3D12_VIDEO_ENCODER_CODEC &c supportedSliceStructuresBitMask |= PIPE_VIDEO_CAP_SLICE_STRUCTURE_POWER_OF_TWO_ROWS; } - /* Needs more work in VA frontend to support VAEncMiscParameterMaxSliceSize - and the driver potentially reporting back status in VACodedBufferSegment */ + capDataSubregionLayout.SubregionMode = + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_SQUARE_UNITS_PER_SUBREGION_ROW_UNALIGNED; + hr = pD3D12VideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE, + &capDataSubregionLayout, + sizeof(capDataSubregionLayout)); + if (FAILED(hr)) { + debug_printf("CheckFeatureSupport failed with HR %x\n", hr); + } else if (capDataSubregionLayout.IsSupported) { + /* This would be setting K rows per subregions in this D3D12 mode */ + supportedSliceStructuresBitMask |= PIPE_VIDEO_CAP_SLICE_STRUCTURE_EQUAL_MULTI_ROWS; + /* Assuming height/blocksize >= max_supported_slices, which is reported + in PIPE_VIDEO_CAP_ENC_MAX_SLICES_PER_FRAME and should be checked by the client*/ + /* This would be setting 1 row per subregion in this D3D12 mode */ + supportedSliceStructuresBitMask |= PIPE_VIDEO_CAP_SLICE_STRUCTURE_EQUAL_ROWS; + /* This is ok, would be setting K rows per subregions in this D3D12 mode (and rounding the last one) */ + supportedSliceStructuresBitMask |= PIPE_VIDEO_CAP_SLICE_STRUCTURE_POWER_OF_TWO_ROWS; + /* This is ok, would be setting K MBs per subregions in this D3D12 mode*/ + supportedSliceStructuresBitMask |= PIPE_VIDEO_CAP_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS; + } - /*capDataSubregionLayout.SubregionMode = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION; + capDataSubregionLayout.SubregionMode = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_BYTES_PER_SUBREGION; hr = pD3D12VideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE, &capDataSubregionLayout, sizeof(capDataSubregionLayout)); @@ -526,7 +543,7 @@ d3d12_video_encode_supported_slice_structures(const D3D12_VIDEO_ENCODER_CODEC &c debug_printf("CheckFeatureSupport failed with HR %x\n", hr); } else if (capDataSubregionLayout.IsSupported) { supportedSliceStructuresBitMask |= PIPE_VIDEO_CAP_SLICE_STRUCTURE_MAX_SLICE_SIZE; - }*/ + } return supportedSliceStructuresBitMask; } @@ -1666,6 +1683,11 @@ d3d12_screen_get_video_param_encode(struct pipe_screen *pscreen, return 1; case PIPE_VIDEO_CAP_NPOT_TEXTURES: return 1; + case PIPE_VIDEO_CAP_ENC_SUPPORTS_FEEDBACK_METADATA: + return (PIPE_VIDEO_FEEDBACK_METADATA_TYPE_BITSTREAM_SIZE | + PIPE_VIDEO_FEEDBACK_METADATA_TYPE_ENCODE_RESULT | + PIPE_VIDEO_FEEDBACK_METADATA_TYPE_CODEC_UNIT_LOCATION | + PIPE_VIDEO_FEEDBACK_METADATA_TYPE_MAX_FRAME_SIZE_OVERFLOW); case PIPE_VIDEO_CAP_MAX_WIDTH: case PIPE_VIDEO_CAP_MAX_HEIGHT: case PIPE_VIDEO_CAP_MIN_WIDTH: diff --git a/src/gallium/drivers/d3d12/d3d12_video_types.h b/src/gallium/drivers/d3d12/d3d12_video_types.h index 9a9b61c8ca1..fb319278fb7 100644 --- a/src/gallium/drivers/d3d12/d3d12_video_types.h +++ b/src/gallium/drivers/d3d12/d3d12_video_types.h @@ -67,14 +67,6 @@ GetDesc(ID3D12VideoDecoderHeap *heap) */ const bool D3D12_VIDEO_ENC_CBR_FORCE_VBV_EQUAL_BITRATE = debug_get_bool_option("D3D12_VIDEO_ENC_CBR_FORCE_VBV_EQUAL_BITRATE", false); -// Allow encoder to continue the encoding session when aa slice mode -// is requested but not supported. -// -// If setting this OS Env variable to true, the encoder will try to adjust to the closest slice -// setting available and encode using that configuration anyway -// -const bool D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG = debug_get_bool_option("D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG", false); - const bool D3D12_VIDEO_ENC_ASYNC = debug_get_bool_option("D3D12_VIDEO_ENC_ASYNC", true); /** @@ -179,5 +171,8 @@ d3d12_video_encoder_convert_12tusize_to_pixel_size_hevc(const D3D12_VIDEO_ENCODE DEFINE_ENUM_FLAG_OPERATORS(pipe_enc_feature); DEFINE_ENUM_FLAG_OPERATORS(pipe_h265_enc_pred_direction); +DEFINE_ENUM_FLAG_OPERATORS(codec_unit_location_flags); +DEFINE_ENUM_FLAG_OPERATORS(pipe_video_feedback_encode_result_flags); +DEFINE_ENUM_FLAG_OPERATORS(pipe_video_feedback_metadata_type); #endif