From 3e2c768aa860f96074df73cd3171960e76f5c312 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 6 Dec 2022 15:19:08 +1000 Subject: [PATCH] radv/vcn: enable dynamic dpb tier 2 for h264/h265 on navi21+ navi21 can do separate image support, using tier 2 DPB messages. This enables support for doing that in the vulkan video driver. Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/vulkan/radv_video.c | 109 ++++++++++++++++++++++++++++++++---- 1 file changed, 99 insertions(+), 10 deletions(-) diff --git a/src/amd/vulkan/radv_video.c b/src/amd/vulkan/radv_video.c index aa4013f2d59..c348fac3af4 100644 --- a/src/amd/vulkan/radv_video.c +++ b/src/amd/vulkan/radv_video.c @@ -213,13 +213,18 @@ radv_CreateVideoSessionKHR(VkDevice _device, } vid->interlaced = false; + vid->dpb_type = DPB_MAX_RES; switch (vid->vk.op) { case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: vid->stream_type = RDECODE_CODEC_H264_PERF; + if (device->physical_device->rad_info.family >= CHIP_NAVI21) + vid->dpb_type = DPB_DYNAMIC_TIER_2; break; case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: vid->stream_type = RDECODE_CODEC_H265; + if (device->physical_device->rad_info.family >= CHIP_NAVI21) + vid->dpb_type = DPB_DYNAMIC_TIER_2; break; default: return VK_ERROR_FEATURE_NOT_PRESENT; @@ -227,7 +232,6 @@ radv_CreateVideoSessionKHR(VkDevice _device, vid->stream_handle = si_vid_alloc_stream_handle(); vid->dbg_frame_cnt = 0; - vid->dpb_type = DPB_MAX_RES; vid->db_alignment = (device->physical_device->rad_info.family >= CHIP_RENOIR && vid->vk.max_coded.width > 32 && (vid->stream_type == RDECODE_CODEC_H265 && @@ -334,6 +338,9 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, pCapabilities->maxDpbSlots = NUM_H264_REFS; pCapabilities->maxActiveReferencePictures = NUM_H264_REFS; + /* for h264 on navi21+ separate dpb images should work */ + if (pdevice->rad_info.family >= CHIP_NAVI21) + pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR; ext->fieldOffsetGranularity.x = 0; ext->fieldOffsetGranularity.y = 0; ext->maxLevelIdc = 51; @@ -346,6 +353,9 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_H265_CAPABILITIES_KHR); pCapabilities->maxDpbSlots = NUM_H264_REFS; pCapabilities->maxActiveReferencePictures = NUM_H265_REFS; + /* for h265 on navi21+ separate dpb images should work */ + if (pdevice->rad_info.family >= CHIP_NAVI21) + pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR; ext->maxLevelIdc = 51; strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME); pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION; @@ -625,7 +635,8 @@ static rvcn_dec_message_avc_t get_h264_msg(struct radv_video_session *vid, result.sps_info_flags |= sps->flags.mb_adaptive_frame_field_flag << 1; result.sps_info_flags |= sps->flags.frame_mbs_only_flag << 2; result.sps_info_flags |= sps->flags.delta_pic_order_always_zero_flag << 3; - result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT; + if (vid->dpb_type != DPB_DYNAMIC_TIER_2) + result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT; result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8; result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8; @@ -676,6 +687,11 @@ static rvcn_dec_message_avc_t get_h264_msg(struct radv_video_session *vid, result.frame_num = h264_pic_info->pStdPictureInfo->frame_num; result.num_ref_frames = sps->max_num_ref_frames; + result.non_existing_frame_flags = 0; + result.used_for_reference_flags = 0; + + memset(result.ref_frame_list, 0xff, sizeof(unsigned char) * 16); + memset(result.frame_num_list, 0, sizeof(unsigned int) * 16); for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) { int idx = frame_info->pReferenceSlots[i].slotIndex; const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot = @@ -684,7 +700,23 @@ static rvcn_dec_message_avc_t get_h264_msg(struct radv_video_session *vid, result.frame_num_list[idx] = idx; result.field_order_cnt_list[idx][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0]; result.field_order_cnt_list[idx][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1]; + + result.ref_frame_list[idx] = idx; + + if (dpb_slot->pStdReferenceInfo->flags.top_field_flag) + result.used_for_reference_flags |= (1 << (2 * idx)); + if (dpb_slot->pStdReferenceInfo->flags.bottom_field_flag) + result.used_for_reference_flags |= (1 << (2 * idx + 1)); + else + result.used_for_reference_flags |= (3 << (2 * idx)); + + if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference) + result.ref_frame_list[idx] |= 0x80; + if (dpb_slot->pStdReferenceInfo->flags.is_non_existing) + result.non_existing_frame_flags |= 1 << idx; + } + result.curr_pic_ref_frame_num = frame_info->referenceSlotCount; result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex; return result; @@ -841,7 +873,7 @@ static rvcn_dec_message_hevc_t get_h265_msg(struct radv_device *device, return result; } -static bool rvcn_dec_message_decode(struct radv_device *device, +static bool rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_session *vid, struct radv_video_session_params *params, void *ptr, @@ -849,11 +881,14 @@ static bool rvcn_dec_message_decode(struct radv_device *device, uint32_t *slice_offset, const struct VkVideoDecodeInfoKHR *frame_info) { + struct radv_device *device = cmd_buffer->device; rvcn_dec_message_header_t *header; rvcn_dec_message_index_t *index_codec; rvcn_dec_message_decode_t *decode; + rvcn_dec_message_index_t *index_dynamic_dpb = NULL; + rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2 = NULL; void *codec; - unsigned sizes = 0, offset_decode, offset_codec; + unsigned sizes = 0, offset_decode, offset_codec, offset_dynamic_dpb; struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding); struct radv_image *img = dst_iv->image; struct radv_image_plane *luma = &img->planes[0]; @@ -867,10 +902,21 @@ static bool rvcn_dec_message_decode(struct radv_device *device, index_codec = (void *)((char *)header + sizes); sizes += sizeof(rvcn_dec_message_index_t); + if (vid->dpb_type == DPB_DYNAMIC_TIER_2) { + index_dynamic_dpb = (void *)((char *)header + sizes); + sizes += sizeof(rvcn_dec_message_index_t); + } + offset_decode = sizes; decode = (void *)((char*)header + sizes); sizes += sizeof(rvcn_dec_message_decode_t); + if (vid->dpb_type == DPB_DYNAMIC_TIER_2) { + offset_dynamic_dpb = sizes; + dynamic_dpb_t2 = (void*)((char *)header + sizes); + sizes += sizeof(rvcn_dec_message_dynamic_dpb_t2_t); + } + offset_codec = sizes; codec = (void *)((char *)header + sizes); @@ -893,6 +939,14 @@ static bool rvcn_dec_message_decode(struct radv_device *device, index_codec->filled = 0; ++header->num_buffers; + if (vid->dpb_type == DPB_DYNAMIC_TIER_2) { + index_dynamic_dpb->message_id = RDECODE_MESSAGE_DYNAMIC_DPB; + index_dynamic_dpb->offset = offset_dynamic_dpb; + index_dynamic_dpb->filled = 0; + ++header->num_buffers; + index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t2_t); + } + decode->stream_type = vid->stream_type; decode->decode_flags = 0; decode->width_in_samples = dst_iv->image->vk.extent.width; @@ -915,8 +969,6 @@ static bool rvcn_dec_message_decode(struct radv_device *device, vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)) decode->db_aligned_height = align(frame_info->dstPictureResource.codedExtent.height, 64); - decode->db_surf_tile_config = 0; - decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w; decode->dt_uv_pitch = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w; @@ -965,6 +1017,39 @@ static bool rvcn_dec_message_decode(struct radv_device *device, decode->hw_ctxt_size = vid->ctx.size; + if (vid->dpb_type != DPB_DYNAMIC_TIER_2) + return true; + + uint64_t addr; + for (int i = 0; i < frame_info->referenceSlotCount; i++) { + struct radv_image_view *f_dpb_iv = radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding); + struct radv_image *dpb_img = f_dpb_iv->image; + int idx = frame_info->pReferenceSlots[i].slotIndex; + + radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo); + addr = radv_buffer_get_va(dpb_img->bindings[0].bo) + dpb_img->bindings[0].offset; + + dynamic_dpb_t2->dpbAddrLo[idx] = addr; + dynamic_dpb_t2->dpbAddrHi[idx] = addr >> 32; + ++dynamic_dpb_t2->dpbArraySize; + } + + radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dpb->bindings[0].bo); + addr = radv_buffer_get_va(dpb->bindings[0].bo); + + dynamic_dpb_t2->dpbCurrLo = addr; + dynamic_dpb_t2->dpbCurrHi = addr >> 32; + + decode->decode_flags = 1; + dynamic_dpb_t2->dpbConfigFlags = 0; + dynamic_dpb_t2->dpbLumaPitch = luma->surface.u.gfx9.surf_pitch; + dynamic_dpb_t2->dpbLumaAlignedHeight = luma->surface.u.gfx9.surf_height; + dynamic_dpb_t2->dpbLumaAlignedSize = luma->surface.u.gfx9.surf_slice_size; + + dynamic_dpb_t2->dpbChromaPitch = chroma->surface.u.gfx9.surf_pitch; + dynamic_dpb_t2->dpbChromaAlignedHeight = chroma->surface.u.gfx9.surf_height; + dynamic_dpb_t2->dpbChromaAlignedSize = chroma->surface.u.gfx9.surf_slice_size; + return true; } @@ -1509,9 +1594,13 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, uint32_t out_offset, fb_offset, it_offset = 0; struct radeon_winsys_bo *msg_bo, *fb_bo, *it_bo = NULL; - size += sizeof(rvcn_dec_message_header_t); - size += sizeof(rvcn_dec_message_index_t); - size += sizeof(rvcn_dec_message_decode_t); + size += sizeof(rvcn_dec_message_header_t); /* header */ + size += sizeof(rvcn_dec_message_index_t); /* codec */ + if (vid->dpb_type == DPB_DYNAMIC_TIER_2) { + size += sizeof(rvcn_dec_message_index_t); + size += sizeof(rvcn_dec_message_dynamic_dpb_t2_t); + } + size += sizeof(rvcn_dec_message_decode_t); /* decode */ switch (vid->vk.op) { case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: size += sizeof(rvcn_dec_message_avc_t); @@ -1537,7 +1626,7 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, msg_bo = cmd_buffer->upload.upload_bo; uint32_t slice_offset; - rvcn_dec_message_decode(cmd_buffer->device, vid, params, ptr, it_ptr, &slice_offset, frame_info); + rvcn_dec_message_decode(cmd_buffer, vid, params, ptr, it_ptr, &slice_offset, frame_info); rvcn_dec_message_feedback(fb_ptr); send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset); send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);