diff --git a/.github/workflows/build_libraries.yml b/.github/workflows/build_libraries.yml index 6bdb4bf77..6cb7517b1 100644 --- a/.github/workflows/build_libraries.yml +++ b/.github/workflows/build_libraries.yml @@ -31,7 +31,8 @@ jobs: apt-get update -qq && apt-get install -y build-essential g++ glslang-tools \ python3 python3-pip libglfw3-dev libvulkan-dev locales wget pkg-config \ - protobuf-compiler libprotoc-dev libopencv-dev + protobuf-compiler libprotoc-dev libopencv-dev \ + libavcodec-dev libavformat-dev libavutil-dev python3 -m pip install --upgrade pip python3 -m pip install cmake future==1.0.0 pytz==2022.1 numpy==1.23.0 \ google==3.0.0 protobuf==3.12.4 @@ -48,6 +49,7 @@ jobs: rocsolver-dev hipsolver-dev \ rocfft-dev hipfft-dev \ rocalution-dev \ + rocdecode-dev \ rocjpeg-dev \ rocsparse-dev \ rocthrust-dev \ diff --git a/Common/rocdecode_utils.hpp b/Common/rocdecode_utils.hpp new file mode 100644 index 000000000..7aeb7a5dc --- /dev/null +++ b/Common/rocdecode_utils.hpp @@ -0,0 +1,110 @@ +/* +Copyright (c) 2023 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef COMMON_ROCDECODE_UTILS_HPP +#define COMMON_ROCDECODE_UTILS_HPP + +#include "example_utils.hpp" + +#include +#include +#include +#include + +// Include rocDecode headers for type definitions +#include "rocvideodecode/roc_video_dec.h" +#include "md5.h" + +typedef enum reconfigure_flush_mode_enum { + RECONFIG_FLUSH_MODE_NONE = 0x0, /**< Just flush to get the frame count */ + RECONFIG_FLUSH_MODE_DUMP_TO_FILE = 0x1, /**< The remaining frames will be dumped to file in this mode */ + RECONFIG_FLUSH_MODE_CALCULATE_MD5 = (0x1 << 1), /**< Calculate the MD5 of the flushed frames */ +} reconfigure_flush_mode; + +// This struct is used by sample apps to dump last frames to file +typedef struct reconfig_dump_file_struct_t { + bool b_dump_frames_to_file; + std::string output_file_name; + void *md5_generator_handle; +} reconfig_dump_file_struct; + +// Callback function to flush last frames and save it to file when reconfigure happens +inline int reconfigure_flush_callback(void *p_viddec_obj, uint32_t flush_mode, void *p_user_struct) +{ + int n_frames_flushed = 0; + if ((p_viddec_obj == nullptr) || (p_user_struct == nullptr)) + { + return n_frames_flushed; + } + + RocVideoDecoder *viddec = static_cast(p_viddec_obj); + OutputSurfaceInfo *surf_info; + if (!viddec->GetOutputSurfaceInfo(&surf_info)) + { + std::cerr << "Error: Failed to get Output Surface Info!" 
<< std::endl; + return n_frames_flushed; + } + + uint8_t *pframe = nullptr; + int64_t pts; + while ((pframe = viddec->GetFrame(&pts))) + { + if (flush_mode != RECONFIG_FLUSH_MODE_NONE) + { + reconfig_dump_file_struct *p_dump_file_struct = static_cast(p_user_struct); + if (flush_mode & reconfigure_flush_mode::RECONFIG_FLUSH_MODE_DUMP_TO_FILE) + { + if (p_dump_file_struct->b_dump_frames_to_file) + { + viddec->SaveFrameToFile(p_dump_file_struct->output_file_name, pframe, surf_info); + } + } + if (flush_mode & reconfigure_flush_mode::RECONFIG_FLUSH_MODE_CALCULATE_MD5) + { + MD5Generator *md5_generator = static_cast(p_dump_file_struct->md5_generator_handle); + md5_generator->UpdateMd5ForFrame(pframe, surf_info); + } + } + // release and flush frame + viddec->ReleaseFrame(pts, true); + n_frames_flushed++; + } + + return n_frames_flushed; +} +// Adds the number of comma-separated, non-empty tokens in environment variable `name` to dev_count and returns the updated dev_count (unchanged when the variable is unset) +inline int get_env_var(const char *name, int &dev_count) +{ + const char *v = std::getenv(name); + if (v) + { + // strtok() writes NUL bytes into its input, so tokenize a private copy instead of the live environment string + std::string v_copy(v); + for (char *p_tkn = std::strtok(v_copy.data(), ","); p_tkn != nullptr; p_tkn = std::strtok(nullptr, ",")) + { + dev_count++; + } + } + return dev_count; +} + +#endif // COMMON_ROCDECODE_UTILS_HPP diff --git a/Libraries/CMakeLists.txt b/Libraries/CMakeLists.txt index efc9ae5c0..c772cb693 100644 --- a/Libraries/CMakeLists.txt +++ b/Libraries/CMakeLists.txt @@ -55,6 +55,7 @@ if( add_subdirectory(rocALUTION) add_subdirectory(rocBLAS) add_subdirectory(rocCV) + add_subdirectory(rocDecode) add_subdirectory(rocJPEG) add_subdirectory(rocFFT) add_subdirectory(rocPRIM) diff --git a/Libraries/Makefile b/Libraries/Makefile index c029d40fe..d13f1f1ea 100644 --- a/Libraries/Makefile +++ b/Libraries/Makefile @@ -40,6 +40,7 @@ LIBRARIES += \ rocALUTION \ rocBLAS \ rocCV \ + rocDecode \ rocFFT \ rocJPEG \ rocPRIM \ diff --git a/Libraries/rocDecode/CMakeLists.txt b/Libraries/rocDecode/CMakeLists.txt new file mode 100644 index 000000000..8f3d1f9e0 --- /dev/null +++ b/Libraries/rocDecode/CMakeLists.txt @@ -0,0 +1,58 @@ +# MIT License +# +# Copyright (c) 2025
Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(rocDecode_examples LANGUAGES CXX) +include(CTest) + +file(RELATIVE_PATH folder_bin ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/${folder_bin}) + +if(CMAKE_SYSTEM_NAME MATCHES "Windows") + message(STATUS "rocDecode examples are only available on Linux") + return() +else() + set(ROCM_ROOT + "/opt/rocm" + CACHE PATH + "Root directory of the ROCm installation" + ) +endif() + +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") +# Deliberately not REQUIRED: when rocDecode is absent, skip these examples instead of aborting the whole configure step +find_package(rocdecode) +if(NOT rocdecode_FOUND) + message(STATUS "rocDecode could not be found, not building rocDecode examples") + return() +endif() + +add_subdirectory(rocdec_decode) +add_subdirectory(video_decode) +add_subdirectory(video_decode_batch) +add_subdirectory(video_decode_mem) +add_subdirectory(video_decode_multi_files) +add_subdirectory(video_decode_perf) +add_subdirectory(video_decode_pic_files) +add_subdirectory(video_decode_raw) +add_subdirectory(video_decode_rgb) +add_subdirectory(video_to_sequence) diff --git a/Libraries/rocDecode/Makefile b/Libraries/rocDecode/Makefile new file mode 100644 index 000000000..4672a0407 --- /dev/null +++ b/Libraries/rocDecode/Makefile @@ -0,0 +1,43 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software.
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +EXAMPLES := \ + rocdec_decode \ + video_decode \ + video_decode_batch \ + video_decode_mem \ + video_decode_multi_files \ + video_decode_perf \ + video_decode_pic_files \ + video_decode_raw \ + video_decode_rgb \ + video_to_sequence + +all: $(EXAMPLES) + +clean: TARGET=clean +clean: all + +$(EXAMPLES): + $(MAKE) -C $@ $(TARGET) + +.PHONY: all clean $(EXAMPLES) diff --git a/Libraries/rocDecode/README.md b/Libraries/rocDecode/README.md new file mode 100644 index 000000000..935f692fc --- /dev/null +++ b/Libraries/rocDecode/README.md @@ -0,0 +1,42 @@ +# rocDecode Examples + +## Summary + +The examples in this subdirectory showcase the functionality of the [rocDecode](https://github.com/ROCm/rocDecode) library. rocDecode is AMD's high-performance video decode SDK for AMD GPUs, providing hardware-accelerated video decoding capabilities. The examples demonstrate various use cases including basic video decoding, batch processing, color space conversion, and performance optimization. The examples build only on Linux for the ROCm (AMD GPU) backend. + +## Prerequisites + +### Linux + +- [CMake](https://cmake.org/download/) (at least version 3.21) +- Or GNU Make - available via the distribution's package manager +- [ROCm](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) (at least version 6.0) +- [rocDecode](https://github.com/ROCm/rocDecode): `rocdecode` and `rocdecode-dev` packages available from [repo.radeon.com](https://repo.radeon.com/rocm/). 
The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/HIP/en/latest/install/install.html) +- [FFMPEG](https://ffmpeg.org/about.html) development libraries: + - On Ubuntu: `sudo apt install libavcodec-dev libavformat-dev libavutil-dev` + - On RHEL/SLES: Install FFMPEG development packages manually or use the [rocDecode-setup.py](https://github.com/ROCm/rocDecode/blob/develop/rocDecode-setup.py) script + +### Windows + +Support for Windows will be included in the future. + +## Building + +### Linux + +Ensure the dependencies are installed, or use the [provided Dockerfiles](../../Dockerfiles/) to build and run the examples in a containerized environment that has all prerequisites installed. + +#### Using CMake + +All examples in the `rocDecode` subdirectory can either be built by a single CMake project or be built independently. + +- `$ cd Libraries/rocDecode` +- `$ cmake -S . -B build` +- `$ cmake --build build` + +#### Using Make + +All examples can be built by a single invocation to Make or be built independently. + +- `$ cd Libraries/rocDecode` +- `$ make` diff --git a/Libraries/rocDecode/rocdec_decode/.gitignore b/Libraries/rocDecode/rocdec_decode/.gitignore new file mode 100644 index 000000000..8b2794d57 --- /dev/null +++ b/Libraries/rocDecode/rocdec_decode/.gitignore @@ -0,0 +1 @@ +rocdecode_rocdec_decode diff --git a/Libraries/rocDecode/rocdec_decode/CMakeLists.txt b/Libraries/rocDecode/rocdec_decode/CMakeLists.txt new file mode 100644 index 000000000..03903038e --- /dev/null +++ b/Libraries/rocDecode/rocdec_decode/CMakeLists.txt @@ -0,0 +1,79 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +set(example_name rocdecode_rocdec_decode) + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(${example_name} LANGUAGES CXX) + +include("../../../Common/HipPlatform.cmake") +select_gpu_language() + +enable_language(${ROCM_EXAMPLES_GPU_LANGUAGE}) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD 17) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_EXTENSIONS OFF) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD_REQUIRED ON) +select_hip_platform() +verify_hip_platform(PLATFORMS "amd") + +if(CMAKE_SYSTEM_NAME MATCHES "Windows") + message(STATUS "rocDecode examples are only available on Linux") + return() +else() + set(ROCM_ROOT + "/opt/rocm" + CACHE PATH + "Root directory of the ROCm installation" + ) +endif() + +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +find_package(rocdecode REQUIRED) + +# Try to find the host library directly (handles both naming conventions) +find_library(ROCDECODE_HOST_LIB + NAMES rocdecode-host rocdecodehost + PATHS ${ROCM_ROOT}/lib + NO_DEFAULT_PATH +) + +add_executable(${example_name} main.cpp) + +target_link_libraries(${example_name} PRIVATE rocdecode::rocdecode) + +# Link host library if found +if(ROCDECODE_HOST_LIB) + target_link_libraries(${example_name} PRIVATE ${ROCDECODE_HOST_LIB}) + target_compile_definitions(${example_name} PRIVATE ENABLE_HOST_DECODE=1) +else() + target_compile_definitions(${example_name} PRIVATE ENABLE_HOST_DECODE=0) +endif() + +target_include_directories( + ${example_name} + PRIVATE "../../../Common" "../../../External" +) + +set_source_files_properties(main.cpp PROPERTIES LANGUAGE ${ROCM_EXAMPLES_GPU_LANGUAGE}) + +install(TARGETS ${example_name}) diff --git a/Libraries/rocDecode/rocdec_decode/Makefile b/Libraries/rocDecode/rocdec_decode/Makefile new file mode 100644 index 000000000..e27497556 --- /dev/null +++ b/Libraries/rocDecode/rocdec_decode/Makefile @@ -0,0 +1,78 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +EXAMPLE := rocdecode_rocdec_decode +COMMON_INCLUDE_DIR := ../../../Common +EXTERNAL_DIR := ../../../External +GPU_RUNTIME := HIP + +# HIP variables +ROCM_INSTALL_DIR := /opt/rocm + +HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include +ROCDECODE_INCLUDE_DIR := $(HIP_INCLUDE_DIR) + +HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc + +# Detect which rocdecode host library is available +ROCDECODE_HOST_LIB := $(shell if [ -f $(ROCM_INSTALL_DIR)/lib/librocdecode-host.so ]; then \ + echo "rocdecode-host"; \ + elif [ -f $(ROCM_INSTALL_DIR)/lib/librocdecodehost.so ]; then \ + echo "rocdecodehost"; \ + fi) + +# Common variables and flags +CXX_STD := c++17 +ICXXFLAGS := -std=$(CXX_STD) +ICPPFLAGS := -isystem $(ROCDECODE_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -I $(EXTERNAL_DIR) +ILDFLAGS := -L $(ROCM_INSTALL_DIR)/lib +ILDLIBS := -lrocdecode + +ifeq ($(GPU_RUNTIME), HIP) + CXXFLAGS ?= -Wall -Wextra + CPPFLAGS += -D__HIP_PLATFORM_AMD__ + + # Add host decode library and flag if available + ifneq ($(ROCDECODE_HOST_LIB),) + ILDLIBS += -l$(ROCDECODE_HOST_LIB) + CPPFLAGS += -DENABLE_HOST_DECODE=1 + else + CPPFLAGS += -DENABLE_HOST_DECODE=0 + endif + + COMPILER := $(HIPCXX) +else + $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP; CUDA not supported) +endif + +ICXXFLAGS += $(CXXFLAGS) +ICPPFLAGS += $(CPPFLAGS) +ILDFLAGS += $(LDFLAGS) +ILDLIBS += $(LDLIBS) + +$(EXAMPLE): main.cpp $(COMMON_INCLUDE_DIR)/example_utils.hpp $(COMMON_INCLUDE_DIR)/rocdecode_utils.hpp + $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS) + +clean: + $(RM) $(EXAMPLE) + +.PHONY: clean diff --git a/Libraries/rocDecode/rocdec_decode/README.md b/Libraries/rocDecode/rocdec_decode/README.md new file mode 100644 index 000000000..99aceed0c --- /dev/null +++ b/Libraries/rocDecode/rocdec_decode/README.md @@ -0,0 +1,105 @@ +# rocDecode Low-Level API Example + +## Description + +This example demonstrates the use of low-level rocDecode APIs for hardware-accelerated video decoding on AMD GPUs. 
It showcases both device-based and host-based decoding backends, providing direct control over the decoder initialization, frame decoding, and output retrieval. This sample is ideal for understanding the fundamental rocDecode API workflow without high-level wrapper abstractions. + +## Application Flow + +1. Parse command-line arguments for input file, device ID, and backend selection. +2. Initialize the video demuxer to extract codec information and video packets. +3. Set up the video parser with callback functions for sequence, decode, and display events. +4. Create the decoder instance based on the selected backend (device or host). +5. Configure decoder parameters including output surface format and dimensions. +6. Loop through video packets: + - Parse video data using the video parser. + - Decode frames through parser callbacks. + - Retrieve decoded frames via display callbacks. +7. Extract decoded frame data to host or device memory based on backend. +8. Optionally save decoded frames to output file. +9. Clean up parser and decoder resources. + +## Key APIs and Concepts + +- **Decoder Initialization**: The rocDecode decoder is initialized using either `rocDecCreateDecoder()` for device-based decoding or `rocDecCreateDecoderHost()` for host-based decoding. The decoder configuration includes codec type, output surface format, dimensions, and number of decode surfaces. + +- **Video Parser**: + - `rocDecCreateVideoParser()`: Creates a parser instance that handles bitstream parsing and triggers callbacks for sequence changes, picture decode, and picture display events. + - `rocDecParseVideoData()`: Parses video packet data and invokes registered callbacks to drive the decode process. + - `rocDecDestroyVideoParser()`: Releases parser resources. + +- **Frame Decoding**: + - `rocDecDecodeFrame()` / `rocDecDecodeFrameHost()`: Decodes a single frame using the provided picture parameters. Called from the picture decode callback. 
+ - Picture parameters include current picture index, bitstream data, and decode-specific information. + +- **Frame Retrieval**: + - `rocDecGetVideoFrame()`: Retrieves decoded frame from device memory (device backend). + - `rocDecGetVideoFrameHost()`: Retrieves decoded frame to host memory (host backend). + - Both functions provide frame data, pitch information, and surface parameters. + +- **Decoder Cleanup**: + - `rocDecDestroyDecoder()`: Destroys device-based decoder instance. + - `rocDecDestroyDecoderHost()`: Destroys host-based decoder instance. + +- **Callback Functions**: The parser uses three callback functions: + - **Sequence Callback**: Invoked when video sequence parameters are detected, used to initialize or reconfigure the decoder. + - **Picture Decode Callback**: Called when a picture is ready to be decoded, triggers `rocDecDecodeFrame()`. + - **Picture Display Callback**: Invoked when a decoded frame is ready for display, retrieves frame data using `rocDecGetVideoFrame()`. 
+ +## Demonstrated API Calls + +### rocDecode APIs + +- `rocDecCreateDecoder` +- `rocDecCreateDecoderHost` +- `rocDecDecodeFrame` +- `rocDecDecodeFrameHost` +- `rocDecGetVideoFrame` +- `rocDecGetVideoFrameHost` +- `rocDecDestroyDecoder` +- `rocDecDestroyDecoderHost` +- `rocDecCreateVideoParser` +- `rocDecParseVideoData` +- `rocDecDestroyVideoParser` +- `rocDecGetErrorName` + +### HIP Runtime APIs + +- `hipGetDeviceCount` +- `hipSetDevice` +- `hipGetDeviceProperties` +- `hipMalloc` +- `hipFree` +- `hipMemcpy` +- `hipMemcpyDtoH` + +### FFMPEG APIs + +- `avformat_open_input` +- `avformat_find_stream_info` +- `av_find_best_stream` +- `av_read_frame` +- `av_packet_alloc` +- `av_packet_free` +- `av_packet_unref` +- `avformat_close_input` +- `av_bsf_get_by_name` +- `av_bsf_alloc` +- `av_bsf_init` +- `av_bsf_send_packet` +- `av_bsf_receive_packet` +- `av_bsf_free` + +### Data Types and Enums + +- `rocDecDecoderHandle` +- `RocdecVideoParser` +- `rocDecVideoCodec` +- `rocDecVideoSurfaceFormat` +- `rocDecVideoChromaFormat` +- `rocDecDecoderCreateInfo` +- `RocdecParserParams` +- `RocdecVideoFormat` +- `RocdecPicParams` +- `RocdecParserDispInfo` +- `rocDecDecodeStatus` diff --git a/Libraries/rocDecode/rocdec_decode/main.cpp b/Libraries/rocDecode/rocdec_decode/main.cpp new file mode 100644 index 000000000..c7c4e4491 --- /dev/null +++ b/Libraries/rocDecode/rocdec_decode/main.cpp @@ -0,0 +1,796 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "example_utils.hpp" +#include "CmdParser/cmdparser.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + + +__attribute__((visibility("hidden"))) inline bool is_error(rocDecStatus status) +{ + return status != ROCDEC_SUCCESS; +} + +__attribute__((visibility("hidden"))) inline const char* error_string(rocDecStatus status) +{ + return rocDecGetErrorName(status); +} + +struct rect +{ + int left; + int top; + int right; + int bottom; +}; + +template +__attribute__((visibility("hidden"))) inline void report_error( + Status status, const char* function_name, const char* file_name, int line, Args&&... args) +{ + ((std::cerr << "ERROR: " << error_string(status) << "; " << function_name << "; " + << file_name << ":" << line) + << ... 
<< std::forward(args)) + << std::endl; + std::abort(); +} + +//hardcoding for this sample +#define DEFAULT_WIDTH 2912 +#define DEFAULT_HEIGHT 1888 + +// helper functions for saving output to file + +static inline float get_chroma_height_factor(rocDecVideoSurfaceFormat surface_format) +{ + float factor = 0.5; + switch (surface_format) + { + case rocDecVideoSurfaceFormat_NV12: + case rocDecVideoSurfaceFormat_P016: + case rocDecVideoSurfaceFormat_YUV420: + case rocDecVideoSurfaceFormat_YUV420_16Bit: + factor = 0.5; + break; + case rocDecVideoSurfaceFormat_YUV422: + case rocDecVideoSurfaceFormat_YUV422_16Bit: + case rocDecVideoSurfaceFormat_YUV444: + case rocDecVideoSurfaceFormat_YUV444_16Bit: + factor = 1.0; + break; + } + + return factor; +} + +static inline rocDecVideoCodec codec_type_to_roc_dec_video_codec(int codec_type) +{ + switch (codec_type) + { + case 0: return rocDecVideoCodec_HEVC; + case 1: return rocDecVideoCodec_AVC; + case 2: return rocDecVideoCodec_AV1; + case 3: return rocDecVideoCodec_VP9; + case 4: return rocDecVideoCodec_VP8; + case 5: return rocDecVideoCodec_JPEG; + default: return rocDecVideoCodec_NumCodecs; + } +} + +static inline float get_chroma_width_factor(rocDecVideoSurfaceFormat surface_format) +{ + float factor = 0.5; + switch (surface_format) + { + case rocDecVideoSurfaceFormat_NV12: + case rocDecVideoSurfaceFormat_P016: + case rocDecVideoSurfaceFormat_YUV444: + case rocDecVideoSurfaceFormat_YUV444_16Bit: + factor = 1.0; + break; + case rocDecVideoSurfaceFormat_YUV420: + case rocDecVideoSurfaceFormat_YUV420_16Bit: + case rocDecVideoSurfaceFormat_YUV422: + case rocDecVideoSurfaceFormat_YUV422_16Bit: + factor = 0.5; + break; + } + return factor; +} + +// only 2 types of memory mode is supported in this sample for simplicity. 
+typedef enum output_surface_memory_type_enum +{ + OUT_SURFACE_MEM_DEV_INTERNAL = 0, /**< Internal interopped decoded surface memory(original mapped decoded surface) */ + OUT_SURFACE_MEM_HOST = 2, /**< decoded output will be in host memory (true for host based decoding) **/ +} output_surface_memory_type; + +// Enum for decoder backend +typedef enum decoder_backend_enum +{ + DECODER_BACKEND_DEVICE = 0, /**< Decoding using VCN hardware in the device specified by user */ + DECODER_BACKEND_HOST = 1, /**< decoded using host and ffmpeg avcodec **/ +} decoder_backend; + +#define CHECK(callable, ...) \ + do \ + { \ + auto status__ = callable; /* invoke the callable and assign the return status */ \ + if (is_error(status__)) \ + { \ + report_error(status__, __FUNCTION__, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (false) + +/** + * @brief Struct containing all the information for decoding and displaying output + * The constructor gives every member a defined value; stream-dependent fields (dimensions, surface format, display rect) are overwritten once the sequence header is parsed + */ +struct decoder_info +{ + int dec_device_id; + decoder_backend backend; //0: device, 1: host + rocDecDecoderHandle decoder; + RocdecVideoParser parser; + std::uint32_t bit_depth; + rocDecVideoCodec rocdec_codec_id; + int dump_decoded_frames; + std::string output_file_path; + output_surface_memory_type mem_type; + rocDecVideoSurfaceFormat surf_format; + rocDecVideoSurfaceFormat video_chroma_format; + uint32_t coded_width, coded_height; + uint32_t bytes_per_pixel; + bool is_decoder_reconfigured; + rect disp_rect; + FILE *fp_out; + decoder_info() : dec_device_id(0), backend(DECODER_BACKEND_DEVICE), decoder(nullptr), parser(nullptr), bit_depth(8), rocdec_codec_id(rocDecVideoCodec_NumCodecs), dump_decoded_frames(0), mem_type{OUT_SURFACE_MEM_DEV_INTERNAL}, + surf_format{rocDecVideoSurfaceFormat_NV12}, video_chroma_format{rocDecVideoSurfaceFormat_NV12}, coded_width(0), coded_height(0), bytes_per_pixel(1), + is_decoder_reconfigured{false}, disp_rect{}, fp_out{nullptr} {} +}; + +/** + * @brief Function to save internal frame buffer to file for device buffer : chroma format is assumed to be NV12 for internal device memory + * + * @param p_dec_info + * @param surf_mem device mem
pointers of luma and chroma planes + * @param pitch stride in bytes of luma and chroma planes + */ +void save_frame_to_file(decoder_info *p_dec_info, void *surf_mem[], uint32_t *pitch) +{ + uint8_t *hst_ptr = nullptr; + uint64_t output_image_size_luma = pitch[0] * p_dec_info->coded_height; + uint64_t output_image_size_chroma = pitch[1] * ((p_dec_info->coded_height * get_chroma_height_factor(p_dec_info->surf_format))); + if (p_dec_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL) + { + if (hst_ptr == nullptr) + { + hst_ptr = new uint8_t [output_image_size_luma + output_image_size_chroma]; + } + hipError_t hip_status = hipSuccess; + // copy luma + hip_status = hipMemcpyDtoH((void *)hst_ptr, surf_mem[0], output_image_size_luma); + if (hip_status != hipSuccess) + { + std::cerr << "ERROR: hipMemcpyDtoH failed for luma! (" << hipGetErrorName(hip_status) << ")" << std::endl; + delete [] hst_ptr; + return; + } + hip_status = hipMemcpyDtoH((void *)(hst_ptr + output_image_size_luma), surf_mem[1], output_image_size_chroma); + if (hip_status != hipSuccess) + { + std::cerr << "ERROR: hipMemcpyDtoH failed for chroma! 
(" << hipGetErrorName(hip_status) << ")" << std::endl; + delete [] hst_ptr; + return; + } + } + else + { + hst_ptr = static_cast (surf_mem[0]); + } + + if (p_dec_info->is_decoder_reconfigured) + { + if (p_dec_info->fp_out) + { + fclose(p_dec_info->fp_out); + p_dec_info->fp_out = nullptr; + } + p_dec_info->is_decoder_reconfigured = false; + } + + if (p_dec_info->fp_out == nullptr && !p_dec_info->output_file_path.empty()) + { + p_dec_info->fp_out = fopen(p_dec_info->output_file_path.c_str(), "wb"); + } + + if (p_dec_info->fp_out) + { + uint8_t *tmp_hst_ptr = hst_ptr; + if (p_dec_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL) + { + tmp_hst_ptr += (p_dec_info->disp_rect.top * pitch[0]) + (p_dec_info->disp_rect.left * p_dec_info->bytes_per_pixel); + } + int img_width = p_dec_info->disp_rect.right - p_dec_info->disp_rect.left; + int img_height = p_dec_info->disp_rect.bottom - p_dec_info->disp_rect.top; + uint32_t output_stride = pitch[0]; + if ((img_width * p_dec_info->bytes_per_pixel) == output_stride) + { + fwrite(tmp_hst_ptr, 1, output_image_size_luma, p_dec_info->fp_out); + tmp_hst_ptr += output_image_size_luma; + fwrite(tmp_hst_ptr, 1, output_image_size_chroma, p_dec_info->fp_out); + } + else + { + uint32_t width = img_width * p_dec_info->bytes_per_pixel; + if (p_dec_info->bit_depth <= 16) + { + for (int i = 0; i < img_height; i++) + { + fwrite(tmp_hst_ptr, 1, width, p_dec_info->fp_out); + tmp_hst_ptr += output_stride; + } + // dump chroma + uint8_t *uv_hst_ptr = hst_ptr + output_image_size_luma; + uint32_t chroma_height = static_cast(get_chroma_height_factor(p_dec_info->surf_format) * img_height); + if (p_dec_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL) + { + uv_hst_ptr += ((p_dec_info->disp_rect.top >> 1) * output_stride) + (p_dec_info->disp_rect.left * p_dec_info->bytes_per_pixel); + } + for (uint32_t i = 0; i < chroma_height; i++) + { + fwrite(uv_hst_ptr, 1, width, p_dec_info->fp_out); + uv_hst_ptr += pitch[1]; + } + } + } + } + + if (hst_ptr != nullptr) + { 
+ delete [] hst_ptr; + } +} + +/** + * @brief Function to save internal frame buffer to file for host buffer + * + * Closes the current output file if the decoder was reconfigured, opens output_file_path when no file is open, then writes the display rectangle of every plane row by row. + * NOTE(review): the file is reopened with "wb", so frames written before a reconfigure are truncated - confirm this is intended. + * + * @param p_dec_info decoder state: output file, display rectangle, bytes per pixel, bit depth and surface format + * @param frame_mem per-plane host pointers: [0] luma, [1] chroma, [2] optional third plane (skipped when nullptr) + * @param pitch per-plane row stride in bytes + */ +void save_frame_to_file_host(decoder_info *p_dec_info, void *frame_mem[], uint32_t *pitch) +{ + if (p_dec_info->is_decoder_reconfigured) + { + if (p_dec_info->fp_out) + { + fclose(p_dec_info->fp_out); + p_dec_info->fp_out = nullptr; + } + p_dec_info->is_decoder_reconfigured = false; + } + + if (p_dec_info->fp_out == nullptr && !p_dec_info->output_file_path.empty()) + { + p_dec_info->fp_out = fopen(p_dec_info->output_file_path.c_str(), "wb"); + } + + if (p_dec_info->fp_out) + { + // Validate the luma plane before applying the crop offset; testing the offset pointer cannot detect a null plane. + if (frame_mem[0] == nullptr) + { + std::cerr << "save_frame_to_file_host: Invalid Memory address for src/dst" << std::endl; + return; + } + uint8_t *p_src_ptr_y = static_cast(frame_mem[0]) + (p_dec_info->disp_rect.top * pitch[0] + p_dec_info->disp_rect.left * p_dec_info->bytes_per_pixel); + int img_width = p_dec_info->disp_rect.right - p_dec_info->disp_rect.left; + int img_height = p_dec_info->disp_rect.bottom - p_dec_info->disp_rect.top; + int output_stride = pitch[0]; + + uint32_t width = img_width * p_dec_info->bytes_per_pixel; + if (p_dec_info->bit_depth <= 16) + { + for (int i = 0; i < img_height; i++) + { + fwrite(p_src_ptr_y, 1, width, p_dec_info->fp_out); + p_src_ptr_y += output_stride; + } + // dump chroma + // Scale the crop origin with the same chroma factors that size the chroma planes below, and use that one offset for every chroma plane. + int32_t chroma_top = static_cast<int32_t>(get_chroma_height_factor(p_dec_info->surf_format) * p_dec_info->disp_rect.top); + int32_t chroma_left = static_cast<int32_t>(get_chroma_width_factor(p_dec_info->surf_format) * p_dec_info->disp_rect.left) * p_dec_info->bytes_per_pixel; + uint8_t *p_src_ptr_uv = static_cast<uint8_t *>(frame_mem[1]) + (chroma_top * pitch[1] + chroma_left); + int32_t chroma_height = static_cast(get_chroma_height_factor(p_dec_info->surf_format) * img_height); + int32_t chroma_width = static_cast(get_chroma_width_factor(p_dec_info->surf_format) * img_width) * p_dec_info->bytes_per_pixel; + for (int32_t i = 0; i < chroma_height; i++) + { + fwrite(p_src_ptr_uv, 1, chroma_width, p_dec_info->fp_out); + p_src_ptr_uv += pitch[1]; + } + if (frame_mem[2] != nullptr) + { + uint8_t *p_src_ptr_v = static_cast<uint8_t *>(frame_mem[2]) + (chroma_top * pitch[2] + chroma_left); + for (int32_t i = 0; i < chroma_height; i++) + { + fwrite(p_src_ptr_v, 1, chroma_width, p_dec_info->fp_out); + p_src_ptr_v += pitch[2]; + } + } + } + } +} + 
+// Reads every file in names, in the given order, and returns one byte buffer per file. Aborts if a file cannot be opened, sized or read. +std::vector> read_frames(std::vector& names) +{ + std::vector> frames; + // Files are read in the order given; any sorting has to be done by the caller. + for (const std::string& name : names) + { + std::ifstream input_file(name.c_str(), std::ios::binary); + if (!input_file) + { + std::cerr << "Error opening " << name << " for reading." << std::endl; + std::abort(); + } + std::cout << "Reading " << name << " for reading." << std::endl; + // Determine the file size + input_file.seekg(0, std::ios::end); + std::streamsize file_size = input_file.tellg(); + // tellg() reports failure as -1, which must not be used as a buffer size. + if (file_size < 0) + { + std::cerr << "Error reading " << name << "." << std::endl; + std::abort(); + } + input_file.seekg(0, std::ios::beg); + + // Read the file contents into a byte array + std::vector frame(file_size); + if (!input_file.read(reinterpret_cast(frame.data()), file_size)) + { + std::cerr << "Error reading " << name << "." << std::endl; + std::abort(); + } + // Close the file + input_file.close(); + frames.push_back(std::move(frame)); + } + + return frames; +} + +void init() {} + +void create_decoder(decoder_info& dec_info) +{ + RocDecoderCreateInfo create_info = {}; + create_info.codec_type = dec_info.rocdec_codec_id; // user specified codec_type for raw files + create_info.max_width = DEFAULT_WIDTH; + create_info.max_height = DEFAULT_HEIGHT; + create_info.width = DEFAULT_WIDTH; + create_info.height = DEFAULT_HEIGHT; + create_info.num_decode_surfaces = 6; + create_info.target_width = DEFAULT_WIDTH; + create_info.target_height = DEFAULT_HEIGHT; + create_info.display_rect.left = 0; + create_info.display_rect.right = static_cast(DEFAULT_WIDTH); + create_info.display_rect.top = 0; + create_info.display_rect.bottom = static_cast(DEFAULT_HEIGHT); + // for decode creation: assuming chroma_format is 4:2:0 and output_format is NV12. 
+ // video dimensions ( width, height, max_width, max_height), num_decode_surfaces, and bit_depth_minus_8 are hardcoded here + // this will get changed in reconfigure when the sequence header is parsed from the stream to detect the actual video parameters + create_info.chroma_format = rocDecVideoChromaFormat_420; + create_info.output_format = rocDecVideoSurfaceFormat_NV12; + create_info.bit_depth_minus_8 = 2; + create_info.num_output_surfaces = 1; + CHECK(rocDecCreateDecoder(&dec_info.decoder, &create_info)); +} + +int ROCDECAPI handle_video_sequence_host(void* user_data, RocdecVideoFormatHost* format_host) +{ + decoder_info *p_dec_info = static_cast(user_data); + RocdecVideoFormat *format = &format_host->video_format; + RocdecReconfigureDecoderInfo reconfig_params = {}; + reconfig_params.width = format->coded_width; + reconfig_params.height = format->coded_height; + reconfig_params.num_decode_surfaces = 6; + reconfig_params.target_width = format->coded_width; + reconfig_params.target_height = format->coded_height; + reconfig_params.display_rect.left = 0; + reconfig_params.display_rect.right = static_cast(format->coded_width); + reconfig_params.display_rect.top = 0; + reconfig_params.display_rect.bottom = static_cast(format->coded_height); + p_dec_info->surf_format = format_host->video_surface_format; + p_dec_info->disp_rect.top = format->display_area.top; + p_dec_info->disp_rect.bottom = format->display_area.bottom; + p_dec_info->disp_rect.left = format->display_area.left; + p_dec_info->disp_rect.right = format->display_area.right; + CHECK(rocDecReconfigureDecoderHost(p_dec_info->decoder, &reconfig_params)); + p_dec_info->is_decoder_reconfigured = true; + int bitdepth_minus_8 = format->bit_depth_luma_minus8; + p_dec_info->coded_width = format->coded_width; + p_dec_info->coded_height = format->coded_height; + p_dec_info->bytes_per_pixel = bitdepth_minus_8 > 0 ? 
2 : 1; + std::ostringstream input_video_info_str; + input_video_info_str.str(""); + input_video_info_str.clear(); + input_video_info_str << "Input Video Information" << std::endl + << "\tCodec : " << format->codec << std::endl; + if (format->frame_rate.numerator && format->frame_rate.denominator) + { + input_video_info_str << "\tFrame rate : " << format->frame_rate.numerator << "/" << format->frame_rate.denominator << " = " << 1.0 * format->frame_rate.numerator / format->frame_rate.denominator << " fps" << std::endl; + } + input_video_info_str << "\tSequence : " << (format->progressive_sequence ? "Progressive" : "Interlaced") << std::endl + << "\tCoded size : [" << format->coded_width << ", " << format->coded_height << "]" << std::endl + << "\tDisplay area : [" << format->display_area.left << ", " << format->display_area.top << ", " + << format->display_area.right << ", " << format->display_area.bottom << "]" << std::endl + << "\tBit depth : " << format->bit_depth_luma_minus8 + 8 + ; + input_video_info_str << std::endl; + std::cout << input_video_info_str.str(); + + return 1; +} + +// Display callback of the host decoder: fetches the host frame for the picture index in disp_info, marks the output memory type as host, and dumps the frame when dump_decoded_frames is set. Always returns 1. +int ROCDECAPI handle_picture_display_host(void* user_data, RocdecParserDispInfo* disp_info) +{ + decoder_info *p_dec_info = static_cast(user_data); + RocdecParserDispInfo *p_disp_info = static_cast(disp_info); + RocdecProcParams params = {}; + params.progressive_frame = p_disp_info->progressive_frame; + params.top_field_first = p_disp_info->top_field_first; + void* frame_mem_ptr[3] = {nullptr}; + uint32_t pitch[3] = {0}; + CHECK(rocDecGetVideoFrameHost(p_dec_info->decoder, p_disp_info->picture_index, frame_mem_ptr, pitch, &params)); + p_dec_info->mem_type = OUT_SURFACE_MEM_HOST; + if (p_dec_info->dump_decoded_frames) + { + save_frame_to_file_host(p_dec_info, frame_mem_ptr, pitch); + } + + return 1; +} + +void create_decoder_host(decoder_info& dec_info) +{ + // many of the decoder parameters are hardcoded below for just creating the decoder. 
+ // In the handlevideosequence callback, the decoder will get reconfigured to the actual parameters in the sequence header + RocDecoderHostCreateInfo create_info = {}; + create_info.codec_type = dec_info.rocdec_codec_id; + create_info.num_decode_threads = 0; // default + create_info.max_width = DEFAULT_WIDTH; + create_info.max_height = DEFAULT_HEIGHT; + create_info.width = DEFAULT_WIDTH; + create_info.height = DEFAULT_HEIGHT; + create_info.target_width = DEFAULT_WIDTH; + create_info.target_height = DEFAULT_HEIGHT; + create_info.display_rect.left = 0; + create_info.display_rect.right = static_cast(DEFAULT_WIDTH); + create_info.display_rect.top = 0; + create_info.display_rect.bottom = static_cast(DEFAULT_HEIGHT); + create_info.chroma_format = rocDecVideoChromaFormat_420; + create_info.output_format = rocDecVideoSurfaceFormat_P016; + create_info.bit_depth_minus_8 = 2; + create_info.num_output_surfaces = 1; + create_info.user_data = &dec_info; + create_info.pfn_sequence_callback = handle_video_sequence_host; + create_info.pfn_display_picture = handle_picture_display_host; + CHECK(rocDecCreateDecoderHost(&dec_info.decoder, &create_info)); + dec_info.backend = DECODER_BACKEND_HOST; +} + +int ROCDECAPI handle_video_sequence(void* user_data, RocdecVideoFormat* format) +{ + decoder_info *p_dec_info = static_cast(user_data); + RocdecReconfigureDecoderInfo reconfig_params = {}; + int bitdepth_minus_8 = format->bit_depth_luma_minus8; + uint32_t target_width = (format->display_area.right - format->display_area.left + 1) & ~1; + uint32_t target_height = (format->display_area.bottom - format->display_area.top + 1) & ~1; + reconfig_params.width = format->coded_width; + reconfig_params.height = format->coded_height; + reconfig_params.bit_depth_minus_8 = bitdepth_minus_8; + reconfig_params.num_decode_surfaces = format->min_num_decode_surfaces; + reconfig_params.target_width = target_width; + reconfig_params.target_height = target_height; + reconfig_params.display_rect.left = 
format->display_area.left; + reconfig_params.display_rect.right = format->display_area.right; + reconfig_params.display_rect.top = format->display_area.top; + reconfig_params.display_rect.bottom = format->display_area.bottom; + CHECK(rocDecReconfigureDecoder(p_dec_info->decoder, &reconfig_params)); + p_dec_info->is_decoder_reconfigured = true; + p_dec_info->disp_rect.top = format->display_area.top; + p_dec_info->disp_rect.bottom = format->display_area.bottom; + p_dec_info->disp_rect.left = format->display_area.left; + p_dec_info->disp_rect.right = format->display_area.right; + rocDecVideoChromaFormat video_chroma_format = format->chroma_format; + // Map the stream's chroma format to the output surface format (16-bit variant when the bit depth is above 8). Monochrome shares the 4:2:0 surfaces and has to be compared explicitly: a bare enumerator inside || only contributes its own truth value. + if (video_chroma_format == rocDecVideoChromaFormat_420 || video_chroma_format == rocDecVideoChromaFormat_Monochrome) + { + p_dec_info->surf_format = bitdepth_minus_8 ? rocDecVideoSurfaceFormat_P016 : rocDecVideoSurfaceFormat_NV12; + } + else if (video_chroma_format == rocDecVideoChromaFormat_444) + { + p_dec_info->surf_format = bitdepth_minus_8 ? rocDecVideoSurfaceFormat_YUV444_16Bit : rocDecVideoSurfaceFormat_YUV444; + } + else if (video_chroma_format == rocDecVideoChromaFormat_422) + { + p_dec_info->surf_format = bitdepth_minus_8 ? rocDecVideoSurfaceFormat_YUV422_16Bit : rocDecVideoSurfaceFormat_YUV422; + } + p_dec_info->coded_width = format->coded_width; + p_dec_info->coded_height = format->coded_height; + p_dec_info->bytes_per_pixel = bitdepth_minus_8 > 0 ? 
2 : 1; + std::ostringstream input_video_info_str; + input_video_info_str.str(""); + input_video_info_str.clear(); + input_video_info_str << "Input Video Information" << std::endl + << "\tCodec : " << format->codec << std::endl; + if (format->frame_rate.numerator && format->frame_rate.denominator) + { + input_video_info_str << "\tFrame rate : " << format->frame_rate.numerator << "/" << format->frame_rate.denominator << " = " << 1.0 * format->frame_rate.numerator / format->frame_rate.denominator << " fps" << std::endl; + } + input_video_info_str << "\tSequence : " << (format->progressive_sequence ? "Progressive" : "Interlaced") << std::endl + << "\tCoded size : [" << format->coded_width << ", " << format->coded_height << "]" << std::endl + << "\tDisplay area : [" << format->display_area.left << ", " << format->display_area.top << ", " + << format->display_area.right << ", " << format->display_area.bottom << "]" << std::endl + << "\tBit depth : " << format->bit_depth_luma_minus8 + 8 + ; + input_video_info_str << std::endl; + std::cout << input_video_info_str.str(); + return 1; +} + +// Decode callback: forwards the parsed picture parameters to rocDecDecodeFrame. Always returns 1. +int ROCDECAPI handle_picture_decode(void* user_data, RocdecPicParams* params) +{ + decoder_info *p_dec_info = static_cast(user_data); + CHECK(rocDecDecodeFrame(p_dec_info->decoder, params)); + return 1; +} + +// Display callback: fetches the decoded surface for the picture index in disp_info and dumps it when dump_decoded_frames is set. Always returns 1. +int ROCDECAPI handle_picture_display(void* user_data, RocdecParserDispInfo* disp_info) +{ + decoder_info *p_dec_info = static_cast(user_data); + RocdecProcParams params = {}; + params.progressive_frame = disp_info->progressive_frame; + params.top_field_first = disp_info->top_field_first; + // get device memory pointer for decoded output surface + void* dev_mem_ptr[3] = { 0 }; + uint32_t pitch[3] = { 0 }; + CHECK(rocDecGetVideoFrame(p_dec_info->decoder, disp_info->picture_index, dev_mem_ptr, pitch, &params)); + + if (p_dec_info->dump_decoded_frames) + { + save_frame_to_file(p_dec_info, dev_mem_ptr, pitch); + } + return 1; +} + +void create_parser(decoder_info& dec_info) +{ + 
RocdecParserParams params = {}; + params.codec_type = dec_info.rocdec_codec_id; + params.max_num_decode_surfaces = 6; + params.max_display_delay = 1; // min display delay of 1 is recommended to get optimal performance from hardware decoder + params.user_data = &dec_info; + params.pfn_sequence_callback = handle_video_sequence; + params.pfn_decode_picture = handle_picture_decode; + params.pfn_display_picture = handle_picture_display; + CHECK(rocDecCreateVideoParser(&dec_info.parser, &params)); +} + +// Feeds every buffered frame to the decoder selected by dec_info.backend: through the video parser for the device backend, directly to the host decoder for the host backend. The last buffer is flagged with ROCDEC_PKT_ENDOFPICTURE. +void decode_frames(decoder_info& dec_info, const std::vector>& frames) +{ + // gpu backend using VCN + if (dec_info.backend == DECODER_BACKEND_DEVICE) + { + for (int i=0; i < static_cast(frames.size()); ++i) + { + RocdecSourceDataPacket packet = {}; + packet.payload_size = frames[i].size(); + packet.payload = frames[i].data(); + if (i == static_cast(frames.size() - 1)) + { + packet.flags = ROCDEC_PKT_ENDOFPICTURE; // mark end_of_picture flag for last frame + } + CHECK(rocDecParseVideoData(dec_info.parser, &packet)); + } + } + else if (dec_info.backend == DECODER_BACKEND_HOST) + { + for (int i=0; i < static_cast(frames.size()); ++i) + { + RocdecPicParamsHost pic_params = {}; + pic_params.bitstream_data_len = frames[i].size(); + pic_params.bitstream_data = frames[i].data(); + if (i == static_cast(frames.size() - 1)) + { + pic_params.flags = ROCDEC_PKT_ENDOFPICTURE; // mark end_of_picture flag for last frame + } + CHECK(rocDecDecodeFrameHost(dec_info.decoder, &pic_params)); + } + } +} + +// Destroys the decoder with the API matching dec_info.backend. +void destroy_decoder(decoder_info& dec_info) +{ + if (dec_info.backend == DECODER_BACKEND_DEVICE) + { + CHECK(rocDecDestroyDecoder(dec_info.decoder)); + } + else if (dec_info.backend == DECODER_BACKEND_HOST) + { + CHECK(rocDecDestroyDecoderHost(dec_info.decoder)); + } +} + +// Destroys the video parser; only the device backend destroys one here. +void destroy_parser(decoder_info& dec_info) +{ + if (dec_info.backend == DECODER_BACKEND_DEVICE) + { + CHECK(rocDecDestroyVideoParser(dec_info.parser)); + } +} + +// helper function for sort +std::string get_last_part(const 
std::string& str, char delimiter) +{ + size_t pos = str.find_last_of(delimiter); + if (pos == std::string::npos) + { + return str; // Delimiter not found, return the whole string + } + return str.substr(pos + 1); +} + +// helper function for sort +int extract_number(const std::string& filename) +{ + std::string num_str; + for (char c : filename) + { + if (std::isdigit(c)) + { + num_str += c; + } + else if (!num_str.empty()) + { + break; // Stop at first non-digit after a digit sequence + } + } + return num_str.empty() ? 0 : std::stoi(num_str); +} + +// helper function for sort +// Sort entries based on the numerical part of their filenames +bool compare_filenames(const std::string& a, const std::string& b) +{ + int num_a = extract_number(a); + int num_b = extract_number(b); + if (num_a != num_b) + { + return num_a < num_b; + } + return a < b; // Fallback to lexicographical comparison +} + +int main(int argc, char** argv) +{ + std::string input_file_path, output_file_path; + int dump_output_frames = 0; + int device_id = 0; + decoder_backend backend = DECODER_BACKEND_DEVICE; + int num_iterations = 1; + std::vector input_file_names; + int codec_type = 0; // default for HEVC + decoder_info dec_info; + + // Parse command-line arguments + cli::Parser parser(argc, argv); + parser.set_required("i", "input", "Input File Path"); + parser.set_optional("o", "output", "", "Output File Path - dumps output if requested"); + parser.set_optional("d", "device", 0, "GPU device ID (0 for the first device, 1 for the second, etc.)"); + parser.set_optional("b", "backend", 0, "backend (0 for GPU, 1 CPU-FFMpeg)"); + parser.set_optional("c", "codec", 0, "codec (0 : HEVC, 1 : H264, 2: AV1, 3: VP9, 4: VP8, 5: JPEG)"); + parser.set_optional("n", "iterations", 1, "Number of iteration - specify the number of iterations for performance evaluation"); + parser.run_and_exit_if_error(); + + // Get parsed arguments + input_file_path = parser.get("i"); + output_file_path = parser.get("o"); + device_id 
= parser.get("d"); + backend = static_cast(parser.get("b")); + codec_type = parser.get("c"); + num_iterations = parser.get("n"); + + if (!output_file_path.empty()) + { + dec_info.output_file_path = output_file_path; + dump_output_frames = true; + } + + bool b_sort_filenames = false; + if (std::filesystem::is_directory(input_file_path)) + { + for (const auto& entry : std::filesystem::directory_iterator(input_file_path)) + { + if (entry.is_directory()) + { + std::vector file_names_sub_folder; + for (const auto& sub_entry : std::filesystem::directory_iterator(entry)) + { + file_names_sub_folder.push_back(sub_entry.path()); + } + std::sort(file_names_sub_folder.begin(), file_names_sub_folder.end(), compare_filenames); + input_file_names.insert(input_file_names.end(), file_names_sub_folder.begin(), file_names_sub_folder.end()); + file_names_sub_folder.clear(); + } + else if(entry.is_regular_file()) + { + b_sort_filenames = true; + input_file_names.push_back(entry.path()); + } + else + { + std::cout << "unknown file type in input folder: " << entry.path().string() << '\n'; + continue; + } + } + if (b_sort_filenames) + { + std::sort(input_file_names.begin(), input_file_names.end(), compare_filenames); + } + } + else + { + input_file_names.push_back(input_file_path); + } + + std::cout << "Read " << input_file_names.size() << " frames from disk." << std::endl; + + dec_info.rocdec_codec_id = codec_type_to_roc_dec_video_codec(codec_type); + dec_info.dec_device_id = device_id; + dec_info.mem_type = (!backend) ? 
OUT_SURFACE_MEM_DEV_INTERNAL : OUT_SURFACE_MEM_HOST; + init(); + if (backend == DECODER_BACKEND_DEVICE) + { + create_parser(dec_info); + create_decoder(dec_info); + } + else + { + create_decoder_host(dec_info); + } + dec_info.dump_decoded_frames = dump_output_frames; + auto input_frames = read_frames(input_file_names); + auto start = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < num_iterations; i++) + { + decode_frames(dec_info, input_frames); + } + auto end = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast(end - start).count(); + std::cout << "Decoding time: " << elapsed << " microseconds" << std::endl; + destroy_decoder(dec_info); + destroy_parser(dec_info); + std::cout << "Success." << std::endl << std::endl << std::endl; + return 0; +} diff --git a/Libraries/rocDecode/video_decode/.gitignore b/Libraries/rocDecode/video_decode/.gitignore new file mode 100644 index 000000000..659db89d9 --- /dev/null +++ b/Libraries/rocDecode/video_decode/.gitignore @@ -0,0 +1 @@ +rocdecode_video_decode diff --git a/Libraries/rocDecode/video_decode/CMakeLists.txt b/Libraries/rocDecode/video_decode/CMakeLists.txt new file mode 100644 index 000000000..4d94d6230 --- /dev/null +++ b/Libraries/rocDecode/video_decode/CMakeLists.txt @@ -0,0 +1,148 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +set(example_name rocdecode_video_decode) + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(${example_name} LANGUAGES CXX) + +include("../../../Common/HipPlatform.cmake") +select_gpu_language() + +enable_language(${ROCM_EXAMPLES_GPU_LANGUAGE}) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD 17) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_EXTENSIONS OFF) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD_REQUIRED ON) +select_hip_platform() +verify_hip_platform(PLATFORMS "amd") + +if(CMAKE_SYSTEM_NAME MATCHES "Windows") + message(STATUS "rocDecode examples are only available on Linux") + return() +else() + set(ROCM_ROOT + "/opt/rocm" + CACHE PATH + "Root directory of the ROCm installation" + ) +endif() + +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +find_package(rocdecode REQUIRED) + +# Try to find the host library directly (handles both naming conventions) +find_library(ROCDECODE_HOST_LIB + NAMES rocdecode-host rocdecodehost + PATHS ${ROCM_ROOT}/lib + NO_DEFAULT_PATH +) + +find_package(rocprofiler-register REQUIRED) +find_package(Threads REQUIRED) + +# Find FFmpeg libraries +find_library(AVCODEC_LIBRARY avcodec REQUIRED) +find_library(AVFORMAT_LIBRARY avformat REQUIRED) +find_library(AVUTIL_LIBRARY avutil REQUIRED) + +find_path(AVCODEC_INCLUDE_DIR libavcodec/avcodec.h REQUIRED) +find_path(AVFORMAT_INCLUDE_DIR libavformat/avformat.h REQUIRED) +find_path(AVUTIL_INCLUDE_DIR libavutil/avutil.h REQUIRED) + +# Check FFmpeg version for compatibility using pkg-config (same as 
Makefile) +execute_process( + COMMAND pkg-config --modversion libavcodec + OUTPUT_VARIABLE AVCODEC_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) + +# If pkg-config fails, try to get version from header as fallback +if(NOT AVCODEC_VERSION AND AVCODEC_INCLUDE_DIR) + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MAJOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MAJOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MINOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MINOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MICRO_LINE + REGEX "^#define LIBAVCODEC_VERSION_MICRO") + + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MAJOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MAJOR "${AVCODEC_VERSION_MAJOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MINOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MINOR "${AVCODEC_VERSION_MINOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MICRO[ \t]+([0-9]+).*$" "\\1" AVCODEC_MICRO "${AVCODEC_VERSION_MICRO_LINE}") + + set(AVCODEC_VERSION "${AVCODEC_MAJOR}.${AVCODEC_MINOR}.${AVCODEC_MICRO}") +endif() + +add_executable(${example_name} + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp + ${ROCM_ROOT}/share/rocdecode/utils/ffmpegvideodecode/ffmpeg_video_dec.cpp +) + +target_link_libraries(${example_name} + PRIVATE + rocdecode::rocdecode + rocprofiler-register::rocprofiler-register + ${AVCODEC_LIBRARY} + ${AVFORMAT_LIBRARY} + ${AVUTIL_LIBRARY} + Threads::Threads +) + +# Link host library if found +if(ROCDECODE_HOST_LIB) + target_link_libraries(${example_name} PRIVATE ${ROCDECODE_HOST_LIB}) + target_compile_definitions(${example_name} PRIVATE ENABLE_HOST_DECODE=1) +else() + target_compile_definitions(${example_name} PRIVATE ENABLE_HOST_DECODE=0) +endif() + +target_include_directories( + ${example_name} + PRIVATE + "../../../Common" + "../../../External" + ".." 
+ "${ROCM_ROOT}/share/rocdecode/utils" + "${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode" + "${ROCM_ROOT}/share/rocdecode/utils/ffmpegvideodecode" + ${AVCODEC_INCLUDE_DIR} + ${AVFORMAT_INCLUDE_DIR} + ${AVUTIL_INCLUDE_DIR} +) + +# FFMPEG multi-version support +if(AVCODEC_VERSION VERSION_LESS_EQUAL 58.134.100) + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=0) +else() + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=1) +endif() + +set_source_files_properties( + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp + ${ROCM_ROOT}/share/rocdecode/utils/ffmpegvideodecode/ffmpeg_video_dec.cpp + PROPERTIES LANGUAGE ${ROCM_EXAMPLES_GPU_LANGUAGE} +) + +install(TARGETS ${example_name}) diff --git a/Libraries/rocDecode/video_decode/Makefile b/Libraries/rocDecode/video_decode/Makefile new file mode 100644 index 000000000..6ab81ec5b --- /dev/null +++ b/Libraries/rocDecode/video_decode/Makefile @@ -0,0 +1,96 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +EXAMPLE := rocdecode_video_decode +COMMON_INCLUDE_DIR := ../../../Common +EXTERNAL_DIR := ../../../External +GPU_RUNTIME := HIP + +# HIP variables +ROCM_INSTALL_DIR := /opt/rocm +UTILS_DIR := ${ROCM_INSTALL_DIR}/share/rocdecode/utils + +HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include +ROCDECODE_INCLUDE_DIR := $(HIP_INCLUDE_DIR) + +HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc + +# Detect which rocdecode host library is available +ROCDECODE_HOST_LIB := $(shell if [ -f $(ROCM_INSTALL_DIR)/lib/librocdecode-host.so ]; then \ + echo "rocdecode-host"; \ + elif [ -f $(ROCM_INSTALL_DIR)/lib/librocdecodehost.so ]; then \ + echo "rocdecodehost"; \ + fi) + +# Common variables and flags +CXX_STD := c++17 +ICXXFLAGS := -std=$(CXX_STD) +ICPPFLAGS := -isystem $(ROCDECODE_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -I $(EXTERNAL_DIR) -I .. -I $(UTILS_DIR) -I $(UTILS_DIR)/rocvideodecode -I $(UTILS_DIR)/ffmpegvideodecode +ILDFLAGS := -L $(ROCM_INSTALL_DIR)/lib +ILDLIBS := -lrocdecode -lrocprofiler-register -lavcodec -lavformat -lavutil -lpthread + +ifeq ($(GPU_RUNTIME), HIP) + CXXFLAGS ?= -Wall -Wextra + CPPFLAGS += -D__HIP_PLATFORM_AMD__ + + # Add host decode library and flag if available + ifneq ($(ROCDECODE_HOST_LIB),) + ILDLIBS += -l$(ROCDECODE_HOST_LIB) + CPPFLAGS += -DENABLE_HOST_DECODE=1 + else + CPPFLAGS += -DENABLE_HOST_DECODE=0 + endif + + # FFmpeg version detection + AVCODEC_VERSION := $(shell pkg-config --modversion libavcodec 2>/dev/null || echo "0") + AVCODEC_MAJOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f1) + AVCODEC_MINOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f2) + AVCODEC_PATCH := $(shell echo $(AVCODEC_VERSION) | cut -d. 
-f3) + + # Compare version (58.134.100) + ifeq ($(shell test $(AVCODEC_MAJOR) -lt 58 || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -lt 134) || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -eq 134 && test $(AVCODEC_PATCH) -le 100); echo $$?),0) + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=0 + else + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=1 + endif + + COMPILER := $(HIPCXX) +else + $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP; CUDA not supported) +endif + +ICXXFLAGS += $(CXXFLAGS) +ICPPFLAGS += $(CPPFLAGS) +ILDFLAGS += $(LDFLAGS) +ILDLIBS += $(LDLIBS) + +SOURCES := main.cpp $(UTILS_DIR)/rocvideodecode/roc_video_dec.cpp $(UTILS_DIR)/ffmpegvideodecode/ffmpeg_video_dec.cpp + +$(EXAMPLE): $(SOURCES) $(COMMON_INCLUDE_DIR)/example_utils.hpp $(COMMON_INCLUDE_DIR)/rocdecode_utils.hpp + $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $(SOURCES) $(ILDLIBS) + +clean: + $(RM) $(EXAMPLE) + +.PHONY: clean diff --git a/Libraries/rocDecode/video_decode/README.md b/Libraries/rocDecode/video_decode/README.md new file mode 100644 index 000000000..ca646943c --- /dev/null +++ b/Libraries/rocDecode/video_decode/README.md @@ -0,0 +1,114 @@ +# rocDecode Video Decode + +## Description + +This example illustrates the use of the rocDecode library for decoding a single packetized video stream using FFMPEG demuxer, video parser, and hardware-accelerated decoder to obtain individual decoded frames in YUV format. The sample demonstrates the standard video decoding workflow with configurable options for device selection, output file dumping, frame limiting, and MD5 validation. It uses a high-level wrapper class that integrates both the video parser and decoder for simplified usage. + +## Application Flow + +1. Parse command-line arguments for input file, output path, device ID, and decoding options. +2. Initialize the FFMPEG video demuxer to extract codec information and video packets. +3. 
Create the video decoder instance with specified codec, device, and output configuration. +4. Verify codec support on the selected GPU device. +5. Set up optional MD5 generator for frame validation. +6. Loop through video stream: + - Demux video packets from input file. + - Decode frames using the rocDecode API. + - Retrieve decoded frames from the decoder. + - Optionally save frames to output file. + - Optionally generate MD5 digest for validation. + - Release decoded frames back to the decoder. +7. Display decoding statistics including frame count and performance metrics. +8. Optionally compare generated MD5 digest with reference. +9. Clean up decoder and demuxer resources. + +## Key APIs and Concepts + +- **Video Demuxer**: Uses FFMPEG libraries to demux video files and extract codec parameters, frame rate, resolution, and compressed video packets. The demuxer supports various container formats (MP4, MKV, AVI, etc.) and provides packet-level access to the video stream. + +- **Decoder Initialization**: The decoder is initialized with: + - `rocDecCreateDecoder()`: Creates a decoder instance configured with codec type, output surface format, target dimensions, and memory type. + - Configuration includes output surface memory type (device internal, device copied, host copied, or not mapped). + - Optional crop rectangle for region-of-interest decoding. + - Display delay parameter for controlling output latency. + +- **Video Parser**: + - `rocDecCreateVideoParser()`: Creates a parser that handles bitstream parsing and manages the decode pipeline through callbacks. + - `rocDecParseVideoData()`: Parses compressed video data and triggers decode operations. + - Parser callbacks handle sequence changes, picture decode, and picture display events. + +- **Frame Decoding**: + - `rocDecDecodeFrame()`: Decodes a frame using hardware acceleration. Called internally by the parser callback. 
+ - `rocDecGetDecodeStatus()`: Queries the decode status of a frame to ensure completion before retrieval. + - Supports various output formats including NV12 and YUV444 and their 16-bit variants (P016 and 16-bit YUV444). + +- **Frame Management**: + - Decoded frames are retrieved using `rocDecGetVideoFrame()` which provides device memory pointers. + - Frames must be explicitly released using the release mechanism to return surfaces to the decoder pool. + - Supports configurable display delay for reordering frames from decode order to display order. + +- **Output Options**: + - Frames can be saved to file in raw YUV format. + - MD5 digest generation for decoded frame validation. + - SEI (Supplemental Enhancement Information) message extraction. + - Configurable output surface memory types for different use cases. + +## Demonstrated API Calls + +### rocDecode APIs + +- `rocDecCreateDecoder` +- `rocDecDecodeFrame` +- `rocDecGetVideoFrame` +- `rocDecGetDecodeStatus` +- `rocDecDestroyDecoder` +- `rocDecCreateVideoParser` +- `rocDecParseVideoData` +- `rocDecDestroyVideoParser` +- `rocDecGetErrorName` + +### HIP Runtime APIs + +- `hipSetDevice` +- `hipGetDeviceProperties` +- `hipMalloc` +- `hipFree` +- `hipMemcpy` +- `hipMemcpyDtoH` + +### FFMPEG APIs + +- `avformat_open_input` +- `avformat_find_stream_info` +- `av_find_best_stream` +- `av_read_frame` +- `av_packet_alloc` +- `av_packet_free` +- `av_packet_unref` +- `avformat_close_input` +- `av_bsf_get_by_name` +- `av_bsf_alloc` +- `av_bsf_init` +- `av_bsf_send_packet` +- `av_bsf_receive_packet` +- `av_bsf_free` +- `av_rescale_q` +- `av_q2d` + +### Data Types and Enums + +- `rocDecDecoderHandle` +- `RocdecVideoParser` +- `rocDecVideoCodec` +- `rocDecVideoSurfaceFormat` +- `rocDecVideoChromaFormat` +- `RocDecoderCreateInfo` +- `RocdecParserParams` +- `RocdecVideoFormat` +- `RocdecPicParams` +- `RocdecParserDispInfo` +- `RocdecSeiMessageInfo` +- `rocDecDecodeStatus` +- `AVCodecID` +- `AVFormatContext` +- `AVPacket` diff --git 
a/Libraries/rocDecode/video_decode/main.cpp b/Libraries/rocDecode/video_decode/main.cpp new file mode 100644 index 000000000..877865d7d --- /dev/null +++ b/Libraries/rocDecode/video_decode/main.cpp @@ -0,0 +1,505 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "CmdParser/cmdparser.hpp" +#include "example_utils.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if __cplusplus >= 201703L && __has_include() + #include +#else + #include +#endif + +#include "ffmpeg_video_dec.h" +#include "rocdecode/roc_bitstream_reader.h" +#include "roc_video_dec.h" +#include "video_demuxer.h" + +#include "rocdecode_utils.hpp" + +//hardcoding for host based decoder creation if demux is not available +#define DEFAULT_WIDTH 2912 +#define DEFAULT_HEIGHT 1888 + +int main(int argc, char** argv) +{ + std::string input_file_path, output_file_path, md5_file_path; + std::fstream ref_md5_file; + int dump_output_frames = 0; + int device_id = 0; + int disp_delay = 1; + int backend = 0; + bool b_force_zero_latency + = false; // false by default: enabling this option might affect decoding performance + bool b_extract_sei_messages = false; + bool b_generate_md5 = false; + bool b_md5_check = false; + Rect crop_rect = {}; + Rect* p_crop_rect = nullptr; + OutputSurfaceMemoryType mem_type = OUT_SURFACE_MEM_DEV_INTERNAL; // set to internal + ReconfigParams reconfig_params = {}; + reconfig_dump_file_struct reconfig_user_struct = {}; + uint32_t num_decoded_frames = 0; // default value is 0, meaning decode the entire stream + // seek options + uint64_t seek_to_frame = 0; + int seek_criteria = 0, seek_mode = 0; + bool b_use_ffmpeg_demuxer + = true; // true by default to use FFMPEG demuxer. set to false to use the built-in bitstream reader. 
+ + // Parse command-line arguments + cli::Parser parser(argc, argv); + parser.set_required("i", "input", "Input File Path"); + parser.set_optional("o", + "output", + "", + "Output File Path - dumps output if requested"); + parser.set_optional("d", + "device", + 0, + "GPU device ID (0 for the first device, 1 for the second, etc.)"); + parser.set_optional("backend", "backend", 0, "backend (0 for GPU, 1 CPU-FFMpeg)"); + parser.set_optional( + "f", + "frames", + 0, + "Number of decoded frames - specify the number of pictures to be decoded"); + parser.set_optional( + "z", + "zero_latency", + false, + "force_zero_latency (Decoded frames will be flushed out for display immediately)"); + parser.set_optional("disp_delay", + "disp_delay", + 1, + "specify the number of frames to be delayed for display"); + parser.set_optional("sei", "sei", false, "extract SEI messages"); + parser.set_optional("md5", + "md5", + false, + "generate MD5 message digest on the decoded YUV image sequence"); + parser.set_optional( + "md5_check", + "md5_check", + "", + "MD5 File Path - generate MD5 message digest and compare to reference"); + parser.set_optional("crop", + "crop", + "", + "crop rectangle for output (format: left,top,right,bottom)"); + parser.set_optional("m", + "memory_type", + 0, + "output_surface_memory_type - decoded surface memory " + "[0:DEV_INTERNAL/1:DEV_COPIED/2:HOST_COPIED/3:NOT_MAPPED]"); + parser.set_optional( + "seek_criteria", + "seek_criteria", + "0,0", + "Demux seek criteria & value [0:no seek;1:FRAME_NUM;2:TIME_STAMP]"); + parser.set_optional("seek_mode", + "seek_mode", + 0, + "Seek mode [0:PREV_KEY_FRAME;1:EXACT_FRAME]"); + parser.set_optional("no_ffmpeg_demux", + "no_ffmpeg_demux", + false, + "use the built-in bitstream reader instead of FFMPEG demuxer"); + parser.run_and_exit_if_error(); + + // Get parsed arguments + input_file_path = parser.get("i"); + output_file_path = parser.get("o"); + device_id = parser.get("d"); + backend = parser.get("backend"); + 
num_decoded_frames = parser.get("f"); + b_force_zero_latency = parser.get("z"); + disp_delay = parser.get("disp_delay"); + b_extract_sei_messages = parser.get("sei"); + b_generate_md5 = parser.get("md5"); + md5_file_path = parser.get("md5_check"); + mem_type = static_cast(parser.get("m")); + seek_mode = parser.get("seek_mode"); + b_use_ffmpeg_demuxer = !parser.get("no_ffmpeg_demux"); + + if(!output_file_path.empty()) + { + dump_output_frames = 1; + } + + if(!md5_file_path.empty()) + { + b_generate_md5 = true; + b_md5_check = true; + } + + // Parse crop rectangle + std::string crop_str = parser.get("crop"); + if(!crop_str.empty()) + { + if(4 + != sscanf(crop_str.c_str(), + "%d,%d,%d,%d", + &crop_rect.left, + &crop_rect.top, + &crop_rect.right, + &crop_rect.bottom)) + { + std::cerr << "Error: Invalid crop format. Use: left,top,right,bottom" << std::endl; + return 1; + } + if((crop_rect.right - crop_rect.left) % 2 == 1 + || (crop_rect.bottom - crop_rect.top) % 2 == 1) + { + std::cout << "output crop rectangle must have width and height of even numbers" + << std::endl; + return 1; + } + p_crop_rect = &crop_rect; + } + + // Parse seek criteria + std::string seek_str = parser.get("seek_criteria"); + if(2 != sscanf(seek_str.c_str(), "%d,%lu", &seek_criteria, &seek_to_frame)) + { + std::cerr << "Error: Invalid seek_criteria format. 
Use: criteria,frame" << std::endl; + return 1; + } + if(0 > seek_criteria || seek_criteria >= 3) + { + std::cerr << "Error: Invalid seek_criteria value" << std::endl; + return 1; + } + + try + { + std::size_t found_file = input_file_path.find_last_of('/'); + std::cout << "info: Input file: " << input_file_path.substr(found_file + 1) << std::endl; + VideoDemuxer* demuxer; + RocdecBitstreamReader bs_reader = nullptr; + rocDecVideoCodec rocdec_codec_id; + int bit_depth; + + if(b_use_ffmpeg_demuxer) + { + std::cout << "info: Using FFMPEG demuxer" << std::endl; + demuxer = new VideoDemuxer(input_file_path.c_str()); + rocdec_codec_id = AVCodec2RocDecVideoCodec(demuxer->GetCodecID()); + bit_depth = demuxer->GetBitDepth(); + } + else + { + std::cout << "info: Using built-in bitstream reader" << std::endl; + if(rocDecCreateBitstreamReader(&bs_reader, input_file_path.c_str()) != ROCDEC_SUCCESS) + { + std::cerr << "Failed to create the bitstream reader." << std::endl; + return 1; + } + if(rocDecGetBitstreamCodecType(bs_reader, &rocdec_codec_id) != ROCDEC_SUCCESS) + { + std::cerr << "Failed to get stream codec type." << std::endl; + return 1; + } + if(rocdec_codec_id >= rocDecVideoCodec_NumCodecs) + { + std::cerr << "Unsupported stream file type or codec type by the bitstream reader. " + "Exiting." + << std::endl; + return 1; + } + if(rocDecGetBitstreamBitDepth(bs_reader, &bit_depth) != ROCDEC_SUCCESS) + { + std::cerr << "Failed to get stream bit depth." << std::endl; + return 1; + } + } + + RocVideoDecoder* viddec; + VideoSeekContext video_seek_ctx; + if(!backend) // gpu backend + { + viddec = new RocVideoDecoder(device_id, + mem_type, + rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay); + } + else + { +#if ENABLE_HOST_DECODE + std::cout << "info: RocDecode is using CPU backend!" << std::endl; + uint32_t max_width = b_use_ffmpeg_demuxer ? demuxer->GetWidth() : DEFAULT_WIDTH; + uint32_t max_height = b_use_ffmpeg_demuxer ? 
demuxer->GetHeight() : DEFAULT_HEIGHT; + if(mem_type == OUT_SURFACE_MEM_DEV_INTERNAL) + { + mem_type + = OUT_SURFACE_MEM_DEV_COPIED; // mem_type internal is not supported in this mode + } + viddec = new FFMpegVideoDecoder(device_id, + mem_type, + rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay, + max_width, + max_height); +#else + std::cout << "Error: RocDecode HOST library is not found and backend is not supported!" + << std::endl; + return 0; +#endif + } + + if(!viddec->CodecSupported(device_id, rocdec_codec_id, bit_depth)) + { + std::cerr << "rocDecode doesn't support codec!" << std::endl; + return 0; + } + std::string device_name, gcn_arch_name; + int pci_bus_id, pci_domain_id, pci_device_id; + + viddec->GetDeviceinfo(device_name, gcn_arch_name, pci_bus_id, pci_domain_id, pci_device_id); + std::cout << "info: Using GPU device " << device_id << " - " << device_name << "[" + << gcn_arch_name << "] on PCI bus " << std::setfill('0') << std::setw(2) + << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') + << std::setw(2) << std::right << std::hex << pci_domain_id << "." << pci_device_id + << std::dec << std::endl; + std::cout << "info: decoding started, please wait!" 
<< std::endl; + + int n_video_bytes = 0, n_frame_returned = 0, n_frame = 0; + int n_pic_decoded = 0, decoded_pics = 0; + uint8_t* pvideo = nullptr; + int pkg_flags = 0; + uint8_t* pframe = nullptr; + int64_t pts = 0; + OutputSurfaceInfo* surf_info; + double total_dec_time = 0; + bool first_frame = true; + MD5Generator* md5_generator = nullptr; + + // initialize reconfigure params: the following is configured to dump to output which is relevant for this sample + reconfig_params.p_fn_reconfigure_flush = reconfigure_flush_callback; + reconfig_user_struct.b_dump_frames_to_file = dump_output_frames; + reconfig_user_struct.output_file_name = output_file_path; + reconfig_params.reconfig_flush_mode = RECONFIG_FLUSH_MODE_NONE; + if(dump_output_frames) + { + reconfig_params.reconfig_flush_mode |= RECONFIG_FLUSH_MODE_DUMP_TO_FILE; + } + if(b_generate_md5) + { + reconfig_params.reconfig_flush_mode |= RECONFIG_FLUSH_MODE_CALCULATE_MD5; + } + reconfig_params.p_reconfig_user_struct = &reconfig_user_struct; + + if(b_generate_md5) + { + md5_generator = new MD5Generator(); + md5_generator->InitMd5(); + reconfig_user_struct.md5_generator_handle = static_cast(md5_generator); + } + viddec->SetReconfigParams(&reconfig_params); + + do + { + auto start_time = std::chrono::high_resolution_clock::now(); + if(b_use_ffmpeg_demuxer) + { + if(seek_criteria == 1 && first_frame) + { + // use VideoSeekContext class to seek to given frame number + video_seek_ctx.seek_frame_ = seek_to_frame; + video_seek_ctx.seek_crit_ = SEEK_CRITERIA_FRAME_NUM; + video_seek_ctx.seek_mode_ + = (seek_mode ? 
SEEK_MODE_EXACT_FRAME : SEEK_MODE_PREV_KEY_FRAME); + demuxer->Seek(video_seek_ctx, &pvideo, &n_video_bytes); + pts = video_seek_ctx.out_frame_pts_; + std::cout << "info: Number of frames that were decoded during seek - " + << video_seek_ctx.num_frames_decoded_ << std::endl; + first_frame = false; + } + else if(seek_criteria == 2 && first_frame) + { + // use VideoSeekContext class to seek to given timestamp + video_seek_ctx.seek_frame_ = seek_to_frame; + video_seek_ctx.seek_crit_ = SEEK_CRITERIA_TIME_STAMP; + video_seek_ctx.seek_mode_ + = (seek_mode ? SEEK_MODE_EXACT_FRAME : SEEK_MODE_PREV_KEY_FRAME); + demuxer->Seek(video_seek_ctx, &pvideo, &n_video_bytes); + pts = video_seek_ctx.out_frame_pts_; + std::cout << "info: Duration of frame found after seek - " + << video_seek_ctx.out_frame_duration_ << " ms" << std::endl; + first_frame = false; + } + else + { + demuxer->Demux(&pvideo, &n_video_bytes, &pts); + } + } + else + { + if(rocDecGetBitstreamPicData(bs_reader, &pvideo, &n_video_bytes, &pts) + != ROCDEC_SUCCESS) + { + std::cerr << "Failed to get picture data." << std::endl; + return 1; + } + } + // Treat 0 bitstream size as end of stream indicator + if(n_video_bytes == 0) + { + pkg_flags |= ROCDEC_PKT_ENDOFSTREAM; + } + n_frame_returned + = viddec->DecodeFrame(pvideo, n_video_bytes, pkg_flags, pts, &decoded_pics); + + // get output surface info after the first decoded frame + if(!n_frame && !viddec->GetOutputSurfaceInfo(&surf_info)) + { + std::cerr << "Error: Failed to get Output Surface Info!" 
<< std::endl; + break; + } + for(int i = 0; i < n_frame_returned; i++) + { + pframe = viddec->GetFrame(&pts); + if(b_generate_md5) + { + md5_generator->UpdateMd5ForFrame(pframe, surf_info); + } + if(dump_output_frames && mem_type != OUT_SURFACE_MEM_NOT_MAPPED) + { + viddec->SaveFrameToFile(output_file_path, pframe, surf_info); + } + // release frame + viddec->ReleaseFrame(pts); + } + auto end_time = std::chrono::high_resolution_clock::now(); + auto time_per_decode + = std::chrono::duration(end_time - start_time).count(); + total_dec_time += time_per_decode; + n_frame += n_frame_returned; + n_pic_decoded += decoded_pics; + if(num_decoded_frames && num_decoded_frames <= static_cast(n_frame)) + { + break; + } + } + while(n_video_bytes); + + n_frame += viddec->GetNumOfFlushedFrames(); + std::cout << "info: Total pictures decoded: " << n_pic_decoded << std::endl; + std::cout << "info: Total frames output/displayed: " << n_frame << std::endl; + if(!dump_output_frames) + { + std::cout << "info: avg decoding time per picture: " << total_dec_time / n_pic_decoded + << " ms" << std::endl; + std::cout << "info: avg decode FPS: " << (n_pic_decoded / total_dec_time) * 1000 + << std::endl; + std::cout << "info: avg output/display time per frame: " << total_dec_time / n_frame + << " ms" << std::endl; + std::cout << "info: avg output/display FPS: " << (n_frame / total_dec_time) * 1000 + << std::endl; + } + else + { + if(mem_type == OUT_SURFACE_MEM_NOT_MAPPED) + { + std::cout << "info: saving frames with -m 3 option is not supported!" 
<< std::endl; + } + else + { + std::cout << "info: saved frames into " << output_file_path << std::endl; + } + } + if(b_generate_md5) + { + uint8_t* digest; + md5_generator->FinalizeMd5(&digest); + std::cout << "MD5 message digest: "; + for(int i = 0; i < 16; i++) + { + std::cout << std::setfill('0') << std::setw(2) << std::hex + << static_cast(digest[i]); + } + std::cout << std::endl; + if(b_md5_check) + { + std::string ref_md5_string(33, 0); + uint8_t ref_md5[16]; + ref_md5_file.open(md5_file_path.c_str(), std::ios::in); + if((ref_md5_file.rdstate() & std::ifstream::failbit) != 0) + { + std::cerr << "Failed to open MD5 file." << std::endl; + return 1; + } + ref_md5_file.getline(ref_md5_string.data(), ref_md5_string.length()); + if((ref_md5_file.rdstate() & std::ifstream::badbit) != 0) + { + std::cerr << "Failed to read MD5 digest string." << std::endl; + return 1; + } + for(int i = 0; i < 16; i++) + { + std::string part = ref_md5_string.substr(i * 2, 2); + ref_md5[i] = std::stoi(part, nullptr, 16); + } + if(memcmp(digest, ref_md5, 16) == 0) + { + std::cout << "MD5 digest matches the reference MD5 digest: "; + } + else + { + std::cout << "MD5 digest does not match the reference MD5 digest: "; + } + std::cout << ref_md5_string.c_str() << std::endl; + ref_md5_file.close(); + } + delete md5_generator; + } + if(b_use_ffmpeg_demuxer && demuxer) + { + delete demuxer; + } + else if(bs_reader) + { + rocDecDestroyBitstreamReader(bs_reader); + } + } + catch(const std::exception& ex) + { + std::cout << ex.what() << std::endl; + return 1; + } + + return 0; +} diff --git a/Libraries/rocDecode/video_decode_batch/.gitignore b/Libraries/rocDecode/video_decode_batch/.gitignore new file mode 100644 index 000000000..5114d95ae --- /dev/null +++ b/Libraries/rocDecode/video_decode_batch/.gitignore @@ -0,0 +1 @@ +rocdecode_video_decode_batch diff --git a/Libraries/rocDecode/video_decode_batch/CMakeLists.txt b/Libraries/rocDecode/video_decode_batch/CMakeLists.txt new file mode 100644 
index 000000000..3f063eafe --- /dev/null +++ b/Libraries/rocDecode/video_decode_batch/CMakeLists.txt @@ -0,0 +1,129 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +set(example_name rocdecode_video_decode_batch) + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(${example_name} LANGUAGES CXX) + +include("../../../Common/HipPlatform.cmake") +select_gpu_language() + +enable_language(${ROCM_EXAMPLES_GPU_LANGUAGE}) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD 17) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_EXTENSIONS OFF) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD_REQUIRED ON) +select_hip_platform() +verify_hip_platform(PLATFORMS "amd") + +if(CMAKE_SYSTEM_NAME MATCHES "Windows") + message(STATUS "rocDecode examples are only available on Linux") + return() +else() + set(ROCM_ROOT + "/opt/rocm" + CACHE PATH + "Root directory of the ROCm installation" + ) +endif() + +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +find_package(rocdecode REQUIRED) +find_package(rocprofiler-register REQUIRED) +find_package(Threads REQUIRED) + +# Find FFmpeg libraries +find_library(AVCODEC_LIBRARY avcodec REQUIRED) +find_library(AVFORMAT_LIBRARY avformat REQUIRED) +find_library(AVUTIL_LIBRARY avutil REQUIRED) + +find_path(AVCODEC_INCLUDE_DIR libavcodec/avcodec.h REQUIRED) +find_path(AVFORMAT_INCLUDE_DIR libavformat/avformat.h REQUIRED) +find_path(AVUTIL_INCLUDE_DIR libavutil/avutil.h REQUIRED) + +# Check FFmpeg version for compatibility using pkg-config (same as Makefile) +execute_process( + COMMAND pkg-config --modversion libavcodec + OUTPUT_VARIABLE AVCODEC_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) + +# If pkg-config fails, try to get version from header as fallback +if(NOT AVCODEC_VERSION AND AVCODEC_INCLUDE_DIR) + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MAJOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MAJOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MINOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MINOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MICRO_LINE + REGEX "^#define LIBAVCODEC_VERSION_MICRO") + + 
string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MAJOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MAJOR "${AVCODEC_VERSION_MAJOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MINOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MINOR "${AVCODEC_VERSION_MINOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MICRO[ \t]+([0-9]+).*$" "\\1" AVCODEC_MICRO "${AVCODEC_VERSION_MICRO_LINE}") + + set(AVCODEC_VERSION "${AVCODEC_MAJOR}.${AVCODEC_MINOR}.${AVCODEC_MICRO}") +endif() + +add_executable(${example_name} + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp +) + +target_link_libraries(${example_name} + PRIVATE + rocdecode::rocdecode + rocprofiler-register::rocprofiler-register + ${AVCODEC_LIBRARY} + ${AVFORMAT_LIBRARY} + ${AVUTIL_LIBRARY} + Threads::Threads + stdc++fs +) + +target_include_directories( + ${example_name} + PRIVATE + "../../../Common" + "../../../External" + "${ROCM_ROOT}/share/rocdecode/utils" + "${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode" + ${AVCODEC_INCLUDE_DIR} + ${AVFORMAT_INCLUDE_DIR} + ${AVUTIL_INCLUDE_DIR} +) + +# FFMPEG multi-version support +if(AVCODEC_VERSION VERSION_LESS_EQUAL 58.134.100) + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=0) +else() + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=1) +endif() + +set_source_files_properties( + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp + PROPERTIES LANGUAGE ${ROCM_EXAMPLES_GPU_LANGUAGE} +) + +install(TARGETS ${example_name}) diff --git a/Libraries/rocDecode/video_decode_batch/Makefile b/Libraries/rocDecode/video_decode_batch/Makefile new file mode 100644 index 000000000..03a43b1aa --- /dev/null +++ b/Libraries/rocDecode/video_decode_batch/Makefile @@ -0,0 +1,81 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +EXAMPLE := rocdecode_video_decode_batch +COMMON_INCLUDE_DIR := ../../../Common +EXTERNAL_DIR := ../../../External +GPU_RUNTIME := HIP + +# HIP variables +ROCM_INSTALL_DIR := /opt/rocm +UTILS_DIR := ${ROCM_INSTALL_DIR}/share/rocdecode/utils + +HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include +ROCDECODE_INCLUDE_DIR := $(HIP_INCLUDE_DIR) + +HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc + +# Common variables and flags +CXX_STD := c++17 +ICXXFLAGS := -std=$(CXX_STD) +ICPPFLAGS := -isystem $(ROCDECODE_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -I $(EXTERNAL_DIR) -I $(UTILS_DIR) -I $(UTILS_DIR)/rocvideodecode +ILDFLAGS := -L $(ROCM_INSTALL_DIR)/lib +ILDLIBS := -lrocdecode -lrocprofiler-register -lavcodec -lavformat -lavutil -lpthread -lstdc++fs + +ifeq ($(GPU_RUNTIME), HIP) + CXXFLAGS ?= -Wall -Wextra + CPPFLAGS += -D__HIP_PLATFORM_AMD__ + + # FFmpeg version detection + AVCODEC_VERSION := $(shell pkg-config --modversion libavcodec 2>/dev/null || echo "0") + AVCODEC_MAJOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f1) + AVCODEC_MINOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f2) + AVCODEC_PATCH := $(shell echo $(AVCODEC_VERSION) | cut -d. -f3) + + # Compare version (58.134.100) + ifeq ($(shell test $(AVCODEC_MAJOR) -lt 58 || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -lt 134) || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -eq 134 && test $(AVCODEC_PATCH) -le 100); echo $$?),0) + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=0 + else + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=1 + endif + + COMPILER := $(HIPCXX) +else + $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". 
GPU_RUNTIME must be HIP; CUDA not supported) +endif + +ICXXFLAGS += $(CXXFLAGS) +ICPPFLAGS += $(CPPFLAGS) +ILDFLAGS += $(LDFLAGS) +ILDLIBS += $(LDLIBS) + +SOURCES := main.cpp $(UTILS_DIR)/rocvideodecode/roc_video_dec.cpp + +$(EXAMPLE): $(SOURCES) $(COMMON_INCLUDE_DIR)/example_utils.hpp $(COMMON_INCLUDE_DIR)/rocdecode_utils.hpp + $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $(SOURCES) $(ILDLIBS) + +clean: + $(RM) $(EXAMPLE) + +.PHONY: clean diff --git a/Libraries/rocDecode/video_decode_batch/README.md b/Libraries/rocDecode/video_decode_batch/README.md new file mode 100644 index 000000000..f014f4eac --- /dev/null +++ b/Libraries/rocDecode/video_decode_batch/README.md @@ -0,0 +1,120 @@ +# rocDecode Batch Video Decode + +## Description + +This example demonstrates batch video decoding using multiple threads with the rocDecode library. The sample decodes multiple video files concurrently, distributing the workload across multiple threads to maximize GPU utilization and throughput. It showcases efficient multi-threaded video decoding with configurable thread count and automatic load balancing across available files. + +## Application Flow + +1. Parse command-line arguments for input directory, number of threads, device ID, and output options. +2. Scan the input directory to collect all video files. +3. Determine the optimal number of threads based on file count and user request (maximum 64 threads). +4. Distribute video files across threads using round-robin assignment. +5. For each thread: + - Initialize a separate video demuxer for assigned files. + - Create a dedicated decoder instance. + - Process assigned files sequentially within the thread. + - Decode all frames from each file. + - Optionally save decoded frames to output. + - Generate MD5 digest for validation if requested. +6. Synchronize all threads and collect decoding statistics. +7. Display aggregate performance metrics including total frames decoded and throughput. +8. 
Clean up all decoder and demuxer resources. + +## Key APIs and Concepts + +- **Multi-Threading**: The sample creates multiple threads, each with its own decoder instance, to process video files in parallel. This approach maximizes GPU utilization by keeping the hardware decoder busy with multiple decode streams. + +- **Thread-Safe Decoder Instances**: Each thread maintains its own: + - `rocDecCreateDecoder()`: Creates an independent decoder instance per thread. + - Video demuxer for reading input files. + - Frame buffers and output resources. + - This design avoids synchronization overhead and allows true parallel decoding. + +- **Load Balancing**: Files are distributed across threads using round-robin assignment: + - If files > threads: Multiple files per thread, distributed evenly. + - If files < threads: One file per thread, unused threads are not created. + - Ensures balanced workload across all active threads. + +- **Decoder Configuration**: Each decoder is configured with: + - Device ID for GPU selection. + - Output surface memory type (typically device internal for performance). + - Codec-specific parameters extracted from input files. + - Display delay and surface pool size for optimal throughput. + +- **Frame Processing Pipeline**: Within each thread: + - `rocDecParseVideoData()`: Parses video packets from demuxer. + - `rocDecDecodeFrame()`: Decodes frames using hardware acceleration. + - `rocDecGetVideoFrame()`: Retrieves decoded frames. + - Frames are processed and released to maintain decoder surface pool. + +- **Performance Optimization**: + - Parallel decoding across multiple streams. + - Minimal synchronization between threads. + - Efficient memory management with surface reuse. + - Optional output to avoid I/O bottlenecks during performance testing. 
+ +## Demonstrated API Calls + +### rocDecode APIs + +- `rocDecCreateDecoder` +- `rocDecDecodeFrame` +- `rocDecGetVideoFrame` +- `rocDecGetDecodeStatus` +- `rocDecDestroyDecoder` +- `rocDecCreateVideoParser` +- `rocDecParseVideoData` +- `rocDecDestroyVideoParser` +- `rocDecGetErrorName` + +### HIP Runtime APIs + +- `hipSetDevice` +- `hipGetDeviceCount` +- `hipGetDeviceProperties` +- `hipMalloc` +- `hipFree` +- `hipMemcpy` +- `hipMemcpyDtoH` + +### FFMPEG APIs + +- `avformat_open_input` +- `avformat_find_stream_info` +- `av_find_best_stream` +- `av_read_frame` +- `av_packet_alloc` +- `av_packet_free` +- `av_packet_unref` +- `avformat_close_input` +- `av_bsf_get_by_name` +- `av_bsf_alloc` +- `av_bsf_init` +- `av_bsf_send_packet` +- `av_bsf_receive_packet` +- `av_bsf_free` + +### C++ Standard Library (Threading) + +- `std::thread` +- `std::vector` +- `std::mutex` +- `std::chrono` + +### Data Types and Enums + +- `rocDecDecoderHandle` +- `RocdecVideoParser` +- `rocDecVideoCodec` +- `rocDecVideoSurfaceFormat` +- `rocDecVideoChromaFormat` +- `rocDecDecoderCreateInfo` +- `RocdecParserParams` +- `RocdecVideoFormat` +- `RocdecPicParams` +- `RocdecParserDispInfo` +- `rocDecDecodeStatus` +- `AVCodecID` +- `AVFormatContext` +- `AVPacket` diff --git a/Libraries/rocDecode/video_decode_batch/main.cpp b/Libraries/rocDecode/video_decode_batch/main.cpp new file mode 100644 index 000000000..ad7ad97c6 --- /dev/null +++ b/Libraries/rocDecode/video_decode_batch/main.cpp @@ -0,0 +1,708 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if __cplusplus >= 201703L && __has_include() + #include +#else + #include +#endif +#include "CmdParser/cmdparser.hpp" +#include "roc_video_dec.h" +#include "video_demuxer.h" + +#include "rocdecode_utils.hpp" + +// Fixed-size pool of worker threads that run queued decode jobs in FIFO order. +// Workers keep draining the queue after shutdown is requested and exit once it is empty. +class ThreadPool +{ +public: + ThreadPool(int n_threads) : shutdown_(false) + { + // Create the specified number of threads + threads_.reserve(n_threads); + for(int i = 0; i < n_threads; ++i) + { + threads_.emplace_back(std::bind(&ThreadPool::ThreadEntry, this, i)); + } + } + + // Join any workers that are still running: destroying a joinable std::thread + // calls std::terminate. Safe to run after an explicit JoinThreads() call. + ~ThreadPool() + { + JoinThreads(); + } + + // Requests shutdown, wakes every worker and waits for all of them to finish. + // May be called more than once; threads that were already joined are skipped. + void JoinThreads() + { + { + // Unblock any threads and tell them to stop + std::unique_lock lock(mutex_); + shutdown_ = true; + cond_var_.notify_all(); + } + + // Wait for all threads to stop + for(auto& thread : threads_) + { + if(thread.joinable()) + { + thread.join(); + } + } + } + + // Queues func for execution on one of the worker threads. + void ExecuteJob(std::function func) + { + // Place a job on the queue and unblock a thread + std::unique_lock lock(mutex_); + decode_jobs_queue_.emplace(std::move(func)); + cond_var_.notify_one(); + } + +protected: + // Worker loop: waits for a job or for shutdown, runs jobs one at a time and + // returns when it wakes up to an empty queue. + void ThreadEntry(int /*i*/) + { + std::function execute_decode_job; + + while(true) + { + { + std::unique_lock lock(mutex_); + cond_var_.wait(lock, [&] { return shutdown_ || !decode_jobs_queue_.empty(); }); + if(decode_jobs_queue_.empty()) + { + // No jobs to do; shutting down + return; + } + + execute_decode_job = std::move(decode_jobs_queue_.front()); + decode_jobs_queue_.pop(); + } + + // Execute the decode job without holding any locks + execute_decode_job(); + } + } + + std::mutex mutex_; + std::condition_variable cond_var_; + bool shutdown_; + std::queue> decode_jobs_queue_; + std::vector threads_; +}; + +struct DecoderInfo +{ + int dec_device_id; + std::unique_ptr viddec; + std::uint32_t bit_depth; + rocDecVideoCodec rocdec_codec_id; + std::atomic_bool decoding_complete; + + DecoderInfo() : dec_device_id(0), viddec(nullptr), bit_depth(8), 
decoding_complete(false) {} +}; + +void DecProc(RocVideoDecoder* p_dec, + VideoDemuxer* demuxer, + int* pn_frame, + double* pn_fps, + std::atomic_bool& decoding_complete, + bool& b_dump_output_frames, + std::string& output_file_name, + OutputSurfaceMemoryType mem_type) +{ + int n_video_bytes = 0, n_frame_returned = 0, n_frame = 0; + uint8_t * p_video = nullptr, *p_frame = nullptr; + int64_t pts = 0; + double total_dec_time = 0.0; + OutputSurfaceInfo* surf_info; + auto start_time = std::chrono::high_resolution_clock::now(); + do + { + demuxer->Demux(&p_video, &n_video_bytes, &pts); + n_frame_returned = p_dec->DecodeFrame(p_video, n_video_bytes, 0, pts); + n_frame += n_frame_returned; + if(b_dump_output_frames && mem_type != OUT_SURFACE_MEM_NOT_MAPPED) + { + if(n_frame_returned) + { + if(!p_dec->GetOutputSurfaceInfo(&surf_info)) + { + std::cerr << "Error: Failed to get Output Surface Info!" << std::endl; + break; + } + } + for(int i = 0; i < n_frame_returned; i++) + { + p_frame = p_dec->GetFrame(&pts); + p_dec->SaveFrameToFile(output_file_name, p_frame, surf_info); + // release frame + p_dec->ReleaseFrame(pts); + } + } + } + while(n_video_bytes); + n_frame += p_dec->GetNumOfFlushedFrames(); + + auto end_time = std::chrono::high_resolution_clock::now(); + auto time_per_decode = std::chrono::duration(end_time - start_time).count(); + + // Calculate average decoding time + total_dec_time = time_per_decode; + double average_decoding_time = total_dec_time / n_frame; + double n_fps = 1000 / average_decoding_time; + *pn_fps = n_fps; + *pn_frame = n_frame; + p_dec->ResetSaveFrameToFile(); + decoding_complete = true; +} + +int main(int argc, char** argv) +{ + // Parse command-line arguments + cli::Parser parser(argc, argv); + parser.set_required("i", "input", "Directory containing input video files"); + parser.set_optional("t", "threads", 4, "Number of threads (1 >= n_thread <= 64)"); + parser.set_optional("d", "device", 0, "Device ID (>= 0)"); + parser.set_optional("o", 
"output", "", "Directory for output YUV files"); + parser.set_optional("m", + "mem_type", + 3, + "Output surface memory type [0: DEV_INTERNAL, 1: DEV_COPIED, 2: " + "HOST_COPIED, 3: NOT_MAPPED]"); + parser.set_optional("disp_delay", + "disp_delay", + 1, + "Number of frames to be delayed for display"); + parser.run_and_exit_if_error(); + + std::string input_folder_path = parser.get("i"); + std::string output_folder_path = parser.get("o"); + int device_id = parser.get("d"); + int n_thread = parser.get("t"); + int disp_delay = parser.get("disp_delay"); + OutputSurfaceMemoryType mem_type = static_cast(parser.get("m")); + + if(n_thread <= 0 || n_thread > 64) + { + std::cerr << "Error: Number of threads must be between 1 and 64" << std::endl; + return 1; + } + + if(device_id < 0) + { + std::cerr << "Error: Device ID must be >= 0" << std::endl; + return 1; + } + + bool b_dump_output_frames = !output_folder_path.empty(); + if(b_dump_output_frames) + { +#if __cplusplus >= 201703L && __has_include() + if(std::filesystem::is_directory(output_folder_path)) + { + std::filesystem::remove_all(output_folder_path); + } + std::filesystem::create_directory(output_folder_path); +#else + if(std::experimental::filesystem::is_directory(output_folder_path)) + { + std::experimental::filesystem::remove_all(output_folder_path); + } + std::experimental::filesystem::create_directory(output_folder_path); +#endif + } + + int num_files = 0; + Rect* p_crop_rect = nullptr; + bool b_extract_sei_messages = false; + bool b_force_zero_latency = false; + std::vector input_file_names; + + try + { +#if __cplusplus >= 201703L && __has_include() + for(const auto& entry : std::filesystem::directory_iterator(input_folder_path)) + { +#else + for(const auto& entry : + std::experimental::filesystem::directory_iterator(input_folder_path)) + { +#endif + input_file_names.push_back(entry.path()); + num_files++; + } + + std::vector output_file_names(num_files); + n_thread = ((n_thread > num_files) ? 
num_files : n_thread); + int num_devices = 0, sd = 0; + hipError_t hip_status = hipSuccess; + hipDeviceProp_t hip_dev_prop; + std::string gcn_arch_name; + hip_status = hipGetDeviceCount(&num_devices); + if(hip_status != hipSuccess) + { + std::cout << "ERROR: hipGetDeviceCount failed! (" << hip_status << ")" << std::endl; + return -1; + } + if(num_devices < 1) + { + ROCDEC_ERR("ERROR: didn't find any GPU!"); + return -1; + } + + hip_status = hipGetDeviceProperties(&hip_dev_prop, device_id); + if(hip_status != hipSuccess) + { + ROCDEC_ERR("ERROR: hipGetDeviceProperties for device (" + TOSTR(device_id) + + " ) failed! (" + hipGetErrorName(hip_status) + ")"); + return -1; + } + + gcn_arch_name = hip_dev_prop.gcnArchName; + std::size_t pos = gcn_arch_name.find_first_of(":"); + std::string gcn_arch_name_base + = (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name; + + // gfx90a has two GCDs as two separate devices + if(!gcn_arch_name_base.compare("gfx90a") && num_devices > 1) + { + sd = 1; + } + + std::string device_name; + int pci_bus_id, pci_domain_id, pci_device_id; + double total_fps = 0; + int n_total = 0; + std::vector v_fps; + std::vector v_frame; + v_fps.resize(num_files, 0); + v_frame.resize(num_files, 0); + int hip_vis_dev_count = 0; + get_env_var("HIP_VISIBLE_DEVICES", hip_vis_dev_count); + + std::cout << "info: Number of threads: " << n_thread << std::endl; + + std::vector> v_demuxer(num_files); + std::unique_ptr dec_8bit_avc(nullptr), dec_8bit_hevc(nullptr), + dec_10bit_hevc(nullptr), dec_8bit_av1(nullptr), dec_10bit_av1(nullptr), + dec_8bit_vp9(nullptr), dec_10bit_vp9(nullptr); + std::vector> v_dec_info; + ThreadPool thread_pool(n_thread); + + //reconfig parameters + ReconfigParams reconfig_params = {}; + reconfig_dump_file_struct reconfig_user_struct = {}; + reconfig_params.p_fn_reconfigure_flush = reconfigure_flush_callback; + if(!b_dump_output_frames) + { + reconfig_user_struct.b_dump_frames_to_file = false; + 
reconfig_params.reconfig_flush_mode = RECONFIG_FLUSH_MODE_NONE; + } + else + { + reconfig_user_struct.b_dump_frames_to_file = true; + reconfig_params.reconfig_flush_mode = RECONFIG_FLUSH_MODE_DUMP_TO_FILE; + } + reconfig_params.p_reconfig_user_struct = &reconfig_user_struct; + + for(int i = 0; i < num_files; i++) + { + std::unique_ptr demuxer(new VideoDemuxer(input_file_names[i].c_str())); + v_demuxer[i] = std::move(demuxer); + std::size_t found_file = input_file_names[i].find_last_of('/'); + input_file_names[i] = input_file_names[i].substr(found_file + 1); + if(b_dump_output_frames) + { + std::size_t found_ext = input_file_names[i].find_last_of('.'); + std::string path = output_folder_path + "/output_" + + input_file_names[i].substr(0, found_ext) + ".yuv"; + output_file_names[i] = path; + } + } + + for(int i = 0; i < n_thread; i++) + { + v_dec_info.emplace_back(std::make_unique()); + if(!hip_vis_dev_count) + { + if(device_id % 2 == 0) + { + v_dec_info[i]->dec_device_id = (i % 2 == 0) ? device_id : device_id + sd; + } + else + { + v_dec_info[i]->dec_device_id = (i % 2 == 0) ? 
device_id - sd : device_id; + } + } + else + { + v_dec_info[i]->dec_device_id = i % hip_vis_dev_count; + } + + v_dec_info[i]->rocdec_codec_id = AVCodec2RocDecVideoCodec(v_demuxer[i]->GetCodecID()); + v_dec_info[i]->bit_depth = v_demuxer[i]->GetBitDepth(); + if(v_dec_info[i]->bit_depth == 8) + { + if(v_dec_info[i]->rocdec_codec_id == rocDecVideoCodec_AVC) + { + std::unique_ptr dec_8bit_avc( + new RocVideoDecoder(v_dec_info[i]->dec_device_id, + mem_type, + v_dec_info[i]->rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay)); + v_dec_info[i]->viddec = std::move(dec_8bit_avc); + } + else if(v_dec_info[i]->rocdec_codec_id == rocDecVideoCodec_HEVC) + { + std::unique_ptr dec_8bit_hevc( + new RocVideoDecoder(v_dec_info[i]->dec_device_id, + mem_type, + v_dec_info[i]->rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay)); + v_dec_info[i]->viddec = std::move(dec_8bit_hevc); + } + else if(v_dec_info[i]->rocdec_codec_id == rocDecVideoCodec_AV1) + { + std::unique_ptr dec_8bit_av1( + new RocVideoDecoder(v_dec_info[i]->dec_device_id, + mem_type, + v_dec_info[i]->rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay)); + v_dec_info[i]->viddec = std::move(dec_8bit_av1); + } + else if(v_dec_info[i]->rocdec_codec_id == rocDecVideoCodec_VP9) + { + std::unique_ptr dec_8bit_vp9( + new RocVideoDecoder(v_dec_info[i]->dec_device_id, + mem_type, + v_dec_info[i]->rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay)); + v_dec_info[i]->viddec = std::move(dec_8bit_vp9); + } + else + { + ROCDEC_ERR("ERROR: codec type is not supported!"); + return -1; + } + } + else + { //bit depth = 10bit + if(v_dec_info[i]->rocdec_codec_id == rocDecVideoCodec_HEVC) + { + std::unique_ptr dec_10bit_hevc( + new RocVideoDecoder(v_dec_info[i]->dec_device_id, + mem_type, + v_dec_info[i]->rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + 
b_extract_sei_messages, + disp_delay)); + v_dec_info[i]->viddec = std::move(dec_10bit_hevc); + } + else if(v_dec_info[i]->rocdec_codec_id == rocDecVideoCodec_AV1) + { + std::unique_ptr dec_10bit_av1( + new RocVideoDecoder(v_dec_info[i]->dec_device_id, + mem_type, + v_dec_info[i]->rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay)); + v_dec_info[i]->viddec = std::move(dec_10bit_av1); + } + else if(v_dec_info[i]->rocdec_codec_id == rocDecVideoCodec_VP9) + { + std::unique_ptr dec_10bit_vp9( + new RocVideoDecoder(v_dec_info[i]->dec_device_id, + mem_type, + v_dec_info[i]->rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay)); + v_dec_info[i]->viddec = std::move(dec_10bit_vp9); + } + else + { + ROCDEC_ERR("ERROR: codec type is not supported!"); + return -1; + } + } + + v_dec_info[i]->viddec->GetDeviceinfo(device_name, + gcn_arch_name, + pci_bus_id, + pci_domain_id, + pci_device_id); + std::cout << "info: decoding " << input_file_names[i] << " using GPU device " + << v_dec_info[i]->dec_device_id << " - " << device_name << "[" + << gcn_arch_name << "] on PCI bus " << std::setfill('0') << std::setw(2) + << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') + << std::setw(2) << std::right << std::hex << pci_domain_id << "." 
+ << pci_device_id << std::dec << std::endl; + } + + std::mutex mutex; + + for(int j = 0; j < num_files; j++) + { + int thread_idx = j % n_thread; + if(j >= n_thread) + { + { + std::unique_lock lock(mutex); + while(!v_dec_info[thread_idx]->decoding_complete) + { + } + v_dec_info[thread_idx]->decoding_complete = false; + } + uint32_t bit_depth = v_demuxer[j]->GetBitDepth(); + rocDecVideoCodec codec_id = AVCodec2RocDecVideoCodec(v_demuxer[j]->GetCodecID()); + if(v_dec_info[thread_idx]->bit_depth != bit_depth + || v_dec_info[thread_idx]->rocdec_codec_id != codec_id) + { + if(bit_depth == 8) + { // can be HEVC or H.264 or AV1 + if(dec_8bit_avc == nullptr && codec_id == rocDecVideoCodec_AVC) + { + std::unique_ptr dec_8bit_avc( + new RocVideoDecoder(v_dec_info[thread_idx]->dec_device_id, + mem_type, + codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay)); + v_dec_info[thread_idx]->viddec = std::move(dec_8bit_avc); + } + else if(dec_8bit_hevc == nullptr && codec_id == rocDecVideoCodec_HEVC) + { + std::unique_ptr dec_8bit_hevc( + new RocVideoDecoder(v_dec_info[thread_idx]->dec_device_id, + mem_type, + codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay)); + v_dec_info[thread_idx]->viddec = std::move(dec_8bit_hevc); + } + else if(dec_8bit_av1 == nullptr && codec_id == rocDecVideoCodec_AV1) + { + std::unique_ptr dec_8bit_av1( + new RocVideoDecoder(v_dec_info[thread_idx]->dec_device_id, + mem_type, + codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay)); + v_dec_info[thread_idx]->viddec = std::move(dec_8bit_av1); + } + else if(dec_8bit_av1 == nullptr && codec_id == rocDecVideoCodec_VP9) + { + std::unique_ptr dec_8bit_vp9( + new RocVideoDecoder(v_dec_info[thread_idx]->dec_device_id, + mem_type, + codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay)); + v_dec_info[thread_idx]->viddec = std::move(dec_8bit_vp9); + } + else + { + 
if(codec_id == rocDecVideoCodec_AVC) + { + v_dec_info[thread_idx]->viddec.swap(dec_8bit_avc); + } + else if(codec_id == rocDecVideoCodec_HEVC) + { + v_dec_info[thread_idx]->viddec.swap(dec_8bit_hevc); + } + else if(codec_id == rocDecVideoCodec_AV1) + { + v_dec_info[thread_idx]->viddec.swap(dec_8bit_av1); + } + else if(codec_id == rocDecVideoCodec_VP9) + { + v_dec_info[thread_idx]->viddec.swap(dec_8bit_vp9); + } + else + { + ROCDEC_ERR("ERROR: codec type is not supported!"); + return -1; + } + } + v_dec_info[thread_idx]->bit_depth = bit_depth; + v_dec_info[thread_idx]->rocdec_codec_id = codec_id; + } + else + { // bit_depth = 10bit; HEVC or AV1 + if(dec_10bit_hevc == nullptr && codec_id == rocDecVideoCodec_HEVC) + { + std::unique_ptr dec_10bit_hevc( + new RocVideoDecoder(v_dec_info[thread_idx]->dec_device_id, + mem_type, + codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay)); + v_dec_info[thread_idx]->viddec = std::move(dec_10bit_hevc); + } + else if(dec_10bit_av1 == nullptr && codec_id == rocDecVideoCodec_AV1) + { + std::unique_ptr dec_10bit_av1( + new RocVideoDecoder(v_dec_info[thread_idx]->dec_device_id, + mem_type, + codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay)); + v_dec_info[thread_idx]->viddec = std::move(dec_10bit_av1); + } + else if(dec_10bit_vp9 == nullptr && codec_id == rocDecVideoCodec_VP9) + { + std::unique_ptr dec_10bit_vp9( + new RocVideoDecoder(v_dec_info[thread_idx]->dec_device_id, + mem_type, + codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay)); + v_dec_info[thread_idx]->viddec = std::move(dec_10bit_vp9); + } + else + { + if(codec_id == rocDecVideoCodec_HEVC) + { + v_dec_info[thread_idx]->viddec.swap(dec_10bit_hevc); + } + else if(codec_id == rocDecVideoCodec_AV1) + { + v_dec_info[thread_idx]->viddec.swap(dec_10bit_av1); + } + else if(codec_id == rocDecVideoCodec_VP9) + { + v_dec_info[thread_idx]->viddec.swap(dec_10bit_vp9); + } + 
else + { + ROCDEC_ERR("ERROR: codec type is not supported!"); + return -1; + } + } + v_dec_info[thread_idx]->bit_depth = bit_depth; + v_dec_info[thread_idx]->rocdec_codec_id = codec_id; + } + } + v_dec_info[thread_idx]->viddec->GetDeviceinfo(device_name, + gcn_arch_name, + pci_bus_id, + pci_domain_id, + pci_device_id); + std::cout << "info: decoding " << input_file_names[j] << " using GPU device " + << v_dec_info[thread_idx]->dec_device_id << " - " << device_name << "[" + << gcn_arch_name << "] on PCI bus " << std::setfill('0') << std::setw(2) + << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') + << std::setw(2) << std::right << std::hex << pci_domain_id << "." + << pci_device_id << std::dec << std::endl; + } + if(!v_dec_info[thread_idx]->viddec->CodecSupported( + v_dec_info[thread_idx]->dec_device_id, + v_dec_info[thread_idx]->rocdec_codec_id, + v_dec_info[thread_idx]->bit_depth)) + { + std::cerr << "Codec not supported on GPU, skipping this file!" << std::endl; + v_dec_info[thread_idx]->decoding_complete = true; + continue; + } + thread_pool.ExecuteJob(std::bind(DecProc, + v_dec_info[thread_idx]->viddec.get(), + v_demuxer[j].get(), + &v_frame[j], + &v_fps[j], + std::ref(v_dec_info[thread_idx]->decoding_complete), + b_dump_output_frames, + output_file_names[j], + mem_type)); + } + + thread_pool.JoinThreads(); + for(int i = 0; i < num_files; i++) + { + total_fps += v_fps[i] * static_cast(n_thread) / static_cast(num_files); + n_total += v_frame[i]; + } + if(!b_dump_output_frames) + { + std::cout << "info: Total frame decoded: " << n_total << std::endl; + std::cout << "info: avg decoding time per frame: " << 1000 / total_fps << " ms" + << std::endl; + std::cout << "info: avg FPS: " << total_fps << std::endl; + } + else + { + if(mem_type == OUT_SURFACE_MEM_NOT_MAPPED) + { + std::cout << "info: saving frames with -m 3 option is not supported!" 
<< std::endl; + } + else + { + for(int i = 0; i < num_files; i++) + { + std::cout << "info: saved frames into " << output_file_names[i] << std::endl; + } + } + } + } + catch(const std::exception& ex) + { + std::cout << ex.what() << std::endl; + } + + return 0; +} diff --git a/Libraries/rocDecode/video_decode_mem/.gitignore b/Libraries/rocDecode/video_decode_mem/.gitignore new file mode 100644 index 000000000..4bc434920 --- /dev/null +++ b/Libraries/rocDecode/video_decode_mem/.gitignore @@ -0,0 +1 @@ +rocdecode_video_decode_mem diff --git a/Libraries/rocDecode/video_decode_mem/CMakeLists.txt b/Libraries/rocDecode/video_decode_mem/CMakeLists.txt new file mode 100644 index 000000000..31077df33 --- /dev/null +++ b/Libraries/rocDecode/video_decode_mem/CMakeLists.txt @@ -0,0 +1,124 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +set(example_name rocdecode_video_decode_mem) + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(${example_name} LANGUAGES CXX) + +include("../../../Common/HipPlatform.cmake") +select_gpu_language() + +enable_language(${ROCM_EXAMPLES_GPU_LANGUAGE}) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD 17) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_EXTENSIONS OFF) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD_REQUIRED ON) +select_hip_platform() +verify_hip_platform(PLATFORMS "amd") + +if(CMAKE_SYSTEM_NAME MATCHES "Windows") + message(STATUS "rocDecode examples are only available on Linux") + return() +else() + set(ROCM_ROOT + "/opt/rocm" + CACHE PATH + "Root directory of the ROCm installation" + ) +endif() + +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +find_package(rocdecode REQUIRED) + +# Find FFmpeg libraries +find_library(AVCODEC_LIBRARY avcodec REQUIRED) +find_library(AVFORMAT_LIBRARY avformat REQUIRED) +find_library(AVUTIL_LIBRARY avutil REQUIRED) + +find_path(AVCODEC_INCLUDE_DIR libavcodec/avcodec.h REQUIRED) +find_path(AVFORMAT_INCLUDE_DIR libavformat/avformat.h REQUIRED) +find_path(AVUTIL_INCLUDE_DIR libavutil/avutil.h REQUIRED) + +# Check FFmpeg version for compatibility using pkg-config (same as Makefile) +execute_process( + COMMAND pkg-config --modversion libavcodec + OUTPUT_VARIABLE AVCODEC_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) + +# If pkg-config fails, try to get version from header as fallback +if(NOT AVCODEC_VERSION AND AVCODEC_INCLUDE_DIR) + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MAJOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MAJOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MINOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MINOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MICRO_LINE + REGEX "^#define LIBAVCODEC_VERSION_MICRO") + + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MAJOR[ \t]+([0-9]+).*$" "\\1" 
AVCODEC_MAJOR "${AVCODEC_VERSION_MAJOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MINOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MINOR "${AVCODEC_VERSION_MINOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MICRO[ \t]+([0-9]+).*$" "\\1" AVCODEC_MICRO "${AVCODEC_VERSION_MICRO_LINE}") + + set(AVCODEC_VERSION "${AVCODEC_MAJOR}.${AVCODEC_MINOR}.${AVCODEC_MICRO}") +endif() + +add_executable(${example_name} + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp +) + +target_link_libraries(${example_name} + PRIVATE + rocdecode::rocdecode + ${AVCODEC_LIBRARY} + ${AVFORMAT_LIBRARY} + ${AVUTIL_LIBRARY} +) + +target_include_directories( + ${example_name} + PRIVATE + "../../../Common" + "../../../External" + "${ROCM_ROOT}/share/rocdecode/utils" + "${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode" + ${AVCODEC_INCLUDE_DIR} + ${AVFORMAT_INCLUDE_DIR} + ${AVUTIL_INCLUDE_DIR} +) + +# FFMPEG multi-version support +if(AVCODEC_VERSION VERSION_LESS_EQUAL 58.134.100) + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=0) +else() + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=1) +endif() + +set_source_files_properties( + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp + PROPERTIES LANGUAGE ${ROCM_EXAMPLES_GPU_LANGUAGE} +) + +install(TARGETS ${example_name}) diff --git a/Libraries/rocDecode/video_decode_mem/Makefile b/Libraries/rocDecode/video_decode_mem/Makefile new file mode 100644 index 000000000..0b317d0e1 --- /dev/null +++ b/Libraries/rocDecode/video_decode_mem/Makefile @@ -0,0 +1,81 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +EXAMPLE := rocdecode_video_decode_mem +COMMON_INCLUDE_DIR := ../../../Common +EXTERNAL_DIR := ../../../External +GPU_RUNTIME := HIP + +# HIP variables +ROCM_INSTALL_DIR := /opt/rocm +UTILS_DIR := ${ROCM_INSTALL_DIR}/share/rocdecode/utils + +HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include +ROCDECODE_INCLUDE_DIR := $(HIP_INCLUDE_DIR) + +HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc + +# Common variables and flags +CXX_STD := c++17 +ICXXFLAGS := -std=$(CXX_STD) +ICPPFLAGS := -isystem $(ROCDECODE_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -I $(EXTERNAL_DIR) -I $(UTILS_DIR) -I $(UTILS_DIR)/rocvideodecode +ILDFLAGS := -L $(ROCM_INSTALL_DIR)/lib +ILDLIBS := -lrocdecode -lavcodec -lavformat -lavutil + +ifeq ($(GPU_RUNTIME), HIP) + CXXFLAGS ?= -Wall -Wextra + CPPFLAGS += -D__HIP_PLATFORM_AMD__ + + # FFmpeg version detection + AVCODEC_VERSION := $(shell pkg-config --modversion libavcodec 2>/dev/null || echo "0") + AVCODEC_MAJOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f1) + AVCODEC_MINOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f2) + AVCODEC_PATCH := $(shell echo $(AVCODEC_VERSION) | cut -d. -f3) + + # Compare version (58.134.100) + ifeq ($(shell test $(AVCODEC_MAJOR) -lt 58 || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -lt 134) || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -eq 134 && test $(AVCODEC_PATCH) -le 100); echo $$?),0) + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=0 + else + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=1 + endif + + COMPILER := $(HIPCXX) +else + $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". 
GPU_RUNTIME must be HIP; CUDA not supported) +endif + +ICXXFLAGS += $(CXXFLAGS) +ICPPFLAGS += $(CPPFLAGS) +ILDFLAGS += $(LDFLAGS) +ILDLIBS += $(LDLIBS) + +SOURCES := main.cpp $(UTILS_DIR)/rocvideodecode/roc_video_dec.cpp + +$(EXAMPLE): $(SOURCES) $(COMMON_INCLUDE_DIR)/example_utils.hpp $(COMMON_INCLUDE_DIR)/rocdecode_utils.hpp + $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $(SOURCES) $(ILDLIBS) + +clean: + $(RM) $(EXAMPLE) + +.PHONY: clean diff --git a/Libraries/rocDecode/video_decode_mem/README.md b/Libraries/rocDecode/video_decode_mem/README.md new file mode 100644 index 000000000..84209d98d --- /dev/null +++ b/Libraries/rocDecode/video_decode_mem/README.md @@ -0,0 +1,112 @@ +# rocDecode Memory-Based Video Decode + +## Description + +This example demonstrates memory-based video decoding using the rocDecode library with a custom stream provider. Instead of reading directly from a file, the sample shows how to pass video data chunk-by-chunk sequentially to the FFMPEG demuxer, which is then decoded on AMD hardware. This approach is useful for scenarios where video data comes from network streams, memory buffers, or other non-file sources. + +## Application Flow + +1. Parse command-line arguments for input file path, device ID, and output options. +2. Create a custom `FileStreamProvider` class that implements the stream provider interface. +3. Initialize the video demuxer with the custom stream provider instead of a file path. +4. The stream provider reads the video file in chunks and fills the demuxer's buffer. +5. Create the video decoder instance with codec information from the demuxer. +6. Loop through the video stream: + - The demuxer requests data from the stream provider as needed. + - Stream provider reads chunks from the file into the demuxer's buffer. + - Demuxer extracts video packets from the buffered data. + - Decoder processes packets and produces decoded frames. + - Retrieve and optionally save decoded frames. +7. 
Display decoding statistics and performance metrics. +8. Clean up decoder, demuxer, and stream provider resources. + +## Key APIs and Concepts + +- **Custom Stream Provider**: The sample implements a `FileStreamProvider` class derived from `VideoDemuxer::StreamProvider`: + - `GetData()`: Called by the demuxer to fill its buffer with video data. + - `GetBufferSize()`: Returns the size of the buffer to allocate. + - This abstraction allows feeding video data from any source (file, network, memory, etc.). + +- **Memory-Based Demuxing**: The FFMPEG demuxer is configured to use a custom I/O context: + - `avio_alloc_context()`: Creates a custom I/O context with the stream provider. + - The demuxer reads data through callbacks instead of direct file access. + - Enables streaming scenarios where data arrives incrementally. + +- **Decoder Integration**: The decoder works seamlessly with the memory-based demuxer: + - `rocDecCreateDecoder()`: Initializes the decoder with codec parameters from the demuxer. + - `rocDecParseVideoData()`: Parses video packets provided by the memory-based demuxer. + - `rocDecDecodeFrame()`: Decodes frames using hardware acceleration. + - No changes needed in the decode pipeline compared to file-based decoding. + +- **Buffer Management**: The stream provider manages data buffering: + - Reads video data in configurable chunk sizes. + - Maintains read position and handles end-of-stream conditions. + - Provides data to the demuxer on demand without loading the entire file into memory. + +- **Use Cases**: + - Network streaming applications where video data arrives over the network. + - Processing video data from memory buffers or databases. + - Implementing custom data sources with encryption or compression. + - Real-time video processing pipelines with non-file inputs. 
+ +## Demonstrated API Calls + +### rocDecode APIs + +- `rocDecCreateDecoder` +- `rocDecDecodeFrame` +- `rocDecGetVideoFrame` +- `rocDecGetDecodeStatus` +- `rocDecDestroyDecoder` +- `rocDecCreateVideoParser` +- `rocDecParseVideoData` +- `rocDecDestroyVideoParser` +- `rocDecGetErrorName` + +### HIP Runtime APIs + +- `hipSetDevice` +- `hipGetDeviceProperties` +- `hipMalloc` +- `hipFree` +- `hipMemcpy` +- `hipMemcpyDtoH` + +### FFMPEG APIs + +- `avformat_alloc_context` +- `avio_alloc_context` +- `avformat_open_input` +- `avformat_find_stream_info` +- `av_find_best_stream` +- `av_read_frame` +- `av_packet_alloc` +- `av_packet_free` +- `av_packet_unref` +- `avformat_close_input` +- `av_malloc` +- `av_freep` +- `av_bsf_get_by_name` +- `av_bsf_alloc` +- `av_bsf_init` +- `av_bsf_send_packet` +- `av_bsf_receive_packet` +- `av_bsf_free` + +### Data Types and Enums + +- `rocDecDecoderHandle` +- `RocdecVideoParser` +- `rocDecVideoCodec` +- `rocDecVideoSurfaceFormat` +- `rocDecVideoChromaFormat` +- `rocDecDecoderCreateInfo` +- `RocdecParserParams` +- `RocdecVideoFormat` +- `RocdecPicParams` +- `RocdecParserDispInfo` +- `rocDecDecodeStatus` +- `AVFormatContext` +- `AVIOContext` +- `AVPacket` +- `AVCodecID` diff --git a/Libraries/rocDecode/video_decode_mem/main.cpp b/Libraries/rocDecode/video_decode_mem/main.cpp new file mode 100644 index 000000000..9d5244b2a --- /dev/null +++ b/Libraries/rocDecode/video_decode_mem/main.cpp @@ -0,0 +1,330 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "CmdParser/cmdparser.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#if __cplusplus >= 201703L && __has_include() + #include +#else + #include +#endif +#include "roc_video_dec.h" +#include "video_demuxer.h" + +#include "md5.h" +#include "rocdecode_utils.hpp" + +class FileStreamProvider : public VideoDemuxer::StreamProvider +{ +public: + FileStreamProvider(const char* input_file_path) + { + fp_in_.open(input_file_path, std::ifstream::in | std::ifstream::binary); + if(!fp_in_) + { + std::cerr << "Unable to open input file: " << input_file_path << std::endl; + exit(-1); + } + fp_in_.seekg(0, fp_in_.end); + int length = fp_in_.tellg(); + fp_in_.seekg(0, fp_in_.beg); + io_buffer_size_ = length; + } + ~FileStreamProvider() + { + fp_in_.close(); + } + // Fill in the buffer owned by the demuxer + int GetData(uint8_t* p_buf, int n_buf) + { + // We read a file for this example. 
You may get your data from network or somewhere else + return static_cast(fp_in_.read(reinterpret_cast(p_buf), n_buf).gcount()); + } + size_t GetBufferSize() + { + return io_buffer_size_; + } + +private: + std::ifstream fp_in_; + size_t io_buffer_size_; +}; + +void configure_parser(cli::Parser& parser) +{ + parser.set_required("i", "input", "Input file path"); + parser.set_optional("o", + "output", + "", + "Output file path - dumps output if requested"); + parser.set_optional("d", + "device", + 0, + "GPU device ID (0 for the first device, 1 for the second, etc.)"); + parser.set_optional( + "z", + "force_zero_latency", + false, + "Force zero latency (decoded frames will be flushed out for display immediately)"); + parser.set_optional("sei", "extract_sei", false, "Extract SEI messages"); + parser.set_optional("md5", + "generate_md5", + false, + "Generate MD5 message digest on the decoded YUV image sequence"); + parser.set_optional( + "md5_check", + "md5_check_file", + "", + "MD5 file path - generate MD5 message digest and compare to reference"); + parser.set_optional("crop", + "crop_rect", + "", + "Crop rectangle for output (format: left,top,right,bottom)"); + parser.set_optional("m", + "mem_type", + 0, + "Output surface memory type [0: DEV_INTERNAL, 1: DEV_COPIED, 2: " + "HOST_COPIED, 3: NOT_MAPPED]"); + parser.set_optional("disp_delay", + "display_delay", + 1, + "Number of frames to be delayed for display"); +} + +int main(int argc, char** argv) +{ + // Parse command-line arguments + cli::Parser parser(argc, argv); + configure_parser(parser); + parser.run_and_exit_if_error(); + + // Get arguments + std::string input_file_path = parser.get("i"); + std::string output_file_path = parser.get("o"); + int device_id = parser.get("d"); + bool b_force_zero_latency = parser.get("z"); + bool b_extract_sei_messages = parser.get("sei"); + bool b_generate_md5 = parser.get("md5"); + std::string md5_file_path = parser.get("md5_check"); + std::string crop_str = parser.get("crop"); 
+ OutputSurfaceMemoryType mem_type = static_cast(parser.get("m")); + int disp_delay = parser.get("disp_delay"); + + int dump_output_frames = !output_file_path.empty() ? 1 : 0; + bool b_md5_check = !md5_file_path.empty(); + if(b_md5_check) + { + b_generate_md5 = true; + } + + // Parse crop rectangle if provided + Rect crop_rect = {}; + Rect* p_crop_rect = nullptr; + if(!crop_str.empty()) + { + if(4 + != sscanf(crop_str.c_str(), + "%d,%d,%d,%d", + &crop_rect.left, + &crop_rect.top, + &crop_rect.right, + &crop_rect.bottom)) + { + std::cerr << "Invalid crop rectangle format. Expected: left,top,right,bottom" + << std::endl; + return 1; + } + if((crop_rect.right - crop_rect.left) % 2 == 1 + || (crop_rect.bottom - crop_rect.top) % 2 == 1) + { + std::cerr << "Output crop rectangle must have width and height of even numbers" + << std::endl; + return 1; + } + p_crop_rect = &crop_rect; + } + + try + { + FileStreamProvider stream_provider(input_file_path.c_str()); + VideoDemuxer demuxer(&stream_provider); + rocDecVideoCodec rocdec_codec_id = AVCodec2RocDecVideoCodec(demuxer.GetCodecID()); + RocVideoDecoder viddec(device_id, + mem_type, + rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay); + + if(!viddec.CodecSupported(device_id, rocdec_codec_id, demuxer.GetBitDepth())) + { + std::cerr << "GPU doesn't support codec!" << std::endl; + return 0; + } + + std::string device_name, gcn_arch_name; + int pci_bus_id, pci_domain_id, pci_device_id; + + viddec.GetDeviceinfo(device_name, gcn_arch_name, pci_bus_id, pci_domain_id, pci_device_id); + std::cout << "info: Using GPU device " << device_id << " - " << device_name << "[" + << gcn_arch_name << "] on PCI bus " << std::setfill('0') << std::setw(2) + << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') + << std::setw(2) << std::right << std::hex << pci_domain_id << "." << pci_device_id + << std::dec << std::endl; + std::cout << "info: decoding started, please wait!" 
<< std::endl; + + int n_video_bytes = 0, n_frame_returned = 0, n_frame = 0; + uint8_t* pvideo = nullptr; + int pkg_flags = 0; + uint8_t* pframe = nullptr; + int64_t pts = 0; + OutputSurfaceInfo* surf_info; + double total_dec_time = 0; + MD5Generator* md5_generator = nullptr; + + if(b_generate_md5) + { + md5_generator = new MD5Generator(); + md5_generator->InitMd5(); + } + + do + { + auto start_time = std::chrono::high_resolution_clock::now(); + demuxer.Demux(&pvideo, &n_video_bytes, &pts); + // Treat 0 bitstream size as end of stream indicator + if(n_video_bytes == 0) + { + pkg_flags |= ROCDEC_PKT_ENDOFSTREAM; + } + n_frame_returned = viddec.DecodeFrame(pvideo, n_video_bytes, pkg_flags, pts); + auto end_time = std::chrono::high_resolution_clock::now(); + auto time_per_frame + = std::chrono::duration(end_time - start_time).count(); + total_dec_time += time_per_frame; + if(!n_frame && !viddec.GetOutputSurfaceInfo(&surf_info)) + { + std::cerr << "Error: Failed to get Output Surface Info!" << std::endl; + break; + } + for(int i = 0; i < n_frame_returned; i++) + { + pframe = viddec.GetFrame(&pts); + if(b_generate_md5) + { + md5_generator->UpdateMd5ForFrame(pframe, surf_info); + } + if(dump_output_frames && mem_type != OUT_SURFACE_MEM_NOT_MAPPED) + { + viddec.SaveFrameToFile(output_file_path, pframe, surf_info); + } + // release frame + viddec.ReleaseFrame(pts); + } + n_frame += n_frame_returned; + } + while(n_video_bytes); + + std::cout << "info: Total frame decoded: " << n_frame << std::endl; + if(!dump_output_frames) + { + std::cout << "info: avg decoding time per frame (ms): " << total_dec_time / n_frame + << std::endl; + std::cout << "info: avg FPS: " << (n_frame / total_dec_time) * 1000 << std::endl; + } + else + { + if(mem_type == OUT_SURFACE_MEM_NOT_MAPPED) + { + std::cout << "info: saving frames with -m 3 option is not supported!" 
<< std::endl; + } + else + { + std::cout << "info: saved frames into " << output_file_path << std::endl; + } + } + + if(b_generate_md5) + { + uint8_t* digest; + md5_generator->FinalizeMd5(&digest); + std::cout << "MD5 message digest: "; + for(int i = 0; i < 16; i++) + { + std::cout << std::setfill('0') << std::setw(2) << std::hex + << static_cast(digest[i]); + } + std::cout << std::endl; + + if(b_md5_check) + { + std::fstream ref_md5_file; + std::string ref_md5_string(33, 0); + uint8_t ref_md5[16]; + ref_md5_file.open(md5_file_path.c_str(), std::ios::in); + if((ref_md5_file.rdstate() & std::ifstream::failbit) != 0) + { + std::cerr << "Failed to open MD5 file." << std::endl; + return 1; + } + ref_md5_file.getline(ref_md5_string.data(), ref_md5_string.length()); + if((ref_md5_file.rdstate() & std::ifstream::badbit) != 0) + { + std::cerr << "Failed to read MD5 digest string." << std::endl; + return 1; + } + for(int i = 0; i < 16; i++) + { + std::string part = ref_md5_string.substr(i * 2, 2); + ref_md5[i] = std::stoi(part, nullptr, 16); + } + if(memcmp(digest, ref_md5, 16) == 0) + { + std::cout << "MD5 digest matches the reference MD5 digest: "; + } + else + { + std::cout << "MD5 digest does not match the reference MD5 digest: "; + } + std::cout << ref_md5_string << std::endl; + ref_md5_file.close(); + } + delete md5_generator; + } + } + catch(const std::exception& ex) + { + std::cout << ex.what() << std::endl; + exit(1); + } + + return 0; +} diff --git a/Libraries/rocDecode/video_decode_multi_files/.gitignore b/Libraries/rocDecode/video_decode_multi_files/.gitignore new file mode 100644 index 000000000..fa69eed60 --- /dev/null +++ b/Libraries/rocDecode/video_decode_multi_files/.gitignore @@ -0,0 +1 @@ +rocdecode_video_decode_multi_files diff --git a/Libraries/rocDecode/video_decode_multi_files/CMakeLists.txt b/Libraries/rocDecode/video_decode_multi_files/CMakeLists.txt new file mode 100644 index 000000000..7cbca9d3f --- /dev/null +++ 
b/Libraries/rocDecode/video_decode_multi_files/CMakeLists.txt @@ -0,0 +1,124 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +set(example_name rocdecode_video_decode_multi_files) + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(${example_name} LANGUAGES CXX) + +include("../../../Common/HipPlatform.cmake") +select_gpu_language() + +enable_language(${ROCM_EXAMPLES_GPU_LANGUAGE}) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD 17) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_EXTENSIONS OFF) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD_REQUIRED ON) +select_hip_platform() +verify_hip_platform(PLATFORMS "amd") + +if(CMAKE_SYSTEM_NAME MATCHES "Windows") + message(STATUS "rocDecode examples are only available on Linux") + return() +else() + set(ROCM_ROOT + "/opt/rocm" + CACHE PATH + "Root directory of the ROCm installation" + ) +endif() + +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +find_package(rocdecode REQUIRED) + +# Find FFmpeg libraries +find_library(AVCODEC_LIBRARY avcodec REQUIRED) +find_library(AVFORMAT_LIBRARY avformat REQUIRED) +find_library(AVUTIL_LIBRARY avutil REQUIRED) + +find_path(AVCODEC_INCLUDE_DIR libavcodec/avcodec.h REQUIRED) +find_path(AVFORMAT_INCLUDE_DIR libavformat/avformat.h REQUIRED) +find_path(AVUTIL_INCLUDE_DIR libavutil/avutil.h REQUIRED) + +# Check FFmpeg version for compatibility using pkg-config (same as Makefile) +execute_process( + COMMAND pkg-config --modversion libavcodec + OUTPUT_VARIABLE AVCODEC_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) + +# If pkg-config fails, try to get version from header as fallback +if(NOT AVCODEC_VERSION AND AVCODEC_INCLUDE_DIR) + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MAJOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MAJOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MINOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MINOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MICRO_LINE + REGEX "^#define LIBAVCODEC_VERSION_MICRO") + + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MAJOR[ 
\t]+([0-9]+).*$" "\\1" AVCODEC_MAJOR "${AVCODEC_VERSION_MAJOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MINOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MINOR "${AVCODEC_VERSION_MINOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MICRO[ \t]+([0-9]+).*$" "\\1" AVCODEC_MICRO "${AVCODEC_VERSION_MICRO_LINE}") + + set(AVCODEC_VERSION "${AVCODEC_MAJOR}.${AVCODEC_MINOR}.${AVCODEC_MICRO}") +endif() + +add_executable(${example_name} + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp +) + +target_link_libraries(${example_name} + PRIVATE + rocdecode::rocdecode + ${AVCODEC_LIBRARY} + ${AVFORMAT_LIBRARY} + ${AVUTIL_LIBRARY} +) + +target_include_directories( + ${example_name} + PRIVATE + "../../../Common" + "../../../External" + "${ROCM_ROOT}/share/rocdecode/utils" + "${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode" + ${AVCODEC_INCLUDE_DIR} + ${AVFORMAT_INCLUDE_DIR} + ${AVUTIL_INCLUDE_DIR} +) + +# FFMPEG multi-version support +if(AVCODEC_VERSION VERSION_LESS_EQUAL 58.134.100) + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=0) +else() + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=1) +endif() + +set_source_files_properties( + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp + PROPERTIES LANGUAGE ${ROCM_EXAMPLES_GPU_LANGUAGE} +) + +install(TARGETS ${example_name}) diff --git a/Libraries/rocDecode/video_decode_multi_files/Makefile b/Libraries/rocDecode/video_decode_multi_files/Makefile new file mode 100644 index 000000000..c37f463c1 --- /dev/null +++ b/Libraries/rocDecode/video_decode_multi_files/Makefile @@ -0,0 +1,81 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +EXAMPLE := rocdecode_video_decode_multi_files +COMMON_INCLUDE_DIR := ../../../Common +EXTERNAL_DIR := ../../../External +GPU_RUNTIME := HIP + +# HIP variables +ROCM_INSTALL_DIR := /opt/rocm +UTILS_DIR := ${ROCM_INSTALL_DIR}/share/rocdecode/utils + +HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include +ROCDECODE_INCLUDE_DIR := $(HIP_INCLUDE_DIR) + +HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc + +# Common variables and flags +CXX_STD := c++17 +ICXXFLAGS := -std=$(CXX_STD) +ICPPFLAGS := -isystem $(ROCDECODE_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -I $(EXTERNAL_DIR) -I $(UTILS_DIR) -I $(UTILS_DIR)/rocvideodecode +ILDFLAGS := -L $(ROCM_INSTALL_DIR)/lib +ILDLIBS := -lrocdecode -lavcodec -lavformat -lavutil + +ifeq ($(GPU_RUNTIME), HIP) + CXXFLAGS ?= -Wall -Wextra + CPPFLAGS += -D__HIP_PLATFORM_AMD__ + + # FFmpeg version detection + AVCODEC_VERSION := $(shell pkg-config --modversion libavcodec 2>/dev/null || echo "0") + AVCODEC_MAJOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f1) + AVCODEC_MINOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f2) + AVCODEC_PATCH := $(shell echo $(AVCODEC_VERSION) | cut -d. -f3) + + # Compare version (58.134.100) + ifeq ($(shell test $(AVCODEC_MAJOR) -lt 58 || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -lt 134) || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -eq 134 && test $(AVCODEC_PATCH) -le 100); echo $$?),0) + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=0 + else + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=1 + endif + + COMPILER := $(HIPCXX) +else + $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". 
GPU_RUNTIME must be HIP; CUDA not supported) +endif + +ICXXFLAGS += $(CXXFLAGS) +ICPPFLAGS += $(CPPFLAGS) +ILDFLAGS += $(LDFLAGS) +ILDLIBS += $(LDLIBS) + +SOURCES := main.cpp $(UTILS_DIR)/rocvideodecode/roc_video_dec.cpp + +$(EXAMPLE): $(SOURCES) $(COMMON_INCLUDE_DIR)/example_utils.hpp $(COMMON_INCLUDE_DIR)/rocdecode_utils.hpp + $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $(SOURCES) $(ILDLIBS) + +clean: + $(RM) $(EXAMPLE) + +.PHONY: clean diff --git a/Libraries/rocDecode/video_decode_multi_files/README.md b/Libraries/rocDecode/video_decode_multi_files/README.md new file mode 100644 index 000000000..361729104 --- /dev/null +++ b/Libraries/rocDecode/video_decode_multi_files/README.md @@ -0,0 +1,117 @@ +# rocDecode Multi-File Video Decode with Reconfiguration + +## Description + +This example demonstrates the decoder reconfiguration capability of the rocDecode library by decoding multiple video files with a single decoder instance. The sample showcases how to handle video files with different resolutions or parameters using the same decoder, leveraging the reconfigure feature to adapt to changing video properties without recreating the decoder. Input files must be of the same codec type but can have varying resolutions. + +## Application Flow + +1. Parse command-line arguments for input file list, device ID, and reconfiguration options. +2. Read the input file list containing paths to multiple video files. +3. Store video file paths in a queue for sequential processing. +4. Initialize the first video file's demuxer to extract codec information. +5. Create the decoder instance with the codec from the first file. +6. Set up reconfiguration callback for handling resolution changes. +7. For each video file in the queue: + - Open the video file with the demuxer. + - If resolution differs from previous file, trigger decoder reconfiguration. + - Decode all frames from the current file. + - Flush remaining frames when switching to the next file. 
+ - Optionally save decoded frames to separate output files. +8. Display decoding statistics for all processed files. +9. Clean up decoder and demuxer resources. + +## Key APIs and Concepts + +- **Decoder Reconfiguration**: The decoder can adapt to different video parameters without recreation: + - Triggered automatically when the parser detects sequence parameter changes. + - Handles resolution changes only; as the `use_reconfigure` option's help text states, no other parameter change is supported. + - Reconfiguration callback flushes pending frames before applying new parameters. + - More efficient than destroying and recreating the decoder for each file. + +- **Reconfiguration Callback**: A user-provided callback function is invoked during reconfiguration: + - Called when video sequence parameters change between files. + - Flushes any remaining decoded frames from the previous sequence. + - Optionally saves flushed frames to output files. + - Returns the number of frames flushed. + - Allows the decoder to reset its internal state for the new sequence. + +- **Multi-File Processing**: The sample processes multiple files sequentially: + - Files are read from an input list file of `key value` lines: each `infile <path>` line starts a new entry, optionally followed by `outfile`, `z`, `sei`, `flush`, `crop`, `m`, and `disp_delay` lines for that entry. + - Each file is demuxed and decoded independently. + - The decoder maintains state across file boundaries. + - Output can be saved to separate files per input video. + +- **Codec Compatibility**: All input files must use the same codec: + - Codec type (H.264, HEVC, VP9, etc.) must be consistent. + - Resolution and frame rate can vary; with reconfiguration enabled, only resolution changes are supported (see the `use_reconfigure` option). + - The decoder reconfigures for parameter changes but cannot switch codecs. + +- **Flush Modes**: The reconfiguration callback supports different flush modes: + - `RECONFIG_FLUSH_MODE_NONE`: Just count flushed frames. + - `RECONFIG_FLUSH_MODE_DUMP_TO_FILE`: Save flushed frames to output file. + - `RECONFIG_FLUSH_MODE_CALCULATE_MD5`: Generate MD5 digest for flushed frames. + - Modes can be combined using bitwise OR. 
+ +- **Use Cases**: + - Processing video playlists with varying resolutions. + - Adaptive bitrate streaming scenarios. + - Batch processing of related video files. + - Testing decoder robustness with parameter changes. + +## Demonstrated API Calls + +### rocDecode APIs + +- `rocDecCreateDecoder` +- `rocDecDecodeFrame` +- `rocDecGetVideoFrame` +- `rocDecGetDecodeStatus` +- `rocDecDestroyDecoder` +- `rocDecCreateVideoParser` +- `rocDecParseVideoData` +- `rocDecDestroyVideoParser` +- `rocDecGetErrorName` + +### HIP Runtime APIs + +- `hipSetDevice` +- `hipGetDeviceProperties` +- `hipMalloc` +- `hipFree` +- `hipMemcpy` +- `hipMemcpyDtoH` + +### FFMPEG APIs + +- `avformat_open_input` +- `avformat_close_input` +- `avformat_find_stream_info` +- `av_find_best_stream` +- `av_read_frame` +- `av_packet_alloc` +- `av_packet_free` +- `av_packet_unref` +- `av_bsf_get_by_name` +- `av_bsf_alloc` +- `av_bsf_init` +- `av_bsf_send_packet` +- `av_bsf_receive_packet` +- `av_bsf_free` + +### Data Types and Enums + +- `rocDecDecoderHandle` +- `RocdecVideoParser` +- `rocDecVideoCodec` +- `rocDecVideoSurfaceFormat` +- `rocDecVideoChromaFormat` +- `rocDecDecoderCreateInfo` +- `RocdecParserParams` +- `RocdecVideoFormat` +- `RocdecPicParams` +- `RocdecParserDispInfo` +- `rocDecDecodeStatus` +- `AVCodecID` +- `AVFormatContext` +- `AVPacket` diff --git a/Libraries/rocDecode/video_decode_multi_files/main.cpp b/Libraries/rocDecode/video_decode_multi_files/main.cpp new file mode 100644 index 000000000..a1c910c39 --- /dev/null +++ b/Libraries/rocDecode/video_decode_multi_files/main.cpp @@ -0,0 +1,335 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "CmdParser/cmdparser.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if __cplusplus >= 201703L && __has_include() + #include +#else + #include +#endif +#include "roc_video_dec.h" +#include "video_demuxer.h" + +#include "rocdecode_utils.hpp" + +typedef struct +{ + std::string in_file; + std::string out_file; + bool b_force_zero_latency; + bool b_extract_sei_messages; + bool b_flush_last_frames; + Rect crop_rect; + Rect* p_crop_rect; + int dump_output_frames; + OutputSurfaceMemoryType mem_type; + int disp_delay; +} file_info; + +void parse_file_list(const std::string& file_list_path, std::deque& multi_file_data) +{ + std::ifstream filestream(file_list_path); + std::string line; + char param[256]; + char value[256]; + int file_idx = 0; + file_info file_data; + + while(std::getline(filestream, line)) + { + const char* str = line.c_str(); + if(sscanf(str, "%255s %255s", param, value) != 2) // field widths keep long tokens from overrunning the 256-byte param/value buffers + { + continue; + } + + if(!strcmp(param, "infile")) + { + if(file_idx > 0) + { + multi_file_data.push_back(file_data); + } + file_data.in_file = value; + file_idx++; + file_data.b_force_zero_latency = false; + file_data.b_extract_sei_messages = false; + file_data.b_flush_last_frames = true; + file_data.dump_output_frames = 0; + file_data.crop_rect = {}; + file_data.p_crop_rect = nullptr; + file_data.mem_type = OUT_SURFACE_MEM_DEV_INTERNAL; + file_data.disp_delay = 1; + } + else if(!strcmp(param, "outfile")) + { + file_data.out_file = value; + file_data.dump_output_frames = 1; + } + else if(!strcmp(param, "z")) + { + file_data.b_force_zero_latency = atoi(value) ? true : false; + } + else if(!strcmp(param, "sei")) + { + file_data.b_extract_sei_messages = atoi(value) ? true : false; + } + else if(!strcmp(param, "flush")) + { + file_data.b_flush_last_frames = atoi(value) ? 
true : false; + } + else if(!strcmp(param, "crop")) + { + if(sscanf(value, + "%d,%d,%d,%d", + &file_data.crop_rect.left, + &file_data.crop_rect.top, + &file_data.crop_rect.right, + &file_data.crop_rect.bottom) + == 4) + { + if((file_data.crop_rect.right - file_data.crop_rect.left) % 2 == 1 + || (file_data.crop_rect.bottom - file_data.crop_rect.top) % 2 == 1) + { + std::cerr << "Cropping rect must have width and height of even numbers" + << std::endl; + exit(1); + } + file_data.p_crop_rect = &file_data.crop_rect; + } + } + else if(!strcmp(param, "m")) + { + file_data.mem_type = static_cast(atoi(value)); + } + else if(!strcmp(param, "disp_delay")) + { + file_data.disp_delay = atoi(value); + } + } + if(file_idx > 0) + { + multi_file_data.push_back(file_data); + } +} + +void configure_parser(cli::Parser& parser) +{ + parser.set_required("i", + "input", + "Input file list (text file containing all files to decode)"); + parser.set_optional("d", + "device", + 0, + "GPU device ID (0 for the first device, 1 for the second, etc.)"); + parser.set_optional( + "use_reconfigure", + "use_reconfigure", + true, + "Use reconfigure API for decoding multiple files (only resolution changes supported)"); +} + +int main(int argc, char** argv) +{ + // Parse command-line arguments + cli::Parser parser(argc, argv); + configure_parser(parser); + parser.run_and_exit_if_error(); + + // Get arguments + std::string file_list_path = parser.get("i"); + int device_id = parser.get("d"); + bool use_reconfigure = parser.get("use_reconfigure"); + + std::deque multi_file_data; + parse_file_list(file_list_path, multi_file_data); + + RocVideoDecoder* viddec = nullptr; + ReconfigParams reconfig_params = {}; + reconfig_dump_file_struct reconfig_user_struct = {}; + + try + { + while(!multi_file_data.empty()) + { + file_info file_data = multi_file_data.front(); + multi_file_data.pop_front(); + VideoDemuxer demuxer(file_data.in_file.c_str()); + rocDecVideoCodec rocdec_codec_id = 
AVCodec2RocDecVideoCodec(demuxer.GetCodecID()); + + if(file_data.b_flush_last_frames && file_data.dump_output_frames) + { + reconfig_params.p_fn_reconfigure_flush = reconfigure_flush_callback; + reconfig_user_struct.b_dump_frames_to_file = file_data.dump_output_frames; + reconfig_user_struct.output_file_name = file_data.out_file; + reconfig_params.reconfig_flush_mode = RECONFIG_FLUSH_MODE_DUMP_TO_FILE; + reconfig_params.p_reconfig_user_struct = &reconfig_user_struct; + } + + if(use_reconfigure) + { + if(!viddec) + { + viddec = new RocVideoDecoder(device_id, + file_data.mem_type, + rocdec_codec_id, + file_data.b_force_zero_latency, + file_data.p_crop_rect ? &file_data.crop_rect : nullptr, // use this copy's own rect: the parsed pointer refers to parse_file_list's local, which no longer exists + file_data.b_extract_sei_messages, + file_data.disp_delay); + } + } + else + { + viddec = new RocVideoDecoder(device_id, + file_data.mem_type, + rocdec_codec_id, + file_data.b_force_zero_latency, + file_data.p_crop_rect ? &file_data.crop_rect : nullptr, // same rebinding as above; a null pointer still means "no crop requested" + file_data.b_extract_sei_messages, + file_data.disp_delay); + } + + if(!viddec->CodecSupported(device_id, rocdec_codec_id, demuxer.GetBitDepth())) + { + std::cerr << "Codec not supported on GPU, skipping this file!" << std::endl; + continue; + } + + if(viddec && file_data.b_flush_last_frames) + { + viddec->SetReconfigParams(&reconfig_params); + } + + std::string device_name, gcn_arch_name; + int pci_bus_id, pci_domain_id, pci_device_id; + + std::size_t found_file = file_data.in_file.find_last_of('/'); + std::cout << "info: Input file: " << file_data.in_file.substr(found_file + 1) + << std::endl; + viddec->GetDeviceinfo(device_name, + gcn_arch_name, + pci_bus_id, + pci_domain_id, + pci_device_id); + std::cout << "info: Using GPU device " << device_id << " - " << device_name << "[" + << gcn_arch_name << "] on PCI bus " << std::setfill('0') << std::setw(2) + << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') + << std::setw(2) << std::right << std::hex << pci_domain_id << "." + << pci_device_id << std::dec << std::endl; + std::cout << "info: decoding started, please wait!" 
<< std::endl; + + int n_video_bytes = 0, n_frame_returned = 0, n_frame = 0; + uint8_t* pvideo = nullptr; + int pkg_flags = 0; + uint8_t* pframe = nullptr; + int64_t pts = 0; + OutputSurfaceInfo* surf_info; + double total_dec_time = 0; + + do + { + auto start_time = std::chrono::high_resolution_clock::now(); + demuxer.Demux(&pvideo, &n_video_bytes, &pts); + // Treat 0 bitstream size as end of stream indicator + if(n_video_bytes == 0) + { + pkg_flags |= ROCDEC_PKT_ENDOFSTREAM; + } + n_frame_returned = viddec->DecodeFrame(pvideo, n_video_bytes, pkg_flags, pts); + auto end_time = std::chrono::high_resolution_clock::now(); + auto time_per_frame + = std::chrono::duration(end_time - start_time).count(); + total_dec_time += time_per_frame; + if(!n_frame && !viddec->GetOutputSurfaceInfo(&surf_info)) + { + std::cerr << "Error: Failed to get Output Surface Info!" << std::endl; + break; + } + for(int i = 0; i < n_frame_returned; i++) + { + pframe = viddec->GetFrame(&pts); + if(file_data.dump_output_frames + && file_data.mem_type != OUT_SURFACE_MEM_NOT_MAPPED) + { + viddec->SaveFrameToFile(file_data.out_file, pframe, surf_info); + } + // release frame + viddec->ReleaseFrame(pts); + } + n_frame += n_frame_returned; + } + while(n_video_bytes); + + n_frame += viddec->GetNumOfFlushedFrames(); + std::cout << "info: Total frame decoded: " << n_frame << std::endl; + if(!file_data.dump_output_frames) + { + std::cout << "info: avg decoding time per frame (ms): " << total_dec_time / n_frame + << std::endl; + std::cout << "info: avg FPS: " << (n_frame / total_dec_time) * 1000 << std::endl; + } + else + { + if(file_data.mem_type == OUT_SURFACE_MEM_NOT_MAPPED) + { + std::cout << "info: saving frames with -m 3 option is not supported!" 
+ << std::endl; + } + else + { + std::cout << "info: saved frames into " << file_data.out_file << std::endl; + } + } + + if(!use_reconfigure) + { + delete viddec; + viddec = nullptr; + } + std::cout << "\n"; + } + + if(viddec) + { + delete viddec; + viddec = nullptr; + } + } + catch(const std::exception& ex) + { + std::cout << ex.what() << std::endl; + exit(1); + } + + return 0; +} diff --git a/Libraries/rocDecode/video_decode_perf/.gitignore b/Libraries/rocDecode/video_decode_perf/.gitignore new file mode 100644 index 000000000..cb07d6aab --- /dev/null +++ b/Libraries/rocDecode/video_decode_perf/.gitignore @@ -0,0 +1 @@ +rocdecode_video_decode_perf diff --git a/Libraries/rocDecode/video_decode_perf/CMakeLists.txt b/Libraries/rocDecode/video_decode_perf/CMakeLists.txt new file mode 100644 index 000000000..7ef56c3fc --- /dev/null +++ b/Libraries/rocDecode/video_decode_perf/CMakeLists.txt @@ -0,0 +1,142 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +set(example_name rocdecode_video_decode_perf) + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(${example_name} LANGUAGES CXX) + +include("../../../Common/HipPlatform.cmake") +select_gpu_language() + +enable_language(${ROCM_EXAMPLES_GPU_LANGUAGE}) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD 17) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_EXTENSIONS OFF) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD_REQUIRED ON) +select_hip_platform() +verify_hip_platform(PLATFORMS "amd") + +if(CMAKE_SYSTEM_NAME MATCHES "Windows") + message(STATUS "rocDecode examples are only available on Linux") + return() +else() + set(ROCM_ROOT + "/opt/rocm" + CACHE PATH + "Root directory of the ROCm installation" + ) +endif() + +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +find_package(rocdecode REQUIRED) + +# Try to find the host library directly (handles both naming conventions) +find_library(ROCDECODE_HOST_LIB + NAMES rocdecode-host rocdecodehost + PATHS ${ROCM_ROOT}/lib + NO_DEFAULT_PATH +) + +# Find FFmpeg libraries +find_library(AVCODEC_LIBRARY avcodec REQUIRED) +find_library(AVFORMAT_LIBRARY avformat REQUIRED) +find_library(AVUTIL_LIBRARY avutil REQUIRED) + +find_path(AVCODEC_INCLUDE_DIR libavcodec/avcodec.h REQUIRED) +find_path(AVFORMAT_INCLUDE_DIR libavformat/avformat.h REQUIRED) +find_path(AVUTIL_INCLUDE_DIR libavutil/avutil.h REQUIRED) + +# Check FFmpeg version for compatibility using pkg-config (same as Makefile) +execute_process( + COMMAND pkg-config --modversion libavcodec + OUTPUT_VARIABLE AVCODEC_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) + +# If pkg-config fails, try to get version from header as fallback +if(NOT AVCODEC_VERSION AND AVCODEC_INCLUDE_DIR) + file(STRINGS 
"${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MAJOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MAJOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MINOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MINOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MICRO_LINE + REGEX "^#define LIBAVCODEC_VERSION_MICRO") + + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MAJOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MAJOR "${AVCODEC_VERSION_MAJOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MINOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MINOR "${AVCODEC_VERSION_MINOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MICRO[ \t]+([0-9]+).*$" "\\1" AVCODEC_MICRO "${AVCODEC_VERSION_MICRO_LINE}") + + set(AVCODEC_VERSION "${AVCODEC_MAJOR}.${AVCODEC_MINOR}.${AVCODEC_MICRO}") +endif() + +add_executable(${example_name} + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp + ${ROCM_ROOT}/share/rocdecode/utils/ffmpegvideodecode/ffmpeg_video_dec.cpp +) + +target_link_libraries(${example_name} + PRIVATE + rocdecode::rocdecode + ${AVCODEC_LIBRARY} + ${AVFORMAT_LIBRARY} + ${AVUTIL_LIBRARY} +) + +# Link host library if found +if(ROCDECODE_HOST_LIB) + target_link_libraries(${example_name} PRIVATE ${ROCDECODE_HOST_LIB}) + target_compile_definitions(${example_name} PRIVATE ENABLE_HOST_DECODE=1) +else() + target_compile_definitions(${example_name} PRIVATE ENABLE_HOST_DECODE=0) +endif() + +target_include_directories( + ${example_name} + PRIVATE + "../../../Common" + "../../../External" + "${ROCM_ROOT}/share/rocdecode/utils" + "${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode" + "${ROCM_ROOT}/share/rocdecode/utils/ffmpegvideodecode" + ${AVCODEC_INCLUDE_DIR} + ${AVFORMAT_INCLUDE_DIR} + ${AVUTIL_INCLUDE_DIR} +) + +# FFMPEG multi-version support +if(AVCODEC_VERSION VERSION_LESS_EQUAL 58.134.100) + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=0) +else() + 
target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=1) +endif() + +set_source_files_properties( + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp + ${ROCM_ROOT}/share/rocdecode/utils/ffmpegvideodecode/ffmpeg_video_dec.cpp + PROPERTIES LANGUAGE ${ROCM_EXAMPLES_GPU_LANGUAGE} +) + +install(TARGETS ${example_name}) diff --git a/Libraries/rocDecode/video_decode_perf/Makefile b/Libraries/rocDecode/video_decode_perf/Makefile new file mode 100644 index 000000000..d84c85ed0 --- /dev/null +++ b/Libraries/rocDecode/video_decode_perf/Makefile @@ -0,0 +1,96 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +EXAMPLE := rocdecode_video_decode_perf +COMMON_INCLUDE_DIR := ../../../Common +EXTERNAL_DIR := ../../../External +GPU_RUNTIME := HIP + +# HIP variables +ROCM_INSTALL_DIR := /opt/rocm +UTILS_DIR := ${ROCM_INSTALL_DIR}/share/rocdecode/utils + +HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include +ROCDECODE_INCLUDE_DIR := $(HIP_INCLUDE_DIR) + +HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc + +# Detect which rocdecode host library is available +ROCDECODE_HOST_LIB := $(shell if [ -f $(ROCM_INSTALL_DIR)/lib/librocdecode-host.so ]; then \ + echo "rocdecode-host"; \ + elif [ -f $(ROCM_INSTALL_DIR)/lib/librocdecodehost.so ]; then \ + echo "rocdecodehost"; \ + fi) + +# Common variables and flags +CXX_STD := c++17 +ICXXFLAGS := -std=$(CXX_STD) +ICPPFLAGS := -isystem $(ROCDECODE_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -I $(EXTERNAL_DIR) -I $(UTILS_DIR) -I $(UTILS_DIR)/rocvideodecode -I $(UTILS_DIR)/ffmpegvideodecode +ILDFLAGS := -L $(ROCM_INSTALL_DIR)/lib +ILDLIBS := -lrocdecode -lavcodec -lavformat -lavutil + +ifeq ($(GPU_RUNTIME), HIP) + CXXFLAGS ?= -Wall -Wextra + CPPFLAGS += -D__HIP_PLATFORM_AMD__ + + # Add host decode library and flag if available + ifneq ($(ROCDECODE_HOST_LIB),) + ILDLIBS += -l$(ROCDECODE_HOST_LIB) + CPPFLAGS += -DENABLE_HOST_DECODE=1 + else + CPPFLAGS += -DENABLE_HOST_DECODE=0 + endif + + # FFmpeg version detection + AVCODEC_VERSION := $(shell pkg-config --modversion libavcodec 2>/dev/null || echo "0") + AVCODEC_MAJOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f1) + AVCODEC_MINOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f2) + AVCODEC_PATCH := $(shell echo $(AVCODEC_VERSION) | cut -d. 
-f3) + + # Compare version (58.134.100) + ifeq ($(shell test $(AVCODEC_MAJOR) -lt 58 || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -lt 134) || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -eq 134 && test $(AVCODEC_PATCH) -le 100); echo $$?),0) + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=0 + else + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=1 + endif + + COMPILER := $(HIPCXX) +else + $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP; CUDA not supported) +endif + +ICXXFLAGS += $(CXXFLAGS) +ICPPFLAGS += $(CPPFLAGS) +ILDFLAGS += $(LDFLAGS) +ILDLIBS += $(LDLIBS) + +SOURCES := main.cpp $(UTILS_DIR)/rocvideodecode/roc_video_dec.cpp $(UTILS_DIR)/ffmpegvideodecode/ffmpeg_video_dec.cpp + +$(EXAMPLE): $(SOURCES) $(COMMON_INCLUDE_DIR)/example_utils.hpp $(COMMON_INCLUDE_DIR)/rocdecode_utils.hpp + $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $(SOURCES) $(ILDLIBS) + +clean: + $(RM) $(EXAMPLE) + +.PHONY: clean diff --git a/Libraries/rocDecode/video_decode_perf/README.md b/Libraries/rocDecode/video_decode_perf/README.md new file mode 100644 index 000000000..941f0901c --- /dev/null +++ b/Libraries/rocDecode/video_decode_perf/README.md @@ -0,0 +1,126 @@ +# rocDecode Performance Testing + +## Description + +This example demonstrates performance testing and benchmarking of the rocDecode library by decoding the same video file multiple times in parallel using multiple threads. The sample is designed to measure maximum decode throughput, GPU utilization, and multi-stream decode performance. It provides detailed performance metrics including frames per second, decode time per frame, and overall throughput. + +## Application Flow + +1. Parse command-line arguments for input file, number of threads, device ID, and performance options. +2. Initialize the video demuxer to extract codec information. +3. Verify codec support on the selected GPU device. +4. Create multiple threads (default or user-specified count). +5. 
For each thread: + - Create an independent decoder instance. + - Create a separate demuxer for the same input file. + - Decode all frames from the video file. + - Track decode time and frame count. + - Optionally skip frame retrieval to measure pure decode performance. +6. Synchronize all threads and collect performance statistics. +7. Calculate and display aggregate metrics: + - Total frames decoded across all threads. + - Average decode time per frame. + - Frames per second (FPS) per thread and aggregate. + - Total throughput in FPS. +8. Clean up all decoder and demuxer resources. + +## Key APIs and Concepts + +- **Parallel Decode Streams**: Multiple decoder instances run concurrently: + - Each thread has its own decoder created with `rocDecCreateDecoder()`. + - Threads decode the same video file independently. + - Maximizes GPU hardware decoder utilization. + - Tests multi-stream decode capability of the hardware. + +- **Performance Measurement**: The sample tracks detailed timing information: + - Decode start and end times for each thread. + - Per-frame decode time. + - Total decode time excluding initialization overhead. + - Frame count per thread and aggregate. + +- **Decode-Only Mode**: Optional mode to measure pure decode performance: + - Skips frame retrieval and memory copies. + - Focuses on hardware decode throughput. + - Useful for understanding decoder bottlenecks vs. memory bandwidth. + +- **Thread Configuration**: Configurable number of parallel decode threads: + - The thread count defaults to 1 (a single decode stream). + - User can specify thread count for testing different scenarios. + - Each thread operates independently without synchronization during decode. + +- **GPU Utilization**: The sample helps measure: + - Maximum decode throughput of the GPU. + - Scalability with multiple concurrent streams. + - Hardware decoder saturation point. + - Efficiency of parallel decode operations.
+ +- **Performance Metrics**: + - **Per-Thread FPS**: Decode rate for individual threads. + - **Aggregate FPS**: Combined throughput across all threads. + - **Average Decode Time**: Mean time to decode a single frame. + - **Total Frames**: Sum of frames decoded by all threads. + +## Demonstrated API Calls + +### rocDecode APIs + +- `rocDecCreateDecoder` +- `rocDecDecodeFrame` +- `rocDecGetVideoFrame` +- `rocDecGetDecodeStatus` +- `rocDecDestroyDecoder` +- `rocDecCreateVideoParser` +- `rocDecParseVideoData` +- `rocDecDestroyVideoParser` +- `rocDecGetErrorName` + +### HIP Runtime APIs + +- `hipSetDevice` +- `hipGetDeviceCount` +- `hipGetDeviceProperties` +- `hipMalloc` +- `hipFree` +- `hipMemcpy` +- `hipMemcpyDtoH` + +### FFMPEG APIs + +- `avformat_open_input` +- `avformat_find_stream_info` +- `av_find_best_stream` +- `av_read_frame` +- `av_packet_alloc` +- `av_packet_free` +- `av_packet_unref` +- `avformat_close_input` +- `av_bsf_get_by_name` +- `av_bsf_alloc` +- `av_bsf_init` +- `av_bsf_send_packet` +- `av_bsf_receive_packet` +- `av_bsf_free` + +### C++ Standard Library (Threading and Timing) + +- `std::thread` +- `std::vector` +- `std::chrono::high_resolution_clock` +- `std::chrono::duration` + +### Data Types and Enums + +- `rocDecDecoderHandle` +- `RocdecVideoParser` +- `rocDecVideoCodec` +- `rocDecVideoSurfaceFormat` +- `rocDecVideoChromaFormat` +- `rocDecDecoderCreateInfo` +- `RocdecParserParams` +- `RocdecVideoFormat` +- `RocdecPicParams` +- `RocdecParserDispInfo` +- `rocDecDecodeStatus` +- `AVCodecID` +- `AVFormatContext` +- `AVPacket` diff --git a/Libraries/rocDecode/video_decode_perf/main.cpp b/Libraries/rocDecode/video_decode_perf/main.cpp new file mode 100644 index 000000000..ba93d4d62 --- /dev/null +++ b/Libraries/rocDecode/video_decode_perf/main.cpp @@ -0,0 +1,334 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "CmdParser/cmdparser.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#if __cplusplus >= 201703L && __has_include() + #include +#else + #include +#endif +#include "ffmpeg_video_dec.h" +#include "roc_video_dec.h" +#include "video_demuxer.h" + +#include "rocdecode_utils.hpp" + +void dec_proc(RocVideoDecoder* p_dec, + VideoDemuxer* demuxer, + int* pn_frame, + int* pn_pic_dec, + double* pn_fps, + double* pn_fps_dec, + int max_num_frames, + OutputSurfaceMemoryType mem_type) +{ + int n_video_bytes = 0, n_frame_returned = 0, n_frame = 0; + int n_pic_decoded = 0, decoded_pics = 0; + uint8_t* p_video = nullptr; + int64_t pts = 0; + double total_dec_time = 0.0; + auto start_time = std::chrono::high_resolution_clock::now(); + + do + { + demuxer->Demux(&p_video, &n_video_bytes, &pts); + n_frame_returned = p_dec->DecodeFrame(p_video, n_video_bytes, 0, pts, &decoded_pics); + n_frame += n_frame_returned; + n_pic_decoded += decoded_pics; + if(max_num_frames && max_num_frames <= n_frame) + { + break; + } + } + while(n_video_bytes); + + if(mem_type == OUT_SURFACE_MEM_NOT_MAPPED) + { + p_dec->WaitForDecodeCompletion(); + } + + auto end_time = std::chrono::high_resolution_clock::now(); + auto time_per_decode = std::chrono::duration(end_time - start_time).count(); + auto session_overhead = p_dec->GetDecoderSessionOverHead(std::this_thread::get_id()); + // Calculate average decoding time + total_dec_time = time_per_decode - session_overhead; + double average_output_time = total_dec_time / n_frame; + double average_decoding_time = total_dec_time / n_pic_decoded; + double n_fps = 1000 / average_output_time; + double n_fps_dec = 1000 / average_decoding_time; + *pn_fps = n_fps; + *pn_fps_dec = n_fps_dec; + *pn_frame = n_frame; + *pn_pic_dec = n_pic_decoded; +} + +void configure_parser(cli::Parser& parser) +{ + parser.set_required("i", "input", "Input file path"); + parser.set_optional("t", "threads", 1, "Number of threads (>= 1)"); + 
parser.set_optional("d", "device", 0, "Device ID (>= 0)"); + parser.set_optional( + "z", + "force_zero_latency", + false, + "Force zero latency (decoded frames will be flushed out for display immediately)"); + parser.set_optional("disp_delay", + "display_delay", + 1, + "Number of frames to be delayed for display"); + parser.set_optional( + "m", + "mem_type", + 3, + "Memory type [0: DEV_INTERNAL, 1: DEV_COPIED, 2: HOST_COPIED, 3: NOT_MAPPED]"); + parser.set_optional("f", + "max_frames", + 0, + "Max number of frames to decode (0 = decode entire stream)"); + parser.set_optional("backend", "backend", 0, "Backend type [0: GPU, 1: CPU]"); +} + +int main(int argc, char** argv) +{ + // Parse command-line arguments + cli::Parser parser(argc, argv); + configure_parser(parser); + parser.run_and_exit_if_error(); + + // Get arguments + std::string input_file_path = parser.get("i"); + int n_thread = parser.get("t"); + int device_id = parser.get("d"); + bool b_force_zero_latency = parser.get("z"); + int disp_delay = parser.get("disp_delay"); + OutputSurfaceMemoryType mem_type = static_cast(parser.get("m")); + uint32_t max_num_frames = parser.get("f"); + int backend = parser.get("backend"); + + if(n_thread <= 0) + { + std::cerr << "Number of threads must be >= 1" << std::endl; + return 1; + } + if(device_id < 0) + { + std::cerr << "Device ID must be >= 0" << std::endl; + return 1; + } + + Rect* p_crop_rect = nullptr; + + try + { + // TODO: Change this block to use VCN query API + int num_devices = 0, sd = 0; + hipError_t hip_status = hipSuccess; + hipDeviceProp_t hip_dev_prop; + std::string gcn_arch_name; + hip_status = hipGetDeviceCount(&num_devices); + if(hip_status != hipSuccess) + { + std::cout << "ERROR: hipGetDeviceCount failed! 
(" << hip_status << ")" << std::endl; + return -1; + } + + if(num_devices < 1) + { + ROCDEC_ERR("ERROR: didn't find any GPU!"); + return -1; + } + + hip_status = hipGetDeviceProperties(&hip_dev_prop, device_id); + if(hip_status != hipSuccess) + { + ROCDEC_ERR("ERROR: hipGetDeviceProperties for device (" + TOSTR(device_id) + + " ) failed! (" + hipGetErrorName(hip_status) + ")"); + return -1; + } + + gcn_arch_name = hip_dev_prop.gcnArchName; + std::size_t pos = gcn_arch_name.find_first_of(":"); + std::string gcn_arch_name_base + = (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name; + + // gfx90a has two GCDs as two separate devices + if(!gcn_arch_name_base.compare("gfx90a") && num_devices > 1) + { + sd = 1; + } + + std::vector> v_demuxer; + std::vector> v_viddec; + std::vector v_device_id(n_thread); + + int hip_vis_dev_count = 0; + get_env_var("HIP_VISIBLE_DEVICES", hip_vis_dev_count); + + std::size_t found_file = input_file_path.find_last_of('/'); + std::cout << "info: Input file: " << input_file_path.substr(found_file + 1) << std::endl; + std::cout << "info: Number of threads: " << n_thread << std::endl; + + for(int i = 0; i < n_thread; i++) + { + std::unique_ptr demuxer(new VideoDemuxer(input_file_path.c_str())); + rocDecVideoCodec rocdec_codec_id = AVCodec2RocDecVideoCodec(demuxer->GetCodecID()); + if(!hip_vis_dev_count) + { + if(device_id % 2 == 0) + { + v_device_id[i] = (i % 2 == 0) ? device_id : device_id + sd; + } + else + { + v_device_id[i] = (i % 2 == 0) ? device_id - sd : device_id; + } + } + else + { + v_device_id[i] = i % hip_vis_dev_count; + } + std::unique_ptr dec; + if(!backend) + { // gpu backend + dec = std::make_unique(v_device_id[i], + mem_type, + rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + false, + disp_delay); + } + else + { +#if ENABLE_HOST_DECODE + std::cout << "info: RocDecode is using CPU backend!" 
<< std::endl; + uint32_t max_width = demuxer->GetWidth(); + uint32_t max_height = demuxer->GetHeight(); + mem_type = OUT_SURFACE_MEM_HOST_COPIED; + dec = std::make_unique(v_device_id[i], + mem_type, + rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + false, + disp_delay, + max_width, + max_height); +#else + std::cout + << "Error: RocDecode HOST library is not found and backend is not supported!" + << std::endl; + return 0; +#endif + } + + if(!dec->CodecSupported(v_device_id[i], rocdec_codec_id, demuxer->GetBitDepth())) + { + std::cerr << "Codec not supported on GPU, skipping this file!" << std::endl; + continue; + } + v_demuxer.push_back(std::move(demuxer)); + v_viddec.push_back(std::move(dec)); + } + + float total_fps = 0; + float total_fps_dec = 0; + std::vector v_thread; + std::vector v_fps, v_fps_dec; + std::vector v_frame, v_frame_dec; + v_fps.resize(n_thread, 0); + v_fps_dec.resize(n_thread, 0); + v_frame.resize(n_thread, 0); + v_frame_dec.resize(n_thread, 0); + int n_total = 0; + int n_total_dec = 0; + + std::string device_name; + int pci_bus_id, pci_domain_id, pci_device_id; + + for(int i = 0; i < n_thread; i++) + { + v_viddec[i]->GetDeviceinfo(device_name, + gcn_arch_name, + pci_bus_id, + pci_domain_id, + pci_device_id); + if(!backend) + { + std::cout << "info: stream " << i << " using GPU device " << v_device_id[i] << " - " + << device_name << "[" << gcn_arch_name << "] on PCI bus " + << std::setfill('0') << std::setw(2) << std::right << std::hex + << pci_bus_id << ":" << std::setfill('0') << std::setw(2) << std::right + << std::hex << pci_domain_id << "." << pci_device_id << std::dec + << std::endl; + } + std::cout << "info: decoding started for thread " << i << " ,please wait!" 
<< std::endl; + } + + for(int i = 0; i < n_thread; i++) + { + v_thread.push_back(std::thread(dec_proc, + v_viddec[i].get(), + v_demuxer[i].get(), + &v_frame[i], + &v_frame_dec[i], + &v_fps[i], + &v_fps_dec[i], + max_num_frames, + mem_type)); + } + + for(int i = 0; i < n_thread; i++) + { + v_thread[i].join(); + total_fps += v_fps[i]; + total_fps_dec += v_fps_dec[i]; + n_total += v_frame[i]; + n_total_dec += v_frame_dec[i]; + } + + std::cout << "info: Total pictures decoded: " << n_total_dec << std::endl; + std::cout << "info: Total frames output/displayed: " << n_total << std::endl; + std::cout << "info: avg decoding time per picture: " << 1000 / total_fps_dec << " ms" + << std::endl; + std::cout << "info: avg decode FPS: " << total_fps_dec << std::endl; + std::cout << "info: avg output/display time per frame: " << 1000 / total_fps << " ms" + << std::endl; + std::cout << "info: avg output/display FPS: " << total_fps << std::endl; + } + catch(const std::exception& ex) + { + std::cout << ex.what() << std::endl; + exit(1); + } + + return 0; +} diff --git a/Libraries/rocDecode/video_decode_pic_files/.gitignore b/Libraries/rocDecode/video_decode_pic_files/.gitignore new file mode 100644 index 000000000..95aa9826b --- /dev/null +++ b/Libraries/rocDecode/video_decode_pic_files/.gitignore @@ -0,0 +1 @@ +rocdecode_video_decode_pic_files diff --git a/Libraries/rocDecode/video_decode_pic_files/CMakeLists.txt b/Libraries/rocDecode/video_decode_pic_files/CMakeLists.txt new file mode 100644 index 000000000..4c7c3757d --- /dev/null +++ b/Libraries/rocDecode/video_decode_pic_files/CMakeLists.txt @@ -0,0 +1,142 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +set(example_name rocdecode_video_decode_pic_files) + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(${example_name} LANGUAGES CXX) + +include("../../../Common/HipPlatform.cmake") +select_gpu_language() + +enable_language(${ROCM_EXAMPLES_GPU_LANGUAGE}) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD 17) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_EXTENSIONS OFF) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD_REQUIRED ON) +select_hip_platform() +verify_hip_platform(PLATFORMS "amd") + +if(CMAKE_SYSTEM_NAME MATCHES "Windows") + message(STATUS "rocDecode examples are only available on Linux") + return() +else() + set(ROCM_ROOT + "/opt/rocm" + CACHE PATH + "Root directory of the ROCm installation" + ) +endif() + +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +find_package(rocdecode REQUIRED) + +# Try to find the host library directly (handles both naming conventions) +find_library(ROCDECODE_HOST_LIB + NAMES rocdecode-host rocdecodehost + PATHS ${ROCM_ROOT}/lib + NO_DEFAULT_PATH +) + +# Find FFmpeg libraries +find_library(AVCODEC_LIBRARY avcodec REQUIRED) +find_library(AVFORMAT_LIBRARY avformat REQUIRED) +find_library(AVUTIL_LIBRARY avutil REQUIRED) + +find_path(AVCODEC_INCLUDE_DIR libavcodec/avcodec.h REQUIRED) +find_path(AVFORMAT_INCLUDE_DIR libavformat/avformat.h REQUIRED) +find_path(AVUTIL_INCLUDE_DIR libavutil/avutil.h REQUIRED) + +# Check FFmpeg version for compatibility using pkg-config (same as Makefile) +execute_process( + COMMAND pkg-config --modversion libavcodec + OUTPUT_VARIABLE AVCODEC_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) + +# If pkg-config fails, try to get version from header as fallback +if(NOT AVCODEC_VERSION AND AVCODEC_INCLUDE_DIR) + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MAJOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MAJOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MINOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MINOR") + file(STRINGS 
"${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MICRO_LINE + REGEX "^#define LIBAVCODEC_VERSION_MICRO") + + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MAJOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MAJOR "${AVCODEC_VERSION_MAJOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MINOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MINOR "${AVCODEC_VERSION_MINOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MICRO[ \t]+([0-9]+).*$" "\\1" AVCODEC_MICRO "${AVCODEC_VERSION_MICRO_LINE}") + + set(AVCODEC_VERSION "${AVCODEC_MAJOR}.${AVCODEC_MINOR}.${AVCODEC_MICRO}") +endif() + +add_executable(${example_name} + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp + ${ROCM_ROOT}/share/rocdecode/utils/ffmpegvideodecode/ffmpeg_video_dec.cpp +) + +target_link_libraries(${example_name} + PRIVATE + rocdecode::rocdecode + ${AVCODEC_LIBRARY} + ${AVFORMAT_LIBRARY} + ${AVUTIL_LIBRARY} +) + +# Link host library if found +if(ROCDECODE_HOST_LIB) + target_link_libraries(${example_name} PRIVATE ${ROCDECODE_HOST_LIB}) + target_compile_definitions(${example_name} PRIVATE ENABLE_HOST_DECODE=1) +else() + target_compile_definitions(${example_name} PRIVATE ENABLE_HOST_DECODE=0) +endif() + +target_include_directories( + ${example_name} + PRIVATE + "../../../Common" + "../../../External" + "${ROCM_ROOT}/share/rocdecode/utils" + "${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode" + "${ROCM_ROOT}/share/rocdecode/utils/ffmpegvideodecode" + ${AVCODEC_INCLUDE_DIR} + ${AVFORMAT_INCLUDE_DIR} + ${AVUTIL_INCLUDE_DIR} +) + +# FFMPEG multi-version support +if(AVCODEC_VERSION VERSION_LESS_EQUAL 58.134.100) + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=0) +else() + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=1) +endif() + +set_source_files_properties( + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp + 
${ROCM_ROOT}/share/rocdecode/utils/ffmpegvideodecode/ffmpeg_video_dec.cpp + PROPERTIES LANGUAGE ${ROCM_EXAMPLES_GPU_LANGUAGE} +) + +install(TARGETS ${example_name}) diff --git a/Libraries/rocDecode/video_decode_pic_files/Makefile b/Libraries/rocDecode/video_decode_pic_files/Makefile new file mode 100644 index 000000000..387306dbc --- /dev/null +++ b/Libraries/rocDecode/video_decode_pic_files/Makefile @@ -0,0 +1,96 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +EXAMPLE := rocdecode_video_decode_pic_files +COMMON_INCLUDE_DIR := ../../../Common +EXTERNAL_DIR := ../../../External +GPU_RUNTIME := HIP + +# HIP variables +ROCM_INSTALL_DIR := /opt/rocm +UTILS_DIR := ${ROCM_INSTALL_DIR}/share/rocdecode/utils + +HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include +ROCDECODE_INCLUDE_DIR := $(HIP_INCLUDE_DIR) + +HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc + +# Detect which rocdecode host library is available +ROCDECODE_HOST_LIB := $(shell if [ -f $(ROCM_INSTALL_DIR)/lib/librocdecode-host.so ]; then \ + echo "rocdecode-host"; \ + elif [ -f $(ROCM_INSTALL_DIR)/lib/librocdecodehost.so ]; then \ + echo "rocdecodehost"; \ + fi) + +# Common variables and flags +CXX_STD := c++17 +ICXXFLAGS := -std=$(CXX_STD) +ICPPFLAGS := -isystem $(ROCDECODE_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -I $(EXTERNAL_DIR) -I $(UTILS_DIR) -I $(UTILS_DIR)/rocvideodecode -I $(UTILS_DIR)/ffmpegvideodecode +ILDFLAGS := -L $(ROCM_INSTALL_DIR)/lib +ILDLIBS := -lrocdecode -lavcodec -lavformat -lavutil + +ifeq ($(GPU_RUNTIME), HIP) + CXXFLAGS ?= -Wall -Wextra + CPPFLAGS += -D__HIP_PLATFORM_AMD__ + + # Add host decode library and flag if available + ifneq ($(ROCDECODE_HOST_LIB),) + ILDLIBS += -l$(ROCDECODE_HOST_LIB) + CPPFLAGS += -DENABLE_HOST_DECODE=1 + else + CPPFLAGS += -DENABLE_HOST_DECODE=0 + endif + + # FFmpeg version detection + AVCODEC_VERSION := $(shell pkg-config --modversion libavcodec 2>/dev/null || echo "0") + AVCODEC_MAJOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f1) + AVCODEC_MINOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f2) + AVCODEC_PATCH := $(shell echo $(AVCODEC_VERSION) | cut -d. 
-f3) + + # Compare version (58.134.100) + ifeq ($(shell test $(AVCODEC_MAJOR) -lt 58 || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -lt 134) || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -eq 134 && test $(AVCODEC_PATCH) -le 100); echo $$?),0) + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=0 + else + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=1 + endif + + COMPILER := $(HIPCXX) +else + $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP; CUDA not supported) +endif + +ICXXFLAGS += $(CXXFLAGS) +ICPPFLAGS += $(CPPFLAGS) +ILDFLAGS += $(LDFLAGS) +ILDLIBS += $(LDLIBS) + +SOURCES := main.cpp $(UTILS_DIR)/rocvideodecode/roc_video_dec.cpp $(UTILS_DIR)/ffmpegvideodecode/ffmpeg_video_dec.cpp + +$(EXAMPLE): $(SOURCES) $(COMMON_INCLUDE_DIR)/example_utils.hpp $(COMMON_INCLUDE_DIR)/rocdecode_utils.hpp + $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $(SOURCES) $(ILDLIBS) + +clean: + $(RM) $(EXAMPLE) + +.PHONY: clean diff --git a/Libraries/rocDecode/video_decode_pic_files/README.md b/Libraries/rocDecode/video_decode_pic_files/README.md new file mode 100644 index 000000000..3200c80f0 --- /dev/null +++ b/Libraries/rocDecode/video_decode_pic_files/README.md @@ -0,0 +1,115 @@ +# rocDecode Picture File Extraction + +## Description + +This example demonstrates extracting individual decoded frames from a video file and saving them as separate picture files. The sample decodes a video stream using the rocDecode library and writes each decoded frame to disk as an individual YUV file, making it useful for frame-by-frame analysis, quality inspection, or creating image sequences from video content. + +## Application Flow + +1. Parse command-line arguments for input file, output directory, device ID, and frame options. +2. Initialize the FFMPEG video demuxer to extract codec information. +3. Create the video decoder instance with the specified codec and device. +4. Verify codec support on the selected GPU device. +5. 
Create output directory if it doesn't exist. +6. Loop through video stream: + - Demux video packets from input file. + - Decode frames using hardware acceleration. + - Retrieve each decoded frame. + - Save frame to individual file with sequential naming (e.g., frame_0000.yuv, frame_0001.yuv). + - Release frame back to decoder. +7. Display total number of frames extracted. +8. Clean up decoder and demuxer resources. + +## Key APIs and Concepts + +- **Frame Extraction**: Each decoded frame is saved as a separate file: + - Frames are saved in raw YUV format (NV12, P016, YUV444, etc.). + - Sequential file naming for easy frame identification. + - Preserves original video quality without re-encoding. + - Useful for frame-by-frame analysis and quality verification. + +- **File Naming Convention**: Frames are saved with zero-padded sequential numbers: + - Format: `frame_XXXX.yuv` where XXXX is the frame number. + - Zero-padding ensures proper alphabetical sorting. + - Frame number corresponds to decode order. + +- **Output Format**: Frames are saved in raw YUV format: + - Maintains the decoder's native output format (NV12, P016, etc.). + - No color space conversion or format transformation. + - Includes luma and chroma planes in planar or semi-planar layout. + - File size depends on resolution and bit depth. + +- **Decoder Configuration**: Standard decoder setup with: + - `rocDecCreateDecoder()`: Initializes decoder with codec parameters. + - Output surface memory type configured for frame retrieval. + - Optional crop rectangle for region-of-interest extraction. + +- **Frame Processing**: Each frame is processed individually: + - `rocDecGetVideoFrame()`: Retrieves decoded frame data. + - Frame data is copied from device memory to host memory. + - Written to disk as raw binary data. + - Frame is released back to decoder surface pool. + +- **Use Cases**: + - Creating image sequences from video files. + - Frame-by-frame quality analysis and inspection. 
+ - Extracting specific frames for thumbnails or previews. + - Debugging video decode issues. + - Preparing training data for machine learning applications. + +## Demonstrated API Calls + +### rocDecode APIs + +- `rocDecCreateDecoder` +- `rocDecDecodeFrame` +- `rocDecGetVideoFrame` +- `rocDecGetDecodeStatus` +- `rocDecDestroyDecoder` +- `rocDecCreateVideoParser` +- `rocDecParseVideoData` +- `rocDecDestroyVideoParser` +- `rocDecGetErrorName` + +### HIP Runtime APIs + +- `hipSetDevice` +- `hipGetDeviceProperties` +- `hipMalloc` +- `hipFree` +- `hipMemcpy` +- `hipMemcpyDtoH` + +### FFMPEG APIs + +- `avformat_open_input` +- `avformat_find_stream_info` +- `av_find_best_stream` +- `av_read_frame` +- `av_packet_alloc` +- `av_packet_free` +- `av_packet_unref` +- `avformat_close_input` +- `av_bsf_get_by_name` +- `av_bsf_alloc` +- `av_bsf_init` +- `av_bsf_send_packet` +- `av_bsf_receive_packet` +- `av_bsf_free` + +### Data Types and Enums + +- `rocDecDecoderHandle` +- `RocdecVideoParser` +- `rocDecVideoCodec` +- `rocDecVideoSurfaceFormat` +- `rocDecVideoChromaFormat` +- `rocDecDecoderCreateInfo` +- `RocdecParserParams` +- `RocdecVideoFormat` +- `RocdecPicParams` +- `RocdecParserDispInfo` +- `rocDecDecodeStatus` +- `AVCodecID` +- `AVFormatContext` +- `AVPacket` diff --git a/Libraries/rocDecode/video_decode_pic_files/main.cpp b/Libraries/rocDecode/video_decode_pic_files/main.cpp new file mode 100644 index 000000000..fb3ef4886 --- /dev/null +++ b/Libraries/rocDecode/video_decode_pic_files/main.cpp @@ -0,0 +1,412 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "roc_video_dec.h" +#include "video_demuxer.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if ENABLE_HOST_DECODE + #include "ffmpeg_video_dec.h" +#endif + +#include "CmdParser/cmdparser.hpp" +#include "example_utils.hpp" +#include "rocdecode_utils.hpp" + +int main(int argc, char** argv) +{ + // Parse command-line arguments + cli::Parser parser(argc, argv); + parser.set_optional>("i", + "input", + {}, + "Input picture files (required)"); + parser.set_optional("codec", + "codec", + 0, + "Codec type (0: HEVC, 1: AVC; 2: AV1; 3: VP9) - required"); + parser.set_optional("l", "iterations", 1, "Number of iterations"); + parser.set_optional("o", + "output", + "", + "Output file path - dumps output if requested"); + parser.set_optional("d", "device", 0, "GPU device ID"); + parser.set_optional("backend", + "backend", + 0, + "Backend (0 for GPU, 1 CPU-FFMpeg, 2 CPU-FFMpeg No threading)"); + parser.set_optional( + "f", + "frames", + 0, + "Number of decoded frames - specify the number of pictures to be decoded"); + parser.set_optional( + "z", + "zero_latency", + false, + "Force zero latency (decoded frames will be flushed out for display immediately)"); + parser.set_optional("disp_delay", + "disp_delay", + 1, + "Specify the number of frames to be delayed for display"); + parser.set_optional("md5", + "md5", + false, + "Generate MD5 message digest on the decoded YUV image sequence"); + parser.set_optional( + "md5_check", + "md5_check", + "", + "MD5 file path - generate MD5 message digest and compare to reference"); + parser.set_optional("crop", + "crop", + "", + "Crop rectangle for output (left,top,right,bottom)"); + parser.set_optional("m", + "mem_type", + 0, + "Output surface memory type (0: DEV_INTERNAL, 1: DEV_COPIED, 2: " + "HOST_COPIED, 3: NOT_MAPPED)"); + parser.run_and_exit_if_error(); + + // Get parameters + std::vector file_names = parser.get>("i"); + int codec_type = parser.get("codec"); + int 
num_iterations = parser.get("l"); + std::string output_file_path = parser.get("o"); + int device_id = parser.get("d"); + int backend = parser.get("backend"); + uint32_t num_decoded_frames = parser.get("f"); + bool b_force_zero_latency = parser.get("z"); + int disp_delay = parser.get("disp_delay"); + bool b_generate_md5 = parser.get("md5"); + std::string md5_file_path = parser.get("md5_check"); + std::string crop_str = parser.get("crop"); + OutputSurfaceMemoryType mem_type = static_cast(parser.get("m")); + + if(file_names.empty()) + { + std::cerr << "Error: Input files are required. Use -i option." << std::endl; + return 1; + } + + int dump_output_frames = output_file_path.empty() ? 0 : 1; + bool b_md5_check = !md5_file_path.empty(); + if(b_md5_check) + { + b_generate_md5 = true; + } + bool b_extract_sei_messages = false; + + Rect crop_rect = {}; + Rect* p_crop_rect = nullptr; + if(!crop_str.empty()) + { + if(4 + != sscanf(crop_str.c_str(), + "%d,%d,%d,%d", + &crop_rect.left, + &crop_rect.top, + &crop_rect.right, + &crop_rect.bottom)) + { + std::cerr << "Error: Invalid crop rectangle format. Use: left,top,right,bottom" + << std::endl; + return 1; + } + if((crop_rect.right - crop_rect.left) % 2 == 1 + || (crop_rect.bottom - crop_rect.top) % 2 == 1) + { + std::cout << "output crop rectangle must have width and height of even numbers" + << std::endl; + return 1; + } + p_crop_rect = &crop_rect; + } + + try + { + std::cout << "Total frame number = " << file_names.size() << std::endl; + rocDecVideoCodec rocdec_codec_id; + switch(codec_type) + { + case 0: rocdec_codec_id = rocDecVideoCodec_HEVC; break; + case 1: rocdec_codec_id = rocDecVideoCodec_AVC; break; + case 2: rocdec_codec_id = rocDecVideoCodec_AV1; break; + case 3: rocdec_codec_id = rocDecVideoCodec_VP9; break; + default: std::cerr << "Unsupported stream codec type." 
<< std::endl; return 1; + } + + RocVideoDecoder* viddec; + if(!backend) + { + // gpu backend + viddec = new RocVideoDecoder(device_id, + mem_type, + rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay); + } + else + { +#if ENABLE_HOST_DECODE + std::cout << "info: RocDecode is using CPU backend!" << std::endl; + if(mem_type == OUT_SURFACE_MEM_DEV_INTERNAL) + { + mem_type + = OUT_SURFACE_MEM_DEV_COPIED; // mem_type internal is not supported in this mode + } + if(backend == 1) + { + viddec = new FFMpegVideoDecoder(device_id, + mem_type, + rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay); + } + else + { + viddec = new FFMpegVideoDecoder(device_id, + mem_type, + rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay, + true); + } +#else + std::cerr << "Error: CPU backend not enabled. Rebuild with ENABLE_HOST_DECODE=1" + << std::endl; + return 1; +#endif + } + + std::string device_name, gcn_arch_name; + int pci_bus_id, pci_domain_id, pci_device_id; + + viddec->GetDeviceinfo(device_name, gcn_arch_name, pci_bus_id, pci_domain_id, pci_device_id); + std::cout << "info: Using GPU device " << device_id << " - " << device_name << "[" + << gcn_arch_name << "] on PCI bus " << std::setfill('0') << std::setw(2) + << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') + << std::setw(2) << std::right << std::hex << pci_domain_id << "." << pci_device_id + << std::dec << std::endl; + std::cout << "info: decoding started, please wait!" 
<< std::endl; + + int n_video_bytes = 0, n_frame_returned = 0, n_frame = 0; + int n_pic_decoded = 0, decoded_pics = 0; + std::vector bitstream(5 * 1024 * 1024); + int pkg_flags = 0; + uint8_t* pframe = nullptr; + int64_t pts = 0; + OutputSurfaceInfo* surf_info; + double total_dec_time = 0; + MD5Generator* md5_generator = nullptr; + + // initialize reconfigure params + ReconfigParams reconfig_params = {}; + reconfig_dump_file_struct reconfig_user_struct = {}; + reconfig_params.p_fn_reconfigure_flush = reconfigure_flush_callback; + reconfig_user_struct.b_dump_frames_to_file = dump_output_frames; + reconfig_user_struct.output_file_name = output_file_path; + reconfig_params.reconfig_flush_mode = RECONFIG_FLUSH_MODE_NONE; + if(dump_output_frames) + { + reconfig_params.reconfig_flush_mode |= RECONFIG_FLUSH_MODE_DUMP_TO_FILE; + } + if(b_generate_md5) + { + reconfig_params.reconfig_flush_mode |= RECONFIG_FLUSH_MODE_CALCULATE_MD5; + } + reconfig_params.p_reconfig_user_struct = &reconfig_user_struct; + + if(b_generate_md5) + { + md5_generator = new MD5Generator(); + md5_generator->InitMd5(); + reconfig_user_struct.md5_generator_handle = static_cast(md5_generator); + } + viddec->SetReconfigParams(&reconfig_params); + + for(int i = 0; i < num_iterations; i++) + { + int num_frames_decoded_in_loop = 0; + pkg_flags = 0; + for(const auto& file_name : file_names) + { + std::ifstream in_file(file_name, std::ios::binary); + if(!in_file) + { + std::cerr << "Error: Failed to open " << file_name << " for reading." + << std::endl; + return 1; + } + in_file.seekg(0, std::ios::end); + n_video_bytes = in_file.tellg(); + if(static_cast(n_video_bytes) > bitstream.size()) + { + bitstream.resize(n_video_bytes); + } + in_file.seekg(0, std::ios::beg); + if(!in_file.read(reinterpret_cast(bitstream.data()), n_video_bytes)) + { + std::cerr << "Error: Failed to read " << file_name << "." 
<< std::endl; + return 1; + } + in_file.close(); + + auto start_time = std::chrono::high_resolution_clock::now(); + if(static_cast(num_frames_decoded_in_loop + 1) == file_names.size()) + { + pkg_flags |= ROCDEC_PKT_ENDOFSTREAM; + } + n_frame_returned = viddec->DecodeFrame(bitstream.data(), + n_video_bytes, + pkg_flags, + pts, + &decoded_pics); + num_frames_decoded_in_loop++; + + if(!n_frame && !viddec->GetOutputSurfaceInfo(&surf_info)) + { + std::cerr << "Error: Failed to get Output Surface Info!" << std::endl; + break; + } + for(int j = 0; j < n_frame_returned; j++) + { + pframe = viddec->GetFrame(&pts); + if(b_generate_md5) + { + md5_generator->UpdateMd5ForFrame(pframe, surf_info); + } + if(dump_output_frames && mem_type != OUT_SURFACE_MEM_NOT_MAPPED) + { + viddec->SaveFrameToFile(output_file_path, pframe, surf_info); + } + viddec->ReleaseFrame(pts); + } + auto end_time = std::chrono::high_resolution_clock::now(); + auto time_per_decode + = std::chrono::duration(end_time - start_time).count(); + total_dec_time += time_per_decode; + n_frame += n_frame_returned; + n_pic_decoded += decoded_pics; + if(num_decoded_frames && static_cast(n_frame) >= num_decoded_frames) + { + break; + } + } + } + n_frame += viddec->GetNumOfFlushedFrames(); + + std::cout << "info: Total pictures decoded: " << n_pic_decoded << std::endl; + std::cout << "info: Total frames output/displayed: " << n_frame << std::endl; + if(!dump_output_frames) + { + std::cout << "info: avg decoding time per picture: " << total_dec_time / n_pic_decoded + << " ms" << std::endl; + std::cout << "info: avg decode FPS: " << (n_pic_decoded / total_dec_time) * 1000 + << std::endl; + std::cout << "info: avg output/display time per frame: " << total_dec_time / n_frame + << " ms" << std::endl; + std::cout << "info: avg output/display FPS: " << (n_frame / total_dec_time) * 1000 + << std::endl; + } + else + { + if(mem_type == OUT_SURFACE_MEM_NOT_MAPPED) + { + std::cout << "info: saving frames with -m 3 option is not 
supported!" << std::endl; + } + else + { + std::cout << "info: saved frames into " << output_file_path << std::endl; + } + } + if(b_generate_md5) + { + uint8_t* digest; + md5_generator->FinalizeMd5(&digest); + std::cout << "MD5 message digest: "; + for(int i = 0; i < 16; i++) + { + std::cout << std::setfill('0') << std::setw(2) << std::hex + << static_cast(digest[i]); + } + std::cout << std::endl; + if(b_md5_check) + { + std::string ref_md5_string(33, 0); + uint8_t ref_md5[16]; + std::ifstream ref_md5_file(md5_file_path.c_str(), std::ios::in); + if(!ref_md5_file) + { + std::cerr << "Failed to open MD5 file." << std::endl; + return 1; + } + ref_md5_file.getline(ref_md5_string.data(), ref_md5_string.length()); + if(!ref_md5_file) + { + std::cerr << "Failed to read MD5 digest string." << std::endl; + return 1; + } + for(int i = 0; i < 16; i++) + { + std::string part = ref_md5_string.substr(i * 2, 2); + ref_md5[i] = std::stoi(part, nullptr, 16); + } + if(memcmp(digest, ref_md5, 16) == 0) + { + std::cout << "MD5 digest matches the reference MD5 digest: "; + } + else + { + std::cout << "MD5 digest does not match the reference MD5 digest: "; + } + std::cout << ref_md5_string.c_str() << std::endl; + ref_md5_file.close(); + } + delete md5_generator; + } + + delete viddec; + } + catch(const std::exception& ex) + { + std::cout << ex.what() << std::endl; + return 1; + } + + return 0; +} diff --git a/Libraries/rocDecode/video_decode_raw/.gitignore b/Libraries/rocDecode/video_decode_raw/.gitignore new file mode 100644 index 000000000..cf11e80d3 --- /dev/null +++ b/Libraries/rocDecode/video_decode_raw/.gitignore @@ -0,0 +1 @@ +rocdecode_video_decode_raw diff --git a/Libraries/rocDecode/video_decode_raw/CMakeLists.txt b/Libraries/rocDecode/video_decode_raw/CMakeLists.txt new file mode 100644 index 000000000..0abeaf508 --- /dev/null +++ b/Libraries/rocDecode/video_decode_raw/CMakeLists.txt @@ -0,0 +1,126 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. 
All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +set(example_name rocdecode_video_decode_raw) + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(${example_name} LANGUAGES CXX) + +include("../../../Common/HipPlatform.cmake") +select_gpu_language() + +enable_language(${ROCM_EXAMPLES_GPU_LANGUAGE}) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD 17) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_EXTENSIONS OFF) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD_REQUIRED ON) +select_hip_platform() +verify_hip_platform(PLATFORMS "amd") + +if(CMAKE_SYSTEM_NAME MATCHES "Windows") + message(STATUS "rocDecode examples are only available on Linux") + return() +else() + set(ROCM_ROOT + "/opt/rocm" + CACHE PATH + "Root directory of the ROCm installation" + ) +endif() + +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +find_package(rocdecode REQUIRED) + +# Find FFmpeg libraries +find_library(AVCODEC_LIBRARY avcodec REQUIRED) +find_library(AVFORMAT_LIBRARY avformat REQUIRED) +find_library(AVUTIL_LIBRARY avutil REQUIRED) +find_library(SWRESAMPLE_LIBRARY swresample REQUIRED) + +find_path(AVCODEC_INCLUDE_DIR libavcodec/avcodec.h REQUIRED) +find_path(AVFORMAT_INCLUDE_DIR libavformat/avformat.h REQUIRED) +find_path(AVUTIL_INCLUDE_DIR libavutil/avutil.h REQUIRED) + +# Check FFmpeg version for compatibility using pkg-config (same as Makefile) +execute_process( + COMMAND pkg-config --modversion libavcodec + OUTPUT_VARIABLE AVCODEC_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) + +# If pkg-config fails, try to get version from header as fallback +if(NOT AVCODEC_VERSION AND AVCODEC_INCLUDE_DIR) + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MAJOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MAJOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MINOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MINOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MICRO_LINE + REGEX "^#define LIBAVCODEC_VERSION_MICRO") + + string(REGEX REPLACE 
"^#define LIBAVCODEC_VERSION_MAJOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MAJOR "${AVCODEC_VERSION_MAJOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MINOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MINOR "${AVCODEC_VERSION_MINOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MICRO[ \t]+([0-9]+).*$" "\\1" AVCODEC_MICRO "${AVCODEC_VERSION_MICRO_LINE}") + + set(AVCODEC_VERSION "${AVCODEC_MAJOR}.${AVCODEC_MINOR}.${AVCODEC_MICRO}") +endif() + +add_executable(${example_name} + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp +) + +target_link_libraries(${example_name} + PRIVATE + rocdecode::rocdecode + ${AVCODEC_LIBRARY} + ${AVFORMAT_LIBRARY} + ${AVUTIL_LIBRARY} + ${SWRESAMPLE_LIBRARY} +) + +target_include_directories( + ${example_name} + PRIVATE + "../../../Common" + "../../../External" + "${ROCM_ROOT}/share/rocdecode/utils" + "${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode" + ${AVCODEC_INCLUDE_DIR} + ${AVFORMAT_INCLUDE_DIR} + ${AVUTIL_INCLUDE_DIR} +) + +# FFMPEG multi-version support +if(AVCODEC_VERSION VERSION_LESS_EQUAL 58.134.100) + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=0) +else() + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=1) +endif() + +set_source_files_properties( + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp + PROPERTIES LANGUAGE ${ROCM_EXAMPLES_GPU_LANGUAGE} +) + +install(TARGETS ${example_name}) diff --git a/Libraries/rocDecode/video_decode_raw/Makefile b/Libraries/rocDecode/video_decode_raw/Makefile new file mode 100644 index 000000000..8bd1c7a19 --- /dev/null +++ b/Libraries/rocDecode/video_decode_raw/Makefile @@ -0,0 +1,81 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +EXAMPLE := rocdecode_video_decode_raw +COMMON_INCLUDE_DIR := ../../../Common +EXTERNAL_DIR := ../../../External +GPU_RUNTIME := HIP + +# HIP variables +ROCM_INSTALL_DIR := /opt/rocm +UTILS_DIR := ${ROCM_INSTALL_DIR}/share/rocdecode/utils + +HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include +ROCDECODE_INCLUDE_DIR := $(HIP_INCLUDE_DIR) + +HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc + +# Common variables and flags +CXX_STD := c++17 +ICXXFLAGS := -std=$(CXX_STD) +ICPPFLAGS := -isystem $(ROCDECODE_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -I $(EXTERNAL_DIR) -I $(UTILS_DIR) -I $(UTILS_DIR)/rocvideodecode +ILDFLAGS := -L $(ROCM_INSTALL_DIR)/lib +ILDLIBS := -lrocdecode -lavcodec -lavformat -lavutil -lswresample + +ifeq ($(GPU_RUNTIME), HIP) + CXXFLAGS ?= -Wall -Wextra + CPPFLAGS += -D__HIP_PLATFORM_AMD__ + + # FFmpeg version detection + AVCODEC_VERSION := $(shell pkg-config --modversion libavcodec 2>/dev/null || echo "0") + AVCODEC_MAJOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f1) + AVCODEC_MINOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f2) + AVCODEC_PATCH := $(shell echo $(AVCODEC_VERSION) | cut -d. -f3) + + # Compare version (58.134.100) + ifeq ($(shell test $(AVCODEC_MAJOR) -lt 58 || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -lt 134) || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -eq 134 && test $(AVCODEC_PATCH) -le 100); echo $$?),0) + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=0 + else + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=1 + endif + + COMPILER := $(HIPCXX) +else + $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". 
GPU_RUNTIME must be HIP; CUDA not supported) +endif + +ICXXFLAGS += $(CXXFLAGS) +ICPPFLAGS += $(CPPFLAGS) +ILDFLAGS += $(LDFLAGS) +ILDLIBS += $(LDLIBS) + +SOURCES := main.cpp $(UTILS_DIR)/rocvideodecode/roc_video_dec.cpp + +$(EXAMPLE): $(SOURCES) $(COMMON_INCLUDE_DIR)/example_utils.hpp $(COMMON_INCLUDE_DIR)/rocdecode_utils.hpp + $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $(SOURCES) $(ILDLIBS) + +clean: + $(RM) $(EXAMPLE) + +.PHONY: clean diff --git a/Libraries/rocDecode/video_decode_raw/README.md b/Libraries/rocDecode/video_decode_raw/README.md new file mode 100644 index 000000000..67607478f --- /dev/null +++ b/Libraries/rocDecode/video_decode_raw/README.md @@ -0,0 +1,93 @@ +# rocDecode Raw Bitstream Decode + +## Description + +This example demonstrates video decoding using the rocDecode library's built-in bitstream reader instead of the FFmpeg demuxer. The sample reads raw video bitstreams directly and decodes them using hardware acceleration, providing an alternative to FFmpeg-based demuxing. This approach is useful for scenarios where FFmpeg is not available or when working with raw elementary streams. Note that the build files provided with this example (CMakeLists.txt and Makefile) still link against the FFmpeg libraries, so FFmpeg must be installed to build it as-is. + +## Application Flow + +1. Parse command-line arguments for input file, device ID, and output options. +2. Create the built-in bitstream reader using `rocDecCreateBitstreamReader()`. +3. Extract codec information from the bitstream. +4. Create the video decoder instance with the detected codec. +5. Verify codec support on the selected GPU device. +6. Loop through bitstream: + - Read video data packets using the bitstream reader. + - Parse packets using the video parser. + - Decode frames through parser callbacks. + - Retrieve decoded frames. + - Optionally save frames to output file. +7. Display decoding statistics. +8. Destroy bitstream reader and clean up decoder resources.
+ +## Key APIs and Concepts + +- **Built-in Bitstream Reader**: rocDecode provides a native bitstream reader: + - `rocDecCreateBitstreamReader()`: Creates a reader for raw elementary streams. + - `rocDecDestroyBitstreamReader()`: Releases bitstream reader resources. + - Removes the need for the FFmpeg demuxer in simple decode scenarios. + - Supports common video codecs (H.264, HEVC, VP9, AV1). + +- **Raw Elementary Stream Processing**: The bitstream reader handles: + - Direct reading of compressed video data, one picture at a time (`rocDecGetBitstreamPicData()`). + - Automatic detection of codec type and bit depth from the bitstream (`rocDecGetBitstreamCodecType()`, `rocDecGetBitstreamBitDepth()`). + - Extraction of sequence parameters and frame data. + - No container format parsing required. + +- **Decoder Integration**: The decoder works with the bitstream reader: + - `rocDecCreateDecoder()`: Initializes decoder with codec from bitstream. + - `rocDecParseVideoData()`: Parses data provided by bitstream reader. + - `rocDecDecodeFrame()`: Decodes frames using hardware acceleration. + - Same decode pipeline as FFmpeg-based samples. + +- **Use Cases**: + - Decoding raw elementary streams without container formats. + - Embedded systems where FFmpeg is not available. + - Custom video processing pipelines. + - Testing decoder with raw bitstream data. + - Scenarios requiring minimal dependencies. + +- **Limitations**: + - Only supports elementary streams (no container formats like MP4, MKV). + - Limited metadata extraction compared to FFmpeg. + - No support for multiplexed audio/video streams. + - Codec must be detectable from bitstream headers.
+ +## Demonstrated API Calls + +### rocDecode APIs + +- `rocDecCreateDecoder` +- `rocDecDecodeFrame` +- `rocDecGetVideoFrame` +- `rocDecGetDecodeStatus` +- `rocDecDestroyDecoder` +- `rocDecCreateVideoParser` +- `rocDecParseVideoData` +- `rocDecDestroyVideoParser` +- `rocDecCreateBitstreamReader` +- `rocDecDestroyBitstreamReader` +- `rocDecGetErrorName` + +### HIP Runtime APIs + +- `hipSetDevice` +- `hipGetDeviceProperties` +- `hipMalloc` +- `hipFree` +- `hipMemcpy` +- `hipMemcpyDtoH` + +### Data Types and Enums + +- `rocDecDecoderHandle` +- `RocdecVideoParser` +- `rocDecVideoCodec` +- `rocDecVideoSurfaceFormat` +- `rocDecVideoChromaFormat` +- `rocDecDecoderCreateInfo` +- `RocdecParserParams` +- `RocdecVideoFormat` +- `RocdecPicParams` +- `RocdecParserDispInfo` +- `rocDecDecodeStatus` diff --git a/Libraries/rocDecode/video_decode_raw/main.cpp b/Libraries/rocDecode/video_decode_raw/main.cpp new file mode 100644 index 000000000..8aa63ff0f --- /dev/null +++ b/Libraries/rocDecode/video_decode_raw/main.cpp @@ -0,0 +1,293 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if __cplusplus >= 201703L && __has_include() + #include +#else + #include +#endif + +#include "rocdecode/roc_bitstream_reader.h" +#include "roc_video_dec.h" + +#include "CmdParser/cmdparser.hpp" +#include "example_utils.hpp" +#include "rocdecode_utils.hpp" + +int main(int argc, char** argv) +{ + // Parse command-line arguments + cli::Parser parser(argc, argv); + parser.set_optional("i", "input", "", "Input file path (required)"); + parser.set_optional("o", + "output", + "", + "Output file path - dumps output if requested"); + parser.set_optional("d", + "device", + 0, + "GPU device ID (0 for the first device, 1 for the second, etc.)"); + parser.set_optional("f", + "frames", + 0, + "Number of decoded frames - specify the number of pictures to be " + "decoded (0 = decode entire stream)"); + parser.set_optional( + "z", + "zero_latency", + false, + "Force zero latency (decoded frames will be flushed out for display immediately)"); + parser.set_optional("disp_delay", + "display_delay", + 1, + "Specify the number of frames to be delayed for display"); + parser.set_optional("sei", "extract_sei", false, "Extract SEI messages"); + parser.set_optional("crop", + "crop_rect", + "", + "Crop rectangle for output (format: left,top,right,bottom)"); + parser.set_optional("m", + "memory_type", + 0, + "Output surface memory type [0: DEV_INTERNAL, 1: DEV_COPIED, 2: " + "HOST_COPIED, 3: NOT_MAPPED]"); + parser.run_and_exit_if_error(); + + // Get parameters + std::string input_file_path = parser.get("i"); + if(input_file_path.empty()) + { + std::cerr << "Error: Input file path is required (-i option)" << std::endl; + return 
1; + } + + std::string output_file_path = parser.get("o"); + int dump_output_frames = output_file_path.empty() ? 0 : 1; + int device_id = parser.get("d"); + int disp_delay = parser.get("disp_delay"); + bool b_force_zero_latency = parser.get("z"); + bool b_extract_sei_messages = parser.get("sei"); + uint32_t num_decoded_frames = parser.get("f"); + OutputSurfaceMemoryType mem_type = static_cast(parser.get("m")); + + // Parse crop rectangle if provided + Rect crop_rect = {}; + Rect* p_crop_rect = nullptr; + std::string crop_str = parser.get("crop"); + if(!crop_str.empty()) + { + if(4 + != sscanf(crop_str.c_str(), + "%d,%d,%d,%d", + &crop_rect.left, + &crop_rect.top, + &crop_rect.right, + &crop_rect.bottom)) + { + std::cerr << "Error: Invalid crop rectangle format. Use: left,top,right,bottom" + << std::endl; + return 1; + } + if((crop_rect.right - crop_rect.left) % 2 == 1 + || (crop_rect.bottom - crop_rect.top) % 2 == 1) + { + std::cerr << "Error: Output crop rectangle must have width and height of even numbers" + << std::endl; + return 1; + } + p_crop_rect = &crop_rect; + } + + try + { + std::size_t found_file = input_file_path.find_last_of('/'); + std::cout << "info: Input file: " << input_file_path.substr(found_file + 1) << std::endl; + std::cout << "info: Using built-in bitstream reader" << std::endl; + RocdecBitstreamReader bs_reader = nullptr; + rocDecVideoCodec rocdec_codec_id; + int bit_depth; + if(rocDecCreateBitstreamReader(&bs_reader, input_file_path.c_str()) != ROCDEC_SUCCESS) + { + std::cerr << "Failed to create the bitstream reader." << std::endl; + return 1; + } + if(rocDecGetBitstreamCodecType(bs_reader, &rocdec_codec_id) != ROCDEC_SUCCESS) + { + std::cerr << "Failed to get stream codec type." << std::endl; + return 1; + } + if(rocdec_codec_id >= rocDecVideoCodec_NumCodecs) + { + std::cerr + << "Unsupported stream file type or codec type by the bitstream reader. Exiting." 
+ << std::endl; + return 1; + } + if(rocDecGetBitstreamBitDepth(bs_reader, &bit_depth) != ROCDEC_SUCCESS) + { + std::cerr << "Failed to get stream bit depth." << std::endl; + return 1; + } + + RocVideoDecoder viddec(device_id, + mem_type, + rocdec_codec_id, + b_force_zero_latency, + p_crop_rect, + b_extract_sei_messages, + disp_delay); + if(!viddec.CodecSupported(device_id, rocdec_codec_id, bit_depth)) + { + std::cerr << "GPU doesn't support codec!" << std::endl; + return 0; + } + std::string device_name, gcn_arch_name; + int pci_bus_id, pci_domain_id, pci_device_id; + + viddec.GetDeviceinfo(device_name, gcn_arch_name, pci_bus_id, pci_domain_id, pci_device_id); + std::cout << "info: Using GPU device " << device_id << " - " << device_name << "[" + << gcn_arch_name << "] on PCI bus " << std::setfill('0') << std::setw(2) + << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') + << std::setw(2) << std::right << std::hex << pci_domain_id << "." << pci_device_id + << std::dec << std::endl; + std::cout << "info: decoding started, please wait!" 
<< std::endl; + + int n_video_bytes = 0, n_frame_returned = 0, n_frame = 0; + int n_pic_decoded = 0, decoded_pics = 0; + uint8_t* pvideo = nullptr; + int pkg_flags = 0; + uint8_t* pframe = nullptr; + int64_t pts = 0; + OutputSurfaceInfo* surf_info; + double total_dec_time = 0; + + // Initialize reconfigure params + ReconfigParams reconfig_params = {}; + reconfig_dump_file_struct reconfig_user_struct = {}; + reconfig_params.p_fn_reconfigure_flush = reconfigure_flush_callback; + reconfig_user_struct.b_dump_frames_to_file = dump_output_frames; + reconfig_user_struct.output_file_name = output_file_path; + if(dump_output_frames) + { + reconfig_params.reconfig_flush_mode |= RECONFIG_FLUSH_MODE_DUMP_TO_FILE; + } + else + { + reconfig_params.reconfig_flush_mode = RECONFIG_FLUSH_MODE_NONE; + } + reconfig_params.p_reconfig_user_struct = &reconfig_user_struct; + + viddec.SetReconfigParams(&reconfig_params); + + do + { + auto start_time = std::chrono::high_resolution_clock::now(); + if(rocDecGetBitstreamPicData(bs_reader, &pvideo, &n_video_bytes, &pts) + != ROCDEC_SUCCESS) + { + std::cerr << "Failed to get picture data." << std::endl; + return 1; + } + // Treat 0 bitstream size as end of stream indicator + if(n_video_bytes == 0) + { + pkg_flags |= ROCDEC_PKT_ENDOFSTREAM; + } + n_frame_returned + = viddec.DecodeFrame(pvideo, n_video_bytes, pkg_flags, pts, &decoded_pics); + + if(!n_frame && !viddec.GetOutputSurfaceInfo(&surf_info)) + { + std::cerr << "Error: Failed to get Output Surface Info!" 
<< std::endl; + break; + } + for(int i = 0; i < n_frame_returned; i++) + { + pframe = viddec.GetFrame(&pts); + if(dump_output_frames && mem_type != OUT_SURFACE_MEM_NOT_MAPPED) + { + viddec.SaveFrameToFile(output_file_path, pframe, surf_info); + } + // release frame + viddec.ReleaseFrame(pts); + } + auto end_time = std::chrono::high_resolution_clock::now(); + auto time_per_decode + = std::chrono::duration(end_time - start_time).count(); + total_dec_time += time_per_decode; + n_frame += n_frame_returned; + n_pic_decoded += decoded_pics; + if(num_decoded_frames && num_decoded_frames <= static_cast(n_frame)) + { + break; + } + } + while(n_video_bytes); + + n_frame += viddec.GetNumOfFlushedFrames(); + std::cout << "info: Total pictures decoded: " << n_pic_decoded << std::endl; + std::cout << "info: Total frames output/displayed: " << n_frame << std::endl; + if(!dump_output_frames) + { + std::cout << "info: avg decoding time per picture: " << total_dec_time / n_pic_decoded + << " ms" << std::endl; + std::cout << "info: avg decode FPS: " << (n_pic_decoded / total_dec_time) * 1000 + << std::endl; + std::cout << "info: avg output/display time per frame: " << total_dec_time / n_frame + << " ms" << std::endl; + std::cout << "info: avg output/display FPS: " << (n_frame / total_dec_time) * 1000 + << std::endl; + } + else + { + if(mem_type == OUT_SURFACE_MEM_NOT_MAPPED) + { + std::cout << "info: saving frames with -m 3 option is not supported!" 
<< std::endl; + } + else + { + std::cout << "info: saved frames into " << output_file_path << std::endl; + } + } + if(bs_reader) + { + rocDecDestroyBitstreamReader(bs_reader); + } + } + catch(const std::exception& ex) + { + std::cout << ex.what() << std::endl; + exit(1); + } + + return 0; +} diff --git a/Libraries/rocDecode/video_decode_rgb/.gitignore b/Libraries/rocDecode/video_decode_rgb/.gitignore new file mode 100644 index 000000000..1428778fa --- /dev/null +++ b/Libraries/rocDecode/video_decode_rgb/.gitignore @@ -0,0 +1 @@ +rocdecode_video_decode_rgb diff --git a/Libraries/rocDecode/video_decode_rgb/CMakeLists.txt b/Libraries/rocDecode/video_decode_rgb/CMakeLists.txt new file mode 100644 index 000000000..680f051ee --- /dev/null +++ b/Libraries/rocDecode/video_decode_rgb/CMakeLists.txt @@ -0,0 +1,130 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +set(example_name rocdecode_video_decode_rgb) + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(${example_name} LANGUAGES CXX) + +include("../../../Common/HipPlatform.cmake") +select_gpu_language() + +enable_language(${ROCM_EXAMPLES_GPU_LANGUAGE}) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD 17) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_EXTENSIONS OFF) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD_REQUIRED ON) +select_hip_platform() +verify_hip_platform(PLATFORMS "amd") + +if(CMAKE_SYSTEM_NAME MATCHES "Windows") + message(STATUS "rocDecode examples are only available on Linux") + return() +else() + set(ROCM_ROOT + "/opt/rocm" + CACHE PATH + "Root directory of the ROCm installation" + ) +endif() + +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +find_package(rocdecode REQUIRED) + +# Find FFmpeg libraries +find_library(AVCODEC_LIBRARY avcodec REQUIRED) +find_library(AVFORMAT_LIBRARY avformat REQUIRED) +find_library(AVUTIL_LIBRARY avutil REQUIRED) +find_library(SWRESAMPLE_LIBRARY swresample REQUIRED) + +find_path(AVCODEC_INCLUDE_DIR libavcodec/avcodec.h REQUIRED) +find_path(AVFORMAT_INCLUDE_DIR libavformat/avformat.h REQUIRED) +find_path(AVUTIL_INCLUDE_DIR libavutil/avutil.h REQUIRED) + +# Check FFmpeg version for compatibility using pkg-config (same as Makefile) +execute_process( + COMMAND pkg-config --modversion libavcodec + OUTPUT_VARIABLE AVCODEC_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) + +# If pkg-config fails, try to get version from header as fallback +if(NOT AVCODEC_VERSION AND AVCODEC_INCLUDE_DIR) + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MAJOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MAJOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MINOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MINOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MICRO_LINE + REGEX "^#define LIBAVCODEC_VERSION_MICRO") + + string(REGEX REPLACE 
"^#define LIBAVCODEC_VERSION_MAJOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MAJOR "${AVCODEC_VERSION_MAJOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MINOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MINOR "${AVCODEC_VERSION_MINOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MICRO[ \t]+([0-9]+).*$" "\\1" AVCODEC_MICRO "${AVCODEC_VERSION_MICRO_LINE}") + + set(AVCODEC_VERSION "${AVCODEC_MAJOR}.${AVCODEC_MINOR}.${AVCODEC_MICRO}") +endif() + +add_executable(${example_name} + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp + ${ROCM_ROOT}/share/rocdecode/utils/colorspace_kernels.cpp + ${ROCM_ROOT}/share/rocdecode/utils/resize_kernels.cpp +) + +target_link_libraries(${example_name} + PRIVATE + rocdecode::rocdecode + ${AVCODEC_LIBRARY} + ${AVFORMAT_LIBRARY} + ${AVUTIL_LIBRARY} + ${SWRESAMPLE_LIBRARY} +) + +target_include_directories( + ${example_name} + PRIVATE + "../../../Common" + "../../../External" + "${ROCM_ROOT}/share/rocdecode/utils" + "${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode" + ${AVCODEC_INCLUDE_DIR} + ${AVFORMAT_INCLUDE_DIR} + ${AVUTIL_INCLUDE_DIR} +) + +# FFMPEG multi-version support +if(AVCODEC_VERSION VERSION_LESS_EQUAL 58.134.100) + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=0) +else() + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=1) +endif() + +set_source_files_properties( + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp + ${ROCM_ROOT}/share/rocdecode/utils/colorspace_kernels.cpp + ${ROCM_ROOT}/share/rocdecode/utils/resize_kernels.cpp + PROPERTIES LANGUAGE ${ROCM_EXAMPLES_GPU_LANGUAGE} +) + +install(TARGETS ${example_name}) diff --git a/Libraries/rocDecode/video_decode_rgb/Makefile b/Libraries/rocDecode/video_decode_rgb/Makefile new file mode 100644 index 000000000..865492bab --- /dev/null +++ b/Libraries/rocDecode/video_decode_rgb/Makefile @@ -0,0 +1,81 @@ +# MIT License +# +# Copyright (c) 2025 
Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +EXAMPLE := rocdecode_video_decode_rgb +COMMON_INCLUDE_DIR := ../../../Common +EXTERNAL_DIR := ../../../External +GPU_RUNTIME := HIP + +# HIP variables +ROCM_INSTALL_DIR := /opt/rocm +UTILS_DIR := ${ROCM_INSTALL_DIR}/share/rocdecode/utils + +HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include +ROCDECODE_INCLUDE_DIR := $(HIP_INCLUDE_DIR) + +HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc + +# Common variables and flags +CXX_STD := c++17 +ICXXFLAGS := -std=$(CXX_STD) +ICPPFLAGS := -isystem $(ROCDECODE_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -I $(EXTERNAL_DIR) -I $(UTILS_DIR) -I $(UTILS_DIR)/rocvideodecode +ILDFLAGS := -L $(ROCM_INSTALL_DIR)/lib +ILDLIBS := -lrocdecode -lavcodec -lavformat -lavutil -lswresample + +ifeq ($(GPU_RUNTIME), HIP) + CXXFLAGS ?= -Wall -Wextra + CPPFLAGS += -D__HIP_PLATFORM_AMD__ + + # FFmpeg version detection + AVCODEC_VERSION := $(shell pkg-config --modversion libavcodec 2>/dev/null || echo "0") + AVCODEC_MAJOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f1) + AVCODEC_MINOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f2) + AVCODEC_PATCH := $(shell echo $(AVCODEC_VERSION) | cut -d. -f3) + + # Compare version (58.134.100) + ifeq ($(shell test $(AVCODEC_MAJOR) -lt 58 || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -lt 134) || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -eq 134 && test $(AVCODEC_PATCH) -le 100); echo $$?),0) + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=0 + else + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=1 + endif + + COMPILER := $(HIPCXX) +else + $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". 
GPU_RUNTIME must be HIP; CUDA not supported) +endif + +ICXXFLAGS += $(CXXFLAGS) +ICPPFLAGS += $(CPPFLAGS) +ILDFLAGS += $(LDFLAGS) +ILDLIBS += $(LDLIBS) + +SOURCES := main.cpp $(UTILS_DIR)/rocvideodecode/roc_video_dec.cpp $(UTILS_DIR)/colorspace_kernels.cpp $(UTILS_DIR)/resize_kernels.cpp + +$(EXAMPLE): $(SOURCES) $(COMMON_INCLUDE_DIR)/example_utils.hpp $(COMMON_INCLUDE_DIR)/rocdecode_utils.hpp + $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $(SOURCES) $(ILDLIBS) + +clean: + $(RM) $(EXAMPLE) + +.PHONY: clean diff --git a/Libraries/rocDecode/video_decode_rgb/README.md b/Libraries/rocDecode/video_decode_rgb/README.md new file mode 100644 index 000000000..c279e3447 --- /dev/null +++ b/Libraries/rocDecode/video_decode_rgb/README.md @@ -0,0 +1,134 @@ +# rocDecode Video Decode with Color Space Conversion + +## Description + +This example demonstrates video decoding with hardware-accelerated color space conversion using the rocDecode library and custom HIP kernels. The sample decodes YUV video frames and converts them to RGB/BGR formats using GPU compute, showcasing parallel execution of VCN hardware decoder and compute engine. It supports multiple output formats including RGB24, BGR24, RGBA32, BGRA32, and their 48/64-bit variants, with optional frame resizing. + +## Application Flow + +1. Parse command-line arguments for input file, output format, resize dimensions, and device options. +2. Initialize the FFMPEG video demuxer to extract codec information. +3. Create the video decoder instance with device internal memory type. +4. Verify codec support on the selected GPU device. +5. Create two HIP streams: one for decoding, one for color space conversion. +6. Allocate frame buffers for asynchronous processing. +7. Launch color space conversion thread for parallel post-processing. +8. Loop through video stream: + - Demux video packets from input file. + - Decode frames using hardware decoder. + - Copy decoded frames to intermediate buffers asynchronously. 
+ - Signal the color space conversion thread when a frame is ready. + - The conversion thread processes frames in parallel with decoding: + - Optional resize using HIP kernels. + - Color space conversion from YUV to RGB/BGR. + - Save the converted frames to the output file, if one was requested with `-o`. +9. Synchronize threads and display performance metrics. +10. Clean up streams, buffers, and decoder resources. + +## Key APIs and Concepts + +- **Dual-Stream Processing**: Uses separate HIP streams for decode and post-processing: + - The decode-side stream is used for the device-to-device copies of decoded frames into the intermediate buffers. + - The color conversion stream runs the resize and YUV-to-RGB kernels. + - Enables parallel execution of the VCN decoder and the compute engine. + - Maximizes GPU utilization and throughput. + +- **Color Space Conversion Kernels**: Custom HIP kernels for YUV to RGB conversion: + - Supports multiple input formats: NV12, P016, YUV444, YUV444P16. + - Supports multiple output formats: RGB24, BGR24, RGB48, BGR48, RGBA32, BGRA32, RGBA64, BGRA64. + - Implements the ITU-R BT.709 color space standard. + - Optimized for the AMD GPU architecture. + +- **Resize Kernels**: Optional frame resizing using HIP kernels: + - `ResizeNv12()`: Resize NV12 format frames. + - `ResizeP016()`: Resize P016 format frames. + - Nearest neighbor interpolation for performance. + - Frames are scaled to the `WxH` size given on the command line; the aspect ratio is preserved only if that size has the same ratio as the source. + +- **Asynchronous Processing**: Frame buffers enable pipelined execution: + - Decoded frames are copied to intermediate buffers (two in this sample). + - The conversion thread processes frames while the decoder continues. + - A mutex and a condition variable synchronize the producer (decode loop) and the consumer (conversion thread). + - Minimizes idle time for both the decoder and the compute engine. + +- **Output Formats**: + - **24-bit**: RGB24, BGR24 (8 bits per channel, 3 channels). + - **32-bit**: RGBA32, BGRA32 (8 bits per channel, 4 channels with alpha). + - **48-bit**: RGB48, BGR48 (16 bits per channel, 3 channels). + - **64-bit**: RGBA64, BGRA64 (16 bits per channel, 4 channels with alpha). 
+ +- **Performance Optimization**: + - Parallel decode and color conversion. + - Asynchronous memory operations. + - Efficient kernel implementations. + - Optional output to avoid I/O bottlenecks. + +## Demonstrated API Calls + +### rocDecode APIs + +- `rocDecCreateDecoder` +- `rocDecDecodeFrame` +- `rocDecGetVideoFrame` +- `rocDecGetDecodeStatus` +- `rocDecDestroyDecoder` +- `rocDecCreateVideoParser` +- `rocDecParseVideoData` +- `rocDecDestroyVideoParser` +- `rocDecGetErrorName` + +### HIP Runtime APIs + +- `hipSetDevice` +- `hipGetDeviceProperties` +- `hipMalloc` +- `hipFree` +- `hipMemcpy` +- `hipMemcpyDtoH` +- `hipMemcpyDtoDAsync` +- `hipStreamCreate` +- `hipStreamDestroy` +- `hipStreamSynchronize` + +### FFMPEG APIs + +- `avformat_open_input` +- `avformat_find_stream_info` +- `av_find_best_stream` +- `av_read_frame` +- `av_packet_alloc` +- `av_packet_free` +- `av_packet_unref` +- `avformat_close_input` +- `av_bsf_get_by_name` +- `av_bsf_alloc` +- `av_bsf_init` +- `av_bsf_send_packet` +- `av_bsf_receive_packet` +- `av_bsf_free` + +### C++ Standard Library (Threading) + +- `std::thread` +- `std::mutex` +- `std::condition_variable` +- `std::atomic` +- `std::queue` + +### Data Types and Enums + +- `rocDecDecoderHandle` +- `RocdecVideoParser` +- `rocDecVideoCodec` +- `rocDecVideoSurfaceFormat` +- `rocDecVideoChromaFormat` +- `rocDecDecoderCreateInfo` +- `RocdecParserParams` +- `RocdecVideoFormat` +- `RocdecPicParams` +- `RocdecParserDispInfo` +- `rocDecDecodeStatus` +- `hipStream_t` +- `AVCodecID` +- `AVFormatContext` +- `AVPacket` diff --git a/Libraries/rocDecode/video_decode_rgb/main.cpp b/Libraries/rocDecode/video_decode_rgb/main.cpp new file mode 100644 index 000000000..746db6024 --- /dev/null +++ b/Libraries/rocDecode/video_decode_rgb/main.cpp @@ -0,0 +1,551 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "md5.h" +#include "roc_video_dec.h" +#include "video_demuxer.h" +#include "video_post_process.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "CmdParser/cmdparser.hpp" +#include "example_utils.hpp" +#include "rocdecode_utils.hpp" + +std::vector st_output_format_name + = {"native", "bgr", "bgr48", "rgb", "rgb48", "bgra", "bgra64", "rgba", "rgba64"}; + +constexpr int frame_buffers_size = 2; +std::mutex mutex; +std::condition_variable cv; +std::queue frame_indices_q; +uint8_t* frame_buffers[frame_buffers_size] = {0}; + +void color_space_conversion_thread(std::atomic& continue_processing, + bool convert_to_rgb, + Dim* p_resize_dim, + OutputSurfaceInfo** surf_info, + OutputSurfaceInfo** res_surf_info, + OutputFormatEnum e_output_format, + uint8_t* p_rgb_dev_mem, + uint8_t* p_resize_dev_mem, + bool dump_output_frames, + std::string& output_file_path, + RocVideoDecoder& viddec, + VideoPostProcess& post_proc, + MD5Generator* md5_gen_handle, + bool b_generate_md5, + int device_id, + hipStream_t hip_stream) +{ + + size_t rgb_image_size, resize_image_size; + hipError_t hip_status = hipSuccess; + int current_frame_index; + uint8_t* frame; + + HIP_CHECK(hipSetDevice(device_id)); + while(continue_processing || !frame_indices_q.empty()) + { + OutputSurfaceInfo* p_surf_info; + uint8_t* out_frame; + { + std::unique_lock lock(mutex); + cv.wait(lock, [&] { return !frame_indices_q.empty() || !continue_processing; }); + if(!continue_processing && frame_indices_q.empty()) + { + break; + } + p_surf_info = *surf_info; + current_frame_index = frame_indices_q.front(); + frame = frame_buffers[current_frame_index]; + out_frame = frame; + } + if(p_resize_dim->w && p_resize_dim->h && *res_surf_info) + { + if(((*surf_info)->output_width != static_cast(p_resize_dim->w)) + || ((*surf_info)->output_height != static_cast(p_resize_dim->h))) + { + resize_image_size = 
p_resize_dim->w * (p_resize_dim->h + (p_resize_dim->h >> 1)) + * (*surf_info)->bytes_per_pixel; + if(p_resize_dev_mem == nullptr && resize_image_size > 0) + { + hip_status = hipMalloc(&p_resize_dev_mem, resize_image_size); + if(hip_status != hipSuccess) + { + std::cerr << "ERROR: hipMalloc failed to allocate the device memory for " + "the output!" + << hip_status << std::endl; + return; + } + } + if((*surf_info)->bytes_per_pixel == 2) + { + ResizeP016(p_resize_dev_mem, + p_resize_dim->w * 2, + p_resize_dim->w, + p_resize_dim->h, + frame, + (*surf_info)->output_pitch, + (*surf_info)->output_width, + (*surf_info)->output_height, + (frame + (*surf_info)->output_vstride * (*surf_info)->output_pitch), + nullptr, + hip_stream); + } + else + { + ResizeNv12(p_resize_dev_mem, + p_resize_dim->w, + p_resize_dim->w, + p_resize_dim->h, + frame, + (*surf_info)->output_pitch, + (*surf_info)->output_width, + (*surf_info)->output_height, + (frame + (*surf_info)->output_vstride * (*surf_info)->output_pitch), + nullptr, + hip_stream); + } + (*res_surf_info)->output_width = p_resize_dim->w; + (*res_surf_info)->output_height = p_resize_dim->h; + (*res_surf_info)->output_pitch = p_resize_dim->w * (*surf_info)->bytes_per_pixel; + (*res_surf_info)->output_vstride = p_resize_dim->h; + (*res_surf_info)->output_surface_size_in_bytes + = (*res_surf_info)->output_pitch * (p_resize_dim->h + (p_resize_dim->h >> 1)); + (*res_surf_info)->mem_type = OUT_SURFACE_MEM_DEV_COPIED; + p_surf_info = *res_surf_info; + out_frame = p_resize_dev_mem; + } + } + + if(convert_to_rgb) + { + uint32_t rgb_stride = post_proc.GetRgbStride(e_output_format, p_surf_info); + rgb_image_size = p_surf_info->output_height * rgb_stride; + if(p_rgb_dev_mem == nullptr) + { + hip_status = hipMalloc(&p_rgb_dev_mem, rgb_image_size); + if(hip_status != hipSuccess) + { + std::cerr + << "ERROR: hipMalloc failed to allocate the device memory for the output!" 
+ << hip_status << std::endl; + return; + } + } + post_proc.ColorConvertYUV2RGB(out_frame, + p_surf_info, + p_rgb_dev_mem, + e_output_format, + hip_stream); + } + if(dump_output_frames) + { + if(convert_to_rgb) + { + viddec.SaveFrameToFile(output_file_path, + p_rgb_dev_mem, + p_surf_info, + rgb_image_size); + } + else + { + viddec.SaveFrameToFile(output_file_path, out_frame, p_surf_info); + } + } + if(b_generate_md5) + { + if(convert_to_rgb) + { + md5_gen_handle->UpdateMd5ForDataBuffer(p_rgb_dev_mem, rgb_image_size); + } + else + { + md5_gen_handle->UpdateMd5ForFrame(frame, p_surf_info); + } + } + + { + std::unique_lock lock(mutex); + frame_indices_q.pop(); + } + + cv.notify_one(); + } +} + +int main(int argc, char** argv) +{ + // Parse command-line arguments + cli::Parser parser(argc, argv); + parser.set_optional("i", "input", "", "Input file path (required)"); + parser.set_optional("o", + "output", + "", + "Output file path - dumps output if requested"); + parser.set_optional("d", "device", 0, "GPU device ID"); + parser.set_optional( + "of", + "output_format", + "native", + "Output format (native, bgr, bgr48, rgb, rgb48, bgra, bgra64, rgba, rgba64)"); + parser.set_optional("resize", + "resize", + "", + "Resize dimensions WxH (e.g., 1920x1080)"); + parser.set_optional("crop", + "crop", + "", + "Crop rectangle for output (left,top,right,bottom)"); + parser.set_optional("disp_delay", + "disp_delay", + 1, + "Number of frames to be delayed for display"); + parser.set_optional("md5", "md5", false, "Generate MD5 message digest"); + parser.set_optional("md5_check", + "md5_check", + "", + "MD5 file path - generate MD5 and compare to reference"); + parser.run_and_exit_if_error(); + + // Get parameters + std::string input_file_path = parser.get("i"); + std::string output_file_path = parser.get("o"); + int device_id = parser.get("d"); + std::string output_format_str = parser.get("of"); + std::string resize_str = parser.get("resize"); + std::string crop_str = 
parser.get("crop"); + int disp_delay = parser.get("disp_delay"); + bool b_generate_md5 = parser.get("md5"); + std::string md5_file_path = parser.get("md5_check"); + + if(input_file_path.empty()) + { + std::cerr << "Error: Input file path is required. Use -i option." << std::endl; + return 1; + } + + bool dump_output_frames = !output_file_path.empty(); + bool b_md5_check = !md5_file_path.empty(); + /* A reference digest to compare against implies generating one. */ if(b_md5_check) + { + b_generate_md5 = true; + } + bool b_extract_sei_messages = false; + + Rect crop_rect = {}; + Rect* p_crop_rect = nullptr; + if(!crop_str.empty()) + { + if(4 + != sscanf(crop_str.c_str(), + "%d,%d,%d,%d", + &crop_rect.left, + &crop_rect.top, + &crop_rect.right, + &crop_rect.bottom)) + { + std::cerr << "Error: Invalid crop rectangle format. Use: left,top,right,bottom" + << std::endl; + return 1; + } + /* NOTE(review): assuming the Rect fields are signed ints (as the %d conversions suggest), a negative odd difference gives a remainder of -1 and passes this test - confirm such rectangles are rejected elsewhere. */ if((crop_rect.right - crop_rect.left) % 2 == 1 + || (crop_rect.bottom - crop_rect.top) % 2 == 1) + { + std::cout << "output crop rectangle must have width and height of even numbers" + << std::endl; + return 1; + } + p_crop_rect = &crop_rect; + } + + Dim resize_dim = {}; + if(!resize_str.empty()) + { + if(2 != sscanf(resize_str.c_str(), "%dx%d", &resize_dim.w, &resize_dim.h)) + { + std::cerr << "Error: Invalid resize format. Use: WxH (e.g., 1920x1080)" << std::endl; + return 1; + } + if(resize_dim.w % 2 == 1 || resize_dim.h % 2 == 1) + { + std::cout << "Resizing dimensions must have width and height of even numbers" + << std::endl; + return 1; + } + } + + /* The position of the name in st_output_format_name becomes the OutputFormatEnum value further down. */ OutputFormatEnum e_output_format = native; + auto it + = std::find(st_output_format_name.begin(), st_output_format_name.end(), output_format_str); + if(it == st_output_format_name.end()) + { + std::cerr << "Error: Invalid output format. 
Valid options: native, bgr, bgr48, rgb, rgb48, " + "bgra, bgra64, rgba, rgba64" + << std::endl; + return 1; + } + e_output_format = (OutputFormatEnum)(it - st_output_format_name.begin()); + + hipError_t hip_status = hipSuccess; + uint8_t* p_rgb_dev_mem = nullptr; + uint8_t* p_resize_dev_mem = nullptr; + OutputSurfaceMemoryType mem_type = OUT_SURFACE_MEM_DEV_INTERNAL; + int current_frame_index = 0; + hipStream_t hip_stream_dec = 0; + hipStream_t hip_stream_csc = 0; + + try + { + VideoDemuxer demuxer(input_file_path.c_str()); + rocDecVideoCodec rocdec_codec_id = AVCodec2RocDecVideoCodec(demuxer.GetCodecID()); + RocVideoDecoder viddec(device_id, + mem_type, + rocdec_codec_id, + false, + p_crop_rect, + b_extract_sei_messages, + disp_delay); + if(!viddec.CodecSupported(device_id, rocdec_codec_id, demuxer.GetBitDepth())) + { + /* NOTE(review): an unsupported codec ends the program with status 0 - presumably so that hardware without this codec is not reported as a failure; confirm. */ std::cerr << "GPU doesn't support codec!" << std::endl; + return 0; + } + VideoPostProcess post_process; + MD5Generator* md5_generator = nullptr; + + std::string device_name, gcn_arch_name; + int pci_bus_id, pci_domain_id, pci_device_id; + + viddec.GetDeviceinfo(device_name, gcn_arch_name, pci_bus_id, pci_domain_id, pci_device_id); + std::cout << "info: Using GPU device " << device_id << " " << device_name << "[" + << gcn_arch_name << "] on PCI bus " << std::setfill('0') << std::setw(2) + << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') + << std::setw(2) << std::right << std::hex << pci_domain_id << "." << pci_device_id + << std::dec << std::endl; + std::cout << "info: decoding started, please wait!" 
<< std::endl; + HIP_CHECK(hipStreamCreate(&hip_stream_dec)); + HIP_CHECK(hipStreamCreate(&hip_stream_csc)); + + if(b_generate_md5) + { + md5_generator = new MD5Generator(); + md5_generator->InitMd5(); + } + + int n_video_bytes = 0, n_frames_returned = 0, n_frame = 0; + uint8_t* p_video = nullptr; + uint8_t* p_frame = nullptr; + int64_t pts = 0; + OutputSurfaceInfo* surf_info; + OutputSurfaceInfo* resize_surf_info = nullptr; + double total_dec_time = 0; + bool convert_to_rgb = e_output_format != native; + std::atomic continue_processing(true); + /* Start the consumer thread; it shares mutex, cv, frame_indices_q and frame_buffers with the loop below. NOTE(review): if a later call in this try block throws, this still-joinable std::thread is destroyed during unwinding, which calls std::terminate - confirm whether the demuxer or decoder calls can throw. */ std::thread color_space_conversion_thread_obj(color_space_conversion_thread, + std::ref(continue_processing), + std::ref(convert_to_rgb), + &resize_dim, + &surf_info, + &resize_surf_info, + std::ref(e_output_format), + std::ref(p_rgb_dev_mem), + std::ref(p_resize_dev_mem), + std::ref(dump_output_frames), + std::ref(output_file_path), + std::ref(viddec), + std::ref(post_process), + md5_generator, + b_generate_md5, + device_id, + hip_stream_csc); + + auto start_time = std::chrono::high_resolution_clock::now(); + /* Producer loop: demux one packet, decode it, and hand every returned frame to the consumer through frame_buffers. It ends after the packet for which the demuxer reports zero bytes. */ do + { + demuxer.Demux(&p_video, &n_video_bytes, &pts); + n_frames_returned = viddec.DecodeFrame(p_video, n_video_bytes, 0, pts); + if(!n_frame && !viddec.GetOutputSurfaceInfo(&surf_info)) + { + std::cerr << "Error: Failed to get Output Image Info!" 
<< std::endl; + break; + } + /* The resized-surface description starts as a copy of the decoder's surface info; the consumer thread overwrites its size fields. */ if(resize_dim.w && resize_dim.h && !resize_surf_info) + { + resize_surf_info = new OutputSurfaceInfo; + memcpy(resize_surf_info, surf_info, sizeof(OutputSurfaceInfo)); + } + + for(int i = 0; i < n_frames_returned; i++) + { + p_frame = viddec.GetFrame(&pts); + /* Allocate the intermediate buffers once, sized from the surface info in effect at that moment. */ if(frame_buffers[0] == nullptr) + { + for(int i = 0; i < frame_buffers_size; i++) + { + HIP_CHECK( + hipMalloc(&frame_buffers[i], surf_info->output_surface_size_in_bytes)); + } + } + + { + std::unique_lock lock(mutex); + /* Wait for a free slot. The consumer pops an index only after it has finished with that buffer, so the slot written below is not one that is still being read. */ cv.wait(lock, [&] { return frame_indices_q.size() < frame_buffers_size; }); + HIP_CHECK(hipMemcpyDtoDAsync(frame_buffers[current_frame_index], + p_frame, + surf_info->output_surface_size_in_bytes, + hip_stream_dec)); + HIP_CHECK(hipStreamSynchronize(hip_stream_dec)); + frame_indices_q.push(current_frame_index); + } + + viddec.ReleaseFrame(pts); + current_frame_index = (current_frame_index + 1) % frame_buffers_size; + cv.notify_one(); + } + + n_frame += n_frames_returned; + } + while(n_video_bytes); + + { + std::unique_lock lock(mutex); + /* Tell the consumer that no further frames will be queued; it drains what is left and exits. */ continue_processing = false; + } + + cv.notify_one(); + + /* The clock is read before the join, so frames the consumer still has queued are not part of the measured time. */ auto end_time = std::chrono::high_resolution_clock::now(); + auto time_per_frame + = std::chrono::duration(end_time - start_time).count(); + total_dec_time += time_per_frame; + + color_space_conversion_thread_obj.join(); + + /* The worker's buffer parameters are plain uint8_t* values, so despite std::ref this variable is not updated by the thread and is still nullptr here. */ if(p_rgb_dev_mem != nullptr) + { + hip_status = hipFree(p_rgb_dev_mem); + if(hip_status != hipSuccess) + { + std::cout << "ERROR: hipFree failed! (" << hip_status << ")" << std::endl; + return -1; + } + } + for(int i = 0; i < frame_buffers_size; i++) + { + hip_status = hipFree(frame_buffers[i]); + if(hip_status != hipSuccess) + { + std::cout << "ERROR: hipFree failed! 
(" << hip_status << ")" << std::endl; + } + } + if(hip_stream_dec) + { + HIP_CHECK(hipStreamDestroy(hip_stream_dec)); + } + if(hip_stream_csc) + { + HIP_CHECK(hipStreamDestroy(hip_stream_csc)); + } + + std::cout << "info: Total frame decoded: " << n_frame << std::endl; + /* Timing figures are printed only when no frames were written to a file. */ if(!dump_output_frames) + { + std::string info_message = "info: avg decoding time per frame (ms): "; + if(convert_to_rgb) + { + info_message = "info: avg decoding and post processing time per frame (ms): "; + } + std::cout << info_message << total_dec_time / n_frame << std::endl; + std::cout << "info: avg FPS: " << (n_frame / total_dec_time) * 1000 << std::endl; + } + if(resize_surf_info != nullptr) + { + delete resize_surf_info; + } + if(b_generate_md5) + { + uint8_t* digest; + md5_generator->FinalizeMd5(&digest); + std::cout << "MD5 message digest: "; + for(int i = 0; i < 16; i++) + { + std::cout << std::setfill('0') << std::setw(2) << std::hex + << static_cast(digest[i]); + } + std::cout << std::endl; + if(b_md5_check) + { + /* Reference digest: at most 32 characters of the first line are read into this 33-byte buffer and then parsed two hexadecimal characters per byte. */ std::string ref_md5_string(33, 0); + uint8_t ref_md5[16]; + std::ifstream ref_md5_file(md5_file_path.c_str(), std::ios::in); + if(!ref_md5_file) + { + std::cerr << "Failed to open MD5 file." << std::endl; + return 1; + } + ref_md5_file.getline(ref_md5_string.data(), ref_md5_string.length()); + if(!ref_md5_file) + { + std::cerr << "Failed to read MD5 digest string." 
<< std::endl; + return 1; + } + for(int i = 0; i < 16; i++) + { + std::string part = ref_md5_string.substr(i * 2, 2); + /* std::stoi throws when the pair holds no hexadecimal digits; the catch block at the end of main reports it. */ ref_md5[i] = std::stoi(part, nullptr, 16); + } + /* NOTE(review): a mismatch is only printed; the function still returns 0 afterwards. */ if(memcmp(digest, ref_md5, 16) == 0) + { + std::cout << "MD5 digest matches the reference MD5 digest: "; + } + else + { + std::cout << "MD5 digest does not match the reference MD5 digest: "; + } + std::cout << ref_md5_string.c_str() << std::endl; + ref_md5_file.close(); + } + delete md5_generator; + } + } + catch(const std::exception& ex) + { + std::cout << ex.what() << std::endl; + return 1; + } + + return 0; +} diff --git a/Libraries/rocDecode/video_to_sequence/.gitignore b/Libraries/rocDecode/video_to_sequence/.gitignore new file mode 100644 index 000000000..3a4e586cc --- /dev/null +++ b/Libraries/rocDecode/video_to_sequence/.gitignore @@ -0,0 +1 @@ +rocdecode_video_to_sequence diff --git a/Libraries/rocDecode/video_to_sequence/CMakeLists.txt b/Libraries/rocDecode/video_to_sequence/CMakeLists.txt new file mode 100644 index 000000000..32a48419a --- /dev/null +++ b/Libraries/rocDecode/video_to_sequence/CMakeLists.txt @@ -0,0 +1,126 @@ +# MIT License +# +# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +set(example_name rocdecode_video_to_sequence) + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(${example_name} LANGUAGES CXX) + +include("../../../Common/HipPlatform.cmake") +select_gpu_language() + +enable_language(${ROCM_EXAMPLES_GPU_LANGUAGE}) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD 17) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_EXTENSIONS OFF) +set(CMAKE_${ROCM_EXAMPLES_GPU_LANGUAGE}_STANDARD_REQUIRED ON) +select_hip_platform() +verify_hip_platform(PLATFORMS "amd") + +if(CMAKE_SYSTEM_NAME MATCHES "Windows") + message(STATUS "rocDecode examples are only available on Linux") + return() +else() + set(ROCM_ROOT + "/opt/rocm" + CACHE PATH + "Root directory of the ROCm installation" + ) +endif() + +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +find_package(rocdecode REQUIRED) + +# Find FFmpeg libraries +find_library(AVCODEC_LIBRARY avcodec REQUIRED) +find_library(AVFORMAT_LIBRARY avformat REQUIRED) +find_library(AVUTIL_LIBRARY avutil REQUIRED) +find_library(SWRESAMPLE_LIBRARY swresample REQUIRED) + +find_path(AVCODEC_INCLUDE_DIR libavcodec/avcodec.h REQUIRED) +find_path(AVFORMAT_INCLUDE_DIR libavformat/avformat.h REQUIRED) +find_path(AVUTIL_INCLUDE_DIR libavutil/avutil.h REQUIRED) + +# Check FFmpeg version for compatibility using pkg-config (same as Makefile) +execute_process( + COMMAND pkg-config --modversion libavcodec + OUTPUT_VARIABLE AVCODEC_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) + +# If pkg-config fails, try to get version from header as 
fallback +if(NOT AVCODEC_VERSION AND AVCODEC_INCLUDE_DIR) + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MAJOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MAJOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MINOR_LINE + REGEX "^#define LIBAVCODEC_VERSION_MINOR") + file(STRINGS "${AVCODEC_INCLUDE_DIR}/libavcodec/version.h" AVCODEC_VERSION_MICRO_LINE + REGEX "^#define LIBAVCODEC_VERSION_MICRO") + + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MAJOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MAJOR "${AVCODEC_VERSION_MAJOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MINOR[ \t]+([0-9]+).*$" "\\1" AVCODEC_MINOR "${AVCODEC_VERSION_MINOR_LINE}") + string(REGEX REPLACE "^#define LIBAVCODEC_VERSION_MICRO[ \t]+([0-9]+).*$" "\\1" AVCODEC_MICRO "${AVCODEC_VERSION_MICRO_LINE}") + + set(AVCODEC_VERSION "${AVCODEC_MAJOR}.${AVCODEC_MINOR}.${AVCODEC_MICRO}") +endif() + +add_executable(${example_name} + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp +) + +target_link_libraries(${example_name} + PRIVATE + rocdecode::rocdecode + ${AVCODEC_LIBRARY} + ${AVFORMAT_LIBRARY} + ${AVUTIL_LIBRARY} + ${SWRESAMPLE_LIBRARY} +) + +target_include_directories( + ${example_name} + PRIVATE + "../../../Common" + "../../../External" + "${ROCM_ROOT}/share/rocdecode/utils" + "${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode" + ${AVCODEC_INCLUDE_DIR} + ${AVFORMAT_INCLUDE_DIR} + ${AVUTIL_INCLUDE_DIR} +) + +# FFMPEG multi-version support +if(AVCODEC_VERSION VERSION_LESS_EQUAL 58.134.100) + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=0) +else() + target_compile_definitions(${example_name} PRIVATE USE_AVCODEC_GREATER_THAN_58_134=1) +endif() + +set_source_files_properties( + main.cpp + ${ROCM_ROOT}/share/rocdecode/utils/rocvideodecode/roc_video_dec.cpp + PROPERTIES LANGUAGE ${ROCM_EXAMPLES_GPU_LANGUAGE} +) + +install(TARGETS ${example_name}) diff --git 
a/Libraries/rocDecode/video_to_sequence/Makefile b/Libraries/rocDecode/video_to_sequence/Makefile new file mode 100644 index 000000000..dadab9b3e --- /dev/null +++ b/Libraries/rocDecode/video_to_sequence/Makefile @@ -0,0 +1,81 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +EXAMPLE := rocdecode_video_to_sequence +COMMON_INCLUDE_DIR := ../../../Common +EXTERNAL_DIR := ../../../External +GPU_RUNTIME := HIP + +# HIP variables +ROCM_INSTALL_DIR := /opt/rocm +UTILS_DIR := ${ROCM_INSTALL_DIR}/share/rocdecode/utils + +HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include +ROCDECODE_INCLUDE_DIR := $(HIP_INCLUDE_DIR) + +HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc + +# Common variables and flags +CXX_STD := c++17 +ICXXFLAGS := -std=$(CXX_STD) +ICPPFLAGS := -isystem $(ROCDECODE_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -I $(EXTERNAL_DIR) -I $(UTILS_DIR) -I $(UTILS_DIR)/rocvideodecode +ILDFLAGS := -L $(ROCM_INSTALL_DIR)/lib +ILDLIBS := -lrocdecode -lavcodec -lavformat -lavutil -lswresample + +ifeq ($(GPU_RUNTIME), HIP) + CXXFLAGS ?= -Wall -Wextra + CPPFLAGS += -D__HIP_PLATFORM_AMD__ + + # FFmpeg version detection + AVCODEC_VERSION := $(shell pkg-config --modversion libavcodec 2>/dev/null || echo "0") + AVCODEC_MAJOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f1) + AVCODEC_MINOR := $(shell echo $(AVCODEC_VERSION) | cut -d. -f2) + AVCODEC_PATCH := $(shell echo $(AVCODEC_VERSION) | cut -d. -f3) + + # Compare version (58.134.100) + ifeq ($(shell test $(AVCODEC_MAJOR) -lt 58 || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -lt 134) || \ + (test $(AVCODEC_MAJOR) -eq 58 && test $(AVCODEC_MINOR) -eq 134 && test $(AVCODEC_PATCH) -le 100); echo $$?),0) + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=0 + else + CPPFLAGS += -DUSE_AVCODEC_GREATER_THAN_58_134=1 + endif + + COMPILER := $(HIPCXX) +else + $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". 
GPU_RUNTIME must be HIP; CUDA not supported) +endif + +ICXXFLAGS += $(CXXFLAGS) +ICPPFLAGS += $(CPPFLAGS) +ILDFLAGS += $(LDFLAGS) +ILDLIBS += $(LDLIBS) + +SOURCES := main.cpp $(UTILS_DIR)/rocvideodecode/roc_video_dec.cpp + +$(EXAMPLE): $(SOURCES) $(COMMON_INCLUDE_DIR)/example_utils.hpp $(COMMON_INCLUDE_DIR)/rocdecode_utils.hpp + $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $(SOURCES) $(ILDLIBS) + +clean: + $(RM) $(EXAMPLE) + +.PHONY: clean diff --git a/Libraries/rocDecode/video_to_sequence/README.md b/Libraries/rocDecode/video_to_sequence/README.md new file mode 100644 index 000000000..c55c7e646 --- /dev/null +++ b/Libraries/rocDecode/video_to_sequence/README.md @@ -0,0 +1,117 @@ +# rocDecode Video to Frame Sequence + +## Description + +This example demonstrates extracting a complete frame sequence from a video file using the rocDecode library. The sample decodes all frames from a video and saves them as a continuous sequence, useful for creating training datasets, video analysis, or frame-by-frame processing workflows. It provides options for output format selection and frame numbering. + +## Application Flow + +1. Parse command-line arguments for input file, output directory, device ID, and sequence options. +2. Initialize the FFMPEG video demuxer to extract codec information. +3. Create the video decoder instance with the specified codec and device. +4. Verify codec support on the selected GPU device. +5. Create output directory structure for frame sequence. +6. Loop through entire video stream: + - Demux video packets from input file. + - Decode frames using hardware acceleration. + - Retrieve each decoded frame in sequence. + - Save frame with sequential numbering (e.g., 0000.yuv, 0001.yuv, 0002.yuv). + - Maintain frame order for sequence integrity. +7. Display total number of frames in sequence. +8. Clean up decoder and demuxer resources. 
+ +## Key APIs and Concepts + +- **Sequential Frame Extraction**: Decodes frames in order and saves the frames selected for each sequence: + - Maintains decode order for frame sequence. + - Each sequence is written to its own file, named `output_{video}_seq_{n}.yuv`. + - Frame selection is controlled by the `seq_length`, `stride`, and `step` options; frames between the selected positions are decoded but not saved. + - Preserves temporal relationships between frames. + +- **Frame Sequence Output**: Organized frame storage: + - Sequence index embedded in each file name. + - Consistent naming convention across sequences. + - Raw YUV format preserves decode quality. + - All sequence files are written directly into the output directory. + +- **Decoder Configuration**: Standard setup optimized for sequential processing: + - `rocDecCreateDecoder()`: Initializes decoder with codec parameters. + - Output surface memory configured for efficient frame retrieval. + - Display delay set to maintain frame order. + +- **Frame Management**: Efficient processing of frame sequence: + - `rocDecGetVideoFrame()`: Retrieves frames in decode order. + - Frames are saved immediately after retrieval. + - Memory is released promptly to maintain decoder surface pool. + - Continuous processing without frame buffering. + +- **Use Cases**: + - Creating training datasets for machine learning. + - Video analysis requiring frame-by-frame access. + - Extracting complete frame sequences for processing pipelines. + - Quality analysis and comparison workflows. + - Preparing data for computer vision applications. + - Video editing and post-production workflows. + +- **Output Organization**: + - Frames saved in the specified output directory, which is removed and recreated if it already exists. + - Sequences are numbered from 0 to `batch_size - 1`. + - Raw YUV format (NV12, P016, YUV444, etc.). + - Metadata preserved for reconstruction.
+ +## Demonstrated API Calls + +### rocDecode APIs + +- `rocDecCreateDecoder` +- `rocDecDecodeFrame` +- `rocDecGetVideoFrame` +- `rocDecGetDecodeStatus` +- `rocDecDestroyDecoder` +- `rocDecCreateVideoParser` +- `rocDecParseVideoData` +- `rocDecDestroyVideoParser` +- `rocDecGetErrorName` + +### HIP Runtime APIs + +- `hipSetDevice` +- `hipGetDeviceProperties` +- `hipMalloc` +- `hipFree` +- `hipMemcpy` +- `hipMemcpyDtoH` + +### FFMPEG APIs + +- `avformat_open_input` +- `avformat_find_stream_info` +- `av_find_best_stream` +- `av_read_frame` +- `av_packet_alloc` +- `av_packet_free` +- `av_packet_unref` +- `avformat_close_input` +- `av_bsf_get_by_name` +- `av_bsf_alloc` +- `av_bsf_init` +- `av_bsf_send_packet` +- `av_bsf_receive_packet` +- `av_bsf_free` + +### Data Types and Enums + +- `rocDecDecoderHandle` +- `RocdecVideoParser` +- `rocDecVideoCodec` +- `rocDecVideoSurfaceFormat` +- `rocDecVideoChromaFormat` +- `rocDecDecoderCreateInfo` +- `RocdecParserParams` +- `RocdecVideoFormat` +- `RocdecPicParams` +- `RocdecParserDispInfo` +- `rocDecDecodeStatus` +- `AVCodecID` +- `AVFormatContext` +- `AVPacket` diff --git a/Libraries/rocDecode/video_to_sequence/main.cpp b/Libraries/rocDecode/video_to_sequence/main.cpp new file mode 100644 index 000000000..b5fed3097 --- /dev/null +++ b/Libraries/rocDecode/video_to_sequence/main.cpp @@ -0,0 +1,547 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if __cplusplus >= 201703L && __has_include() + #include +#else + #include +#endif + +#include "roc_video_dec.h" +#include "video_demuxer.h" + +#include "CmdParser/cmdparser.hpp" +#include "example_utils.hpp" +#include "rocdecode_utils.hpp" + +// Fixed-size pool of worker threads that run queued decode jobs in FIFO order. +class ThreadPool +{ +public: + ThreadPool(int nthreads) : shutdown_(false) + { + threads_.reserve(nthreads); + for(int i = 0; i < nthreads; ++i) + { + threads_.emplace_back(std::bind(&ThreadPool::thread_entry, this, i)); + } + } + + // Join on destruction: destroying a still-joinable std::thread calls std::terminate, + // which would happen if an exception unwound past the pool before join_threads() ran. + ~ThreadPool() { join_threads(); } + + // Signals shutdown, lets the workers finish every job still queued, then joins them. + // Safe to call more than once. + void join_threads() + { + { + std::unique_lock lock(mutex_); + shutdown_ = true; + cond_var_.notify_all(); + } + + for(auto& thread : threads_) + { + if(thread.joinable()) + { + thread.join(); + } + } + } + + // Queues a job and wakes one worker. + void execute_job(std::function func) + { + std::unique_lock lock(mutex_); + decode_jobs_queue_.emplace(std::move(func)); + cond_var_.notify_one(); + } + +protected: + void thread_entry(int /*i*/) + { + std::function execute_decode_job; + + while(true) + { + { + std::unique_lock lock(mutex_); + cond_var_.wait(lock, [&] { return shutdown_ || !decode_jobs_queue_.empty(); }); + // An empty queue after wake-up means shutdown was requested and no work is left. + if(decode_jobs_queue_.empty()) + { + return; + } + + execute_decode_job = std::move(decode_jobs_queue_.front()); + decode_jobs_queue_.pop(); + } + + // Run the job outside the lock so other workers can dequeue concurrently. + execute_decode_job(); + } + } + + std::mutex mutex_; + std::condition_variable cond_var_; + bool shutdown_; + std::queue> decode_jobs_queue_; + std::vector threads_; +}; + +struct DecoderInfo +{ + int dec_device_id; + std::unique_ptr viddec; + std::uint32_t bit_depth; + rocDecVideoCodec rocdec_codec_id; + std::atomic_bool decoding_complete; + + DecoderInfo() : dec_device_id(0), viddec(nullptr), bit_depth(8), decoding_complete(false) {} +}; + +struct SeqInfo +{ + int batch_size; + int seq_length; + int step; + int stride; +}; + +void dec_proc(RocVideoDecoder* p_dec, + VideoDemuxer* demuxer, + int* pn_frame, + double*
pn_fps, + std::atomic_bool& decoding_complete, + int& seek_mode, + bool& b_dump_output_frames, + SeqInfo& seq_info, + std::string* p_output_file_name, + OutputSurfaceMemoryType mem_type) +{ + int n_video_bytes = 0, n_frame_returned = 0; + int64_t n_frame = 0; + uint8_t * p_video = nullptr, *p_frame = nullptr; + int64_t pts = 0; + double total_dec_time = 0.0; + // Stays null until GetOutputSurfaceInfo() succeeds, so an unset pointer is detectable. + OutputSurfaceInfo* surf_info = nullptr; + VideoSeekContext video_seek_ctx; + // First frame number of every sequence: consecutive sequences are separated by `step` + // frames after the last frame of the previous one. + std::vector seq_frame_start(seq_info.batch_size); + seq_frame_start[0] = 0; + for(int i = 1; i < seq_info.batch_size; i++) + { + seq_frame_start[i] + = seq_frame_start[i - 1] + (seq_info.seq_length - 1) * seq_info.stride + seq_info.step; + } + auto start_time = std::chrono::high_resolution_clock::now(); + int n_frame_seq = 0, num_seq = 0; + int next_frame_num = 0; + bool seq_start = true; + std::string seq_output_file_name = p_output_file_name[num_seq]; + + ReconfigParams reconfig_params = {}; + reconfig_dump_file_struct reconfig_user_struct = {}; + reconfig_params.p_fn_reconfigure_flush = reconfigure_flush_callback; + reconfig_user_struct.b_dump_frames_to_file = false; + reconfig_params.reconfig_flush_mode = RECONFIG_FLUSH_MODE_NONE; + reconfig_params.p_reconfig_user_struct = &reconfig_user_struct; + p_dec->SetReconfigParams(&reconfig_params, true); + + do + { + if(seek_mode && seq_start) + { + video_seek_ctx.seek_frame_ = seq_frame_start[num_seq]; + video_seek_ctx.seek_crit_ = SEEK_CRITERIA_FRAME_NUM; + video_seek_ctx.seek_mode_ = SEEK_MODE_PREV_KEY_FRAME; + demuxer->Seek(video_seek_ctx, &p_video, &n_video_bytes); + pts = video_seek_ctx.out_frame_pts_; + n_frame = static_cast(pts * demuxer->GetFrameRate()); + seq_start = false; + p_dec->FlushAndReconfigure(); + } + else + { + demuxer->Demux(&p_video, &n_video_bytes, &pts); + } + n_frame_returned = p_dec->DecodeFrame(p_video, n_video_bytes, 0, pts); + if(b_dump_output_frames && mem_type != OUT_SURFACE_MEM_NOT_MAPPED) + { + // surf_info must be valid before any frame is saved. A seek can make n_frame + // non-zero before the first query, so also query when frames arrive and it is unset. + if((!n_frame || (n_frame_returned > 0 && !surf_info)) + && !p_dec->GetOutputSurfaceInfo(&surf_info)) + {
std::cerr << "Error: Failed to get Output Surface Info!" << std::endl; + break; + } + for(int i = 0; i < n_frame_returned; i++) + { + if((n_frame + i) == next_frame_num) + { + p_frame = p_dec->GetFrame(&pts); + if(n_frame_seq < seq_info.seq_length) + { + p_dec->SaveFrameToFile(seq_output_file_name, p_frame, surf_info); + n_frame_seq++; + } + p_dec->ReleaseFrame(pts); + next_frame_num += seq_info.stride; + } + else + { + p_frame = p_dec->GetFrame(&pts); + p_dec->ReleaseFrame(pts); + } + } + } + n_frame += n_frame_returned; + if(n_frame_seq >= seq_info.seq_length) + { + n_frame_seq = 0; + seq_start = true; + num_seq++; + if(num_seq < seq_info.batch_size) + { + next_frame_num = seq_frame_start[num_seq]; + seq_output_file_name = p_output_file_name[num_seq]; + } + p_dec->ResetSaveFrameToFile(); + n_frame_returned = p_dec->DecodeFrame(nullptr, 0, ROCDEC_PKT_ENDOFSTREAM, -1); + } + } + while(n_video_bytes && num_seq < seq_info.batch_size); + + auto end_time = std::chrono::high_resolution_clock::now(); + auto time_per_decode = std::chrono::duration(end_time - start_time).count(); + + total_dec_time = time_per_decode; + double average_decoding_time = total_dec_time / n_frame; + double n_fps = 1000 / average_decoding_time; + *pn_fps = n_fps; + *pn_frame = n_frame; + p_dec->ResetSaveFrameToFile(); + decoding_complete = true; +} + +int main(int argc, char** argv) +{ + cli::Parser parser(argc, argv); + parser.set_optional("i", "input", "", "Input folder path (required)"); + parser.set_optional("o", "output", "", "Output folder to dump sequences"); + parser.set_optional("d", "device", 0, "GPU device ID"); + parser.set_optional("t", "threads", 1, "Number of threads (1-64)"); + parser.set_optional("b", "batch_size", 4, "Number of sequences to be decoded"); + parser.set_optional("l", "seq_length", 4, "Number of frames in each sequence"); + parser.set_optional("step", "step", 1, "Frame interval between each sequence"); + parser.set_optional("stride", + "stride", + 1, + "Distance 
between consecutive frames in a sequence"); + parser.set_optional("seek_mode", + "seek_mode", + 0, + "Seeking option (0: no seek, 1: seek to prev key frame)"); + parser.set_optional("m", + "memory_type", + 0, + "Output surface memory type [0: DEV_INTERNAL, 1: DEV_COPIED, 2: " + "HOST_COPIED, 3: NOT_MAPPED]"); + parser.set_optional("disp_delay", + "display_delay", + 1, + "Number of frames to be delayed for display"); + parser.run_and_exit_if_error(); + + std::string input_folder_path = parser.get("i"); + if(input_folder_path.empty()) + { + std::cerr << "Error: Input folder path is required (-i option)" << std::endl; + return 1; + } + + std::string output_folder_path = parser.get("o"); + bool b_dump_output_frames = false; + if(!output_folder_path.empty()) + { +#if __cplusplus >= 201703L && __has_include() + if(std::filesystem::is_directory(output_folder_path)) + { + std::filesystem::remove_all(output_folder_path); + } + std::filesystem::create_directory(output_folder_path); +#else + if(std::experimental::filesystem::is_directory(output_folder_path)) + { + std::experimental::filesystem::remove_all(output_folder_path); + } + std::experimental::filesystem::create_directory(output_folder_path); +#endif + b_dump_output_frames = true; + } + + int device_id = parser.get("d"); + int n_threads = parser.get("t"); + if(n_threads <= 0 || n_threads > 64) + { + std::cerr << "Error: Number of threads must be between 1 and 64" << std::endl; + return 1; + } + + SeqInfo seq_info; + seq_info.batch_size = parser.get("b"); + seq_info.seq_length = parser.get("l"); + seq_info.step = parser.get("step"); + seq_info.stride = parser.get("stride"); + int seek_mode = parser.get("seek_mode"); + OutputSurfaceMemoryType mem_type = static_cast(parser.get("m")); + int disp_delay = parser.get("disp_delay"); + + bool b_extract_sei_messages = false; + Rect* p_crop_rect = nullptr; + std::vector input_file_names; + int num_files = 0; + + try + { +#if __cplusplus >= 201703L && __has_include() + for(const 
auto& entry : std::filesystem::directory_iterator(input_folder_path)) + { +#else + for(const auto& entry : + std::experimental::filesystem::directory_iterator(input_folder_path)) + { +#endif + input_file_names.push_back(entry.path()); + num_files++; + } + n_threads = ((n_threads > num_files) ? num_files : n_threads); + std::vector output_seq_file_names; + output_seq_file_names.resize(seq_info.batch_size * num_files); + int num_devices = 0, sd = 0; + hipError_t hip_status = hipSuccess; + hipDeviceProp_t hip_dev_prop; + std::string gcn_arch_name; + if(hipGetDeviceCount(&num_devices) != hipSuccess) + { + std::cout << "ERROR: hipGetDeviceCount failed! (" << hip_status << ")" << std::endl; + return -1; + } + if(num_devices < 1) + { + std::cerr << "ERROR: didn't find any GPU!" << std::endl; + return -1; + } + + if(hipSuccess != hipGetDeviceProperties(&hip_dev_prop, device_id)) + { + std::cerr << "ERROR: hipGetDeviceProperties for device (" << device_id << ") failed!" + << std::endl; + return -1; + } + + gcn_arch_name = hip_dev_prop.gcnArchName; + std::size_t pos = gcn_arch_name.find_first_of(":"); + std::string gcn_arch_name_base + = (pos != std::string::npos) ? 
gcn_arch_name.substr(0, pos) : gcn_arch_name; + + if(!gcn_arch_name_base.compare("gfx90a") && num_devices > 1) + { + sd = 1; + } + + std::string device_name; + int pci_bus_id, pci_domain_id, pci_device_id; + double total_fps = 0; + int n_total = 0; + std::vector v_fps; + std::vector v_frame; + v_fps.resize(num_files, 0); + v_frame.resize(num_files, 0); + int hip_vis_dev_count = 0; + get_env_var("HIP_VISIBLE_DEVICES", hip_vis_dev_count); + + std::vector> v_demuxer; + std::vector> v_dec_info; + ThreadPool thread_pool(n_threads); + std::mutex mutex; + + for(int i = 0; i < num_files; i++) + { + v_demuxer.push_back(std::make_unique(input_file_names[i].c_str())); + std::size_t found_file = input_file_names[i].find_last_of('/'); + input_file_names[i] = input_file_names[i].substr(found_file + 1); + if(b_dump_output_frames) + { + std::size_t found_ext = input_file_names[i].find_last_of('.'); + std::string path + = output_folder_path + "/output_" + input_file_names[i].substr(0, found_ext); + for(int n = 0; n < seq_info.batch_size; n++) + { + output_seq_file_names[i * seq_info.batch_size + n] + = path + "_seq_" + std::to_string(n) + ".yuv"; + } + } + } + + for(int i = 0; i < n_threads; i++) + { + v_dec_info.emplace_back(std::make_unique()); + if(!hip_vis_dev_count) + { + if(device_id % 2 == 0) + { + v_dec_info[i]->dec_device_id = (i % 2 == 0) ? device_id : device_id + sd; + } + else + { + v_dec_info[i]->dec_device_id = (i % 2 == 0) ? 
device_id - sd : device_id; + } + } + else + { + v_dec_info[i]->dec_device_id = i % hip_vis_dev_count; + } + v_dec_info[i]->rocdec_codec_id = AVCodec2RocDecVideoCodec(v_demuxer[i]->GetCodecID()); + v_dec_info[i]->bit_depth = v_demuxer[i]->GetBitDepth(); + v_dec_info[i]->viddec + = std::make_unique(v_dec_info[i]->dec_device_id, + mem_type, + v_dec_info[i]->rocdec_codec_id, + false, + p_crop_rect, + b_extract_sei_messages, + disp_delay); + v_dec_info[i]->viddec->GetDeviceinfo(device_name, + gcn_arch_name, + pci_bus_id, + pci_domain_id, + pci_device_id); + std::cout << "info: decoding " << input_file_names[i] << " using GPU device " + << v_dec_info[i]->dec_device_id << " - " << device_name << "[" + << gcn_arch_name << "] on PCI bus " << std::setfill('0') << std::setw(2) + << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') + << std::setw(2) << std::right << std::hex << pci_domain_id << "." + << pci_device_id << std::dec << std::endl; + } + + for(int j = 0; j < num_files; j++) + { + int thread_idx = j % n_threads; + if(j >= n_threads) + { + { + // Wait until the previous file on this decoder slot has finished. + std::unique_lock lock(mutex); + while(!v_dec_info[thread_idx]->decoding_complete) + { + sleep(1); + } + v_dec_info[thread_idx]->decoding_complete = false; + } + rocDecVideoCodec codec_id = AVCodec2RocDecVideoCodec(v_demuxer[j]->GetCodecID()); + // reset() destroys the finished decoder; release() would only drop ownership and leak it. + v_dec_info[thread_idx]->viddec.reset(); + v_dec_info[thread_idx]->viddec + = std::make_unique(v_dec_info[thread_idx]->dec_device_id, + mem_type, + codec_id, + false, + p_crop_rect, + b_extract_sei_messages, + disp_delay); + v_dec_info[thread_idx]->viddec->GetDeviceinfo(device_name, + gcn_arch_name, + pci_bus_id, + pci_domain_id, + pci_device_id); + std::cout << "info: decoding " << input_file_names[j] << " using GPU device " + << v_dec_info[thread_idx]->dec_device_id << " - " << device_name << "[" + << gcn_arch_name << "] on PCI bus " << std::setfill('0') << std::setw(2) + << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') + << std::setw(2) <<
std::right << std::hex << pci_domain_id << "." + << pci_device_id << std::dec << std::endl; + } + if(!v_dec_info[thread_idx]->viddec->CodecSupported( + v_dec_info[thread_idx]->dec_device_id, + v_dec_info[thread_idx]->rocdec_codec_id, + v_dec_info[thread_idx]->bit_depth)) + { + std::cerr << "Codec not supported on GPU, skipping this file!" << std::endl; + continue; + } + thread_pool.execute_job(std::bind(dec_proc, + v_dec_info[thread_idx]->viddec.get(), + v_demuxer[j].get(), + &v_frame[j], + &v_fps[j], + std::ref(v_dec_info[thread_idx]->decoding_complete), + seek_mode, + b_dump_output_frames, + seq_info, + &output_seq_file_names[j * seq_info.batch_size], + mem_type)); + } + + thread_pool.join_threads(); + for(int i = 0; i < num_files; i++) + { + total_fps += v_fps[i] * static_cast(n_threads) / static_cast(num_files); + n_total += v_frame[i]; + } + if(!b_dump_output_frames) + { + std::cout << "info: Total frame decoded: " << n_total << std::endl; + std::cout << "info: avg decoding time per frame: " << 1000 / total_fps << " ms" + << std::endl; + std::cout << "info: avg FPS: " << total_fps << std::endl; + } + else + { + if(mem_type == OUT_SURFACE_MEM_NOT_MAPPED) + { + std::cout << "info: saving frames with -m 3 option is not supported!" << std::endl; + } + else + { + for(int i = 0; i < num_files; i++) + { + for(int n = 0; n < seq_info.batch_size; n++) + { + std::cout << "info: saved frames into " + << output_seq_file_names[i * seq_info.batch_size + n] + << std::endl; + } + } + } + } + } + catch(const std::exception& ex) + { + std::cout << ex.what() << std::endl; + exit(1); + } + + return 0; +} diff --git a/README.md b/README.md index 096c82fdb..34f251a75 100644 --- a/README.md +++ b/README.md @@ -444,6 +444,17 @@ The following options are available when building with CMake. 
- [gamma_contrast](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocCV/gamma_contrast/): Showcases non-linear brightness and contrast adjustment using gamma transformation formulas. - [normalize](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocCV/normalize/): Showcases per-channel scaling and shifting operations for image normalization in machine learning pipelines. - [warp_perspective](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocCV/warp_perspective/): Showcases 3D perspective transformations using transformation matrices for geometric image warping. + - [rocDecode](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocDecode/) + - [rocdec_decode](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocDecode/rocdec_decode/): Demonstrates low-level rocDecode APIs for hardware-accelerated video decoding with direct control over decoder initialization, frame decoding, and output retrieval. + - [video_decode](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocDecode/video_decode/): Illustrates standard video decoding workflow using FFMPEG demuxer and hardware-accelerated decoder to obtain decoded frames in YUV format. + - [video_decode_batch](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocDecode/video_decode_batch/): Demonstrates batch video decoding using multiple threads to decode multiple video files concurrently, maximizing GPU utilization and throughput. + - [video_decode_mem](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocDecode/video_decode_mem/): Shows memory-based video decoding with a custom stream provider, useful for scenarios where video data comes from network streams, memory buffers, or other non-file sources. 
+ - [video_decode_multi_files](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocDecode/video_decode_multi_files/): Showcases decoder reconfiguration capability by decoding multiple video files with different resolutions using a single decoder instance. + - [video_decode_perf](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocDecode/video_decode_perf/): Demonstrates performance testing and benchmarking by decoding the same video file multiple times in parallel to measure maximum decode throughput and GPU utilization. + - [video_decode_pic_files](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocDecode/video_decode_pic_files/): Demonstrates extracting individual decoded frames from a video file and saving them as separate picture files for frame-by-frame analysis. + - [video_decode_raw](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocDecode/video_decode_raw/): Shows video decoding using rocDecode's built-in bitstream reader instead of the FFMPEG demuxer, useful for raw elementary streams. + - [video_decode_rgb](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocDecode/video_decode_rgb/): Demonstrates video decoding with hardware-accelerated color space conversion using custom HIP kernels to convert YUV frames to RGB/BGR formats. + - [video_to_sequence](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocDecode/video_to_sequence/): Extracts batches of fixed-length frame sequences, with configurable step and stride, from every video file in an input folder, useful for creating training datasets, video analysis, or frame-by-frame processing workflows. - [rocJPEG](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocJPEG/) - [rocjpeg_decode](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocJPEG/rocjpeg_decode): Program that showcases decoding of JPEG images.
- [rocjpeg_decode_batched](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocJPEG/rocjpeg_decode_batched): Program that showcases decoding a batch of JPEG images.