diff --git a/configure b/configure index b6616f00b6..94c8161b91 100755 --- a/configure +++ b/configure @@ -205,6 +205,7 @@ External library support: --disable-bzlib disable bzlib [autodetect] --disable-coreimage disable Apple CoreImage framework [autodetect] --enable-chromaprint enable audio fingerprinting with chromaprint [no] + --disable-epoxy disable epoxy [autodetect] --enable-frei0r enable frei0r video filtering [no] --enable-gcrypt enable gcrypt, needed for rtmp(t)e support if openssl, librtmp or gmp is not used [no] @@ -281,6 +282,7 @@ External library support: if openssl, gnutls or mbedtls is not used [no] --enable-libtwolame enable MP2 encoding via libtwolame [no] --enable-libuavs3d enable AVS3 decoding via libuavs3d [no] + --disable-libudev disable libudev [autodetect] --enable-libv4l2 enable libv4l2/v4l-utils [no] --enable-libvidstab enable video stabilization using vid.stab [no] --enable-libvmaf enable vmaf filter via libvmaf [no] @@ -344,12 +346,16 @@ External library support: --enable-libvpl enable Intel oneVPL code via libvpl if libmfx is not used [no] --enable-libnpp enable Nvidia Performance Primitives-based code [no] --enable-mmal enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no] + --enable-sand enable sand video formats [rpi] + --enable-vout-drm enable the vout_drm module - for internal testing only [no] + --enable-vout-egl enable the vout_egl module - for internal testing only [no] --disable-nvdec disable Nvidia video decoding acceleration (via hwaccel) [autodetect] --disable-nvenc disable Nvidia video encoding code [autodetect] --enable-omx enable OpenMAX IL code [no] --enable-omx-rpi enable OpenMAX IL code for Raspberry Pi [no] --enable-rkmpp enable Rockchip Media Process Platform code [no] --disable-v4l2-m2m disable V4L2 mem2mem code [autodetect] + --enable-v4l2-request enable V4L2 request API code [no] --disable-vaapi disable Video Acceleration API (mainly Unix/Intel) code [autodetect] --disable-vdpau disable Nvidia Video Decode and Presentation API for Unix code [autodetect] --disable-videotoolbox disable VideoToolbox code [autodetect] @@ -1742,7 +1748,9 @@ EXTERNAL_AUTODETECT_LIBRARY_LIST=" avfoundation bzlib coreimage + epoxy iconv + libudev libxcb libxcb_shm libxcb_shape @@ -1913,6 +1921,7 @@ HWACCEL_LIBRARY_LIST=" mmal omx opencl + v4l2_request " DOCUMENT_LIST=" @@ -1930,10 +1939,14 @@ FEATURE_LIST=" omx_rpi runtime_cpudetect safe_bitstream_reader + sand shared small static swscale_alpha + vout_drm + vout_egl + v4l2_req_hevc_vx " # this list should be kept in linking order @@ -2495,6 +2508,7 @@ CONFIG_EXTRA=" rtpdec rtpenc_chain rv34dsp + sand scene_sad sinewin snappy @@ -2999,6 +3013,7 @@ d3d11va_deps="dxva_h ID3D11VideoDecoder ID3D11VideoContext" dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32" ffnvcodec_deps_any="libdl LoadLibrary" nvdec_deps="ffnvcodec" +v4l2_request_deps="linux_videodev2_h linux_media_h v4l2_timeval_to_ns libdrm libudev" vaapi_x11_deps="xlib_x11" videotoolbox_hwaccel_deps="videotoolbox pthreads" videotoolbox_hwaccel_extralibs="-framework QuartzCore" @@ -3042,6 +3057,8 @@ hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_HEVC" hevc_dxva2_hwaccel_select="hevc_decoder" hevc_nvdec_hwaccel_deps="nvdec" hevc_nvdec_hwaccel_select="hevc_decoder" +hevc_v4l2request_hwaccel_deps="v4l2_request" +hevc_v4l2request_hwaccel_select="hevc_decoder" hevc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferHEVC" hevc_vaapi_hwaccel_select="hevc_decoder" hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC" @@ -3549,8 +3566,11 @@ sndio_indev_deps="sndio" sndio_outdev_deps="sndio" v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h" v4l2_indev_suggest="libv4l2" +v4l2_outdev_deps="libdrm" v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h" v4l2_outdev_suggest="libv4l2" +vout_drm_outdev_deps="libdrm" +vout_egl_outdev_deps="xlib epoxy" vfwcap_indev_deps="vfw32 vfwcap_defines" xcbgrab_indev_deps="libxcb" xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes" @@ -3751,6 +3771,7 @@ tonemap_opencl_filter_deps="opencl const_nan" transpose_opencl_filter_deps="opencl" transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags" transpose_vulkan_filter_deps="vulkan spirv_compiler" +unsand_filter_select="sand" unsharp_opencl_filter_deps="opencl" uspp_filter_deps="gpl avcodec" vaguedenoiser_filter_deps="gpl" @@ -6335,6 +6356,12 @@ if enabled xlib; then disable xlib fi +enabled libudev && + check_pkg_config libudev libudev libudev.h udev_new + +enabled epoxy && + check_pkg_config epoxy epoxy epoxy/egl.h epoxy_egl_version + check_headers direct.h check_headers dirent.h check_headers dxgidebug.h @@ -6794,8 +6821,16 @@ enabled rkmpp && { require_pkg_config rkmpp rockchip_mpp rockchip/r { enabled libdrm || die "ERROR: rkmpp requires --enable-libdrm"; } } +enabled v4l2_request && { enabled libdrm || + die "ERROR: v4l2-request requires --enable-libdrm"; } && + { enabled libudev || + die "ERROR: v4l2-request requires libudev"; } enabled vapoursynth && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init +enabled vout_drm && { enabled libdrm || die "ERROR: vout_drm requires --enable-libdrm"; } + +enabled vout_egl && { enabled epoxy || die "ERROR: vout_egl requires epoxy"; } && + { enabled xlib || die "ERROR: vout_egl requires xlib"; } if enabled gcrypt; then GCRYPT_CONFIG="${cross_prefix}libgcrypt-config" @@ -6876,6 +6911,10 @@ if enabled v4l2_m2m; then check_cc vp9_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VP9;" fi +check_func_headers "linux/media.h linux/videodev2.h" v4l2_timeval_to_ns +check_cc hevc_v4l2_request linux/videodev2.h "int i = V4L2_PIX_FMT_HEVC_SLICE;" +disable v4l2_req_hevc_vx + check_headers sys/videoio.h test_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete @@ -7370,6 +7409,9 @@ check_deps $CONFIG_LIST \ enabled threads && ! enabled pthreads && ! enabled atomics_native && die "non pthread threading without atomics not supported, try adding --enable-pthreads or --cpu=i486 or higher if you are on x86" +# Sub-feature of hevc_v4l2request_hwaccel - can only be set once deps are done +enabled hevc_v4l2request_hwaccel && disabled hevc_v4l2_request && enable v4l2_req_hevc_vx + case $target_os in haiku) disable memalign diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c index d721a5e721..839da7b472 100644 --- a/fftools/ffmpeg.c +++ b/fftools/ffmpeg.c @@ -1993,8 +1993,8 @@ static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame, int keep_ref av_channel_layout_compare(&ifilter->ch_layout, &frame->ch_layout); break; case AVMEDIA_TYPE_VIDEO: - need_reinit |= ifilter->width != frame->width || - ifilter->height != frame->height; + need_reinit |= ifilter->width != av_frame_cropped_width(frame) || + ifilter->height != av_frame_cropped_height(frame); break; } @@ -2005,6 +2005,9 @@ static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame, int keep_ref (ifilter->hw_frames_ctx && ifilter->hw_frames_ctx->data != frame->hw_frames_ctx->data)) need_reinit = 1; + if (no_cvt_hw && fg->graph) + need_reinit = 0; + if (sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX)) { if (!ifilter->displaymatrix || memcmp(sd->data, ifilter->displaymatrix, sizeof(int32_t) * 9)) need_reinit = 1; @@ -2274,8 +2277,7 @@ static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output, int64_ decoded_frame->top_field_first = ist->top_field_first; ist->frames_decoded++; - - if (ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) { + if (!no_cvt_hw && ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) { err = ist->hwaccel_retrieve_data(ist->dec_ctx, decoded_frame); if (err < 0) goto fail; @@ -2607,7 +2609,12 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo case AVMEDIA_TYPE_VIDEO: ret = decode_video (ist, repeating ? NULL : avpkt, &got_output, &duration_pts, !pkt, &decode_failed); - if (!repeating || !pkt || got_output) { + // Pi: Do not inc dts if no_cvt_hw set + // V4L2 H264 decode has long latency and sometimes spits out a long + // stream of output without input. In this case incrementing DTS is wrong. + // There may be cases where the condition as written is correct so only + // "fix" in the cases which cause problems + if (!repeating || !pkt || (got_output && !no_cvt_hw)) { if (pkt && pkt->duration) { duration_dts = av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q); } else if(ist->dec_ctx->framerate.num != 0 && ist->dec_ctx->framerate.den != 0) { @@ -2756,12 +2763,15 @@ static enum AVPixelFormat get_format(AVCodecContext *s, const enum AVPixelFormat break; if (ist->hwaccel_id == HWACCEL_GENERIC || - ist->hwaccel_id == HWACCEL_AUTO) { + ist->hwaccel_id == HWACCEL_AUTO || + no_cvt_hw) { for (i = 0;; i++) { config = avcodec_get_hw_config(s->codec, i); if (!config) break; - if (!(config->methods & + if (no_cvt_hw && (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL)) + av_log(s, AV_LOG_DEBUG, "no_cvt_hw so trying pix_fmt %d with codec internal hwaccel\n", *p); + else if (!(config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) continue; if (config->pix_fmt == *p) diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h index f1412f6446..8f478619b3 100644 --- a/fftools/ffmpeg.h +++ b/fftools/ffmpeg.h @@ -729,6 +729,8 @@ extern enum VideoSyncMethod video_sync_method; extern float frame_drop_threshold; extern int do_benchmark; extern int do_benchmark_all; +extern int no_cvt_hw; +extern int do_deinterlace; extern int do_hex_dump; extern int do_pkt_dump; extern int copy_ts; diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c index 1f5bbf6c4d..f888307762 100644 --- a/fftools/ffmpeg_filter.c +++ b/fftools/ffmpeg_filter.c @@ -1281,8 +1281,8 @@ int ifilter_parameters_from_frame(InputFilter *ifilter, const AVFrame *frame) ifilter->format = frame->format; - ifilter->width = frame->width; - ifilter->height = frame->height; + ifilter->width = av_frame_cropped_width(frame); + ifilter->height = av_frame_cropped_height(frame); ifilter->sample_aspect_ratio = frame->sample_aspect_ratio; ifilter->sample_rate = frame->sample_rate; diff --git a/fftools/ffmpeg_hw.c b/fftools/ffmpeg_hw.c index 88fa782470..740a5e7153 100644 --- a/fftools/ffmpeg_hw.c +++ b/fftools/ffmpeg_hw.c @@ -75,6 +75,8 @@ static char *hw_device_default_name(enum AVHWDeviceType type) char *name; size_t index_pos; int index, index_limit = 1000; + if (!type_name) + return NULL; index_pos = strlen(type_name); name = av_malloc(index_pos + 4); if (!name) diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c index 055275d813..761db36588 100644 --- a/fftools/ffmpeg_opt.c +++ b/fftools/ffmpeg_opt.c @@ -71,6 +71,7 @@ enum VideoSyncMethod video_sync_method = VSYNC_AUTO; float frame_drop_threshold = 0; int do_benchmark = 0; int do_benchmark_all = 0; +int no_cvt_hw = 0; int do_hex_dump = 0; int do_pkt_dump = 0; int copy_ts = 0; @@ -1427,6 +1428,8 @@ const OptionDef options[] = { "add timings for benchmarking" }, { "benchmark_all", OPT_BOOL | OPT_EXPERT, { &do_benchmark_all }, "add timings for each task" }, + { "no_cvt_hw", OPT_BOOL | OPT_EXPERT, { &no_cvt_hw }, + "do not auto-convert hw frames to sw" }, { "progress", HAS_ARG | OPT_EXPERT, { .func_arg = opt_progress }, "write program-readable progress information", "url" }, { "stdin", OPT_BOOL | OPT_EXPERT, { &stdin_interaction }, diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 389253f5d0..8b1d669834 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -169,7 +169,10 @@ OBJS-$(CONFIG_VIDEODSP) += videodsp.o OBJS-$(CONFIG_VP3DSP) += vp3dsp.o OBJS-$(CONFIG_VP56DSP) += vp56dsp.o OBJS-$(CONFIG_VP8DSP) += vp8dsp.o -OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o +OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o\ + weak_link.o v4l2_req_dmabufs.o +OBJS-$(CONFIG_V4L2_REQUEST) += v4l2_req_media.o v4l2_req_pollqueue.o v4l2_req_dmabufs.o\ + v4l2_req_devscan.o weak_link.o OBJS-$(CONFIG_WMA_FREQS) += wma_freqs.o OBJS-$(CONFIG_WMV2DSP) += wmv2dsp.o @@ -996,6 +999,8 @@ OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o +OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL) += v4l2_request_hevc.o v4l2_req_decode_q.o v4l2_req_hevc_v4.o +OBJS-$(CONFIG_V4L2_REQ_HEVC_VX) += v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 39881a1d2b..32bc78e2be 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -2221,6 +2221,17 @@ typedef struct AVHWAccel { * that avctx->hwaccel_priv_data is invalid. */ int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); + + /** + * Called if parsing fails + * + * An error has occured, end_frame will not be called + * start_frame & decode_slice may or may not have been called + * Optional + * + * @param avctx the codec context + */ + void (*abort_frame)(AVCodecContext *avctx); } AVHWAccel; /** diff --git a/libavcodec/hevc-ctrls-v1.h b/libavcodec/hevc-ctrls-v1.h new file mode 100644 index 0000000000..72cbba0953 --- /dev/null +++ b/libavcodec/hevc-ctrls-v1.h @@ -0,0 +1,229 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * These are the HEVC state controls for use with stateless HEVC + * codec drivers. + * + * It turns out that these structs are not stable yet and will undergo + * more changes. So keep them private until they are stable and ready to + * become part of the official public API. + */ + +#ifndef _HEVC_CTRLS_H_ +#define _HEVC_CTRLS_H_ + +#include + +/* The pixel format isn't stable at the moment and will likely be renamed. */ +#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ + +#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_MPEG_BASE + 1008) +#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_MPEG_BASE + 1009) +#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_MPEG_BASE + 1010) +#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_MPEG_BASE + 1011) +#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_MPEG_BASE + 1015) +#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_MPEG_BASE + 1016) + +/* enum v4l2_ctrl_type type values */ +#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120 +#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121 +#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122 +#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123 + +enum v4l2_mpeg_video_hevc_decode_mode { + V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, + V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED, +}; + +enum v4l2_mpeg_video_hevc_start_code { + V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, + V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B, +}; + +#define V4L2_HEVC_SLICE_TYPE_B 0 +#define V4L2_HEVC_SLICE_TYPE_P 1 +#define V4L2_HEVC_SLICE_TYPE_I 2 + +#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) +#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) +#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) +#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) +#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) +#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) +#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) +#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) +#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) + +/* The controls are not stable at the moment and will likely be reworked. */ +struct v4l2_ctrl_hevc_sps { + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ + __u16 pic_width_in_luma_samples; + __u16 pic_height_in_luma_samples; + __u8 bit_depth_luma_minus8; + __u8 bit_depth_chroma_minus8; + __u8 log2_max_pic_order_cnt_lsb_minus4; + __u8 sps_max_dec_pic_buffering_minus1; + __u8 sps_max_num_reorder_pics; + __u8 sps_max_latency_increase_plus1; + __u8 log2_min_luma_coding_block_size_minus3; + __u8 log2_diff_max_min_luma_coding_block_size; + __u8 log2_min_luma_transform_block_size_minus2; + __u8 log2_diff_max_min_luma_transform_block_size; + __u8 max_transform_hierarchy_depth_inter; + __u8 max_transform_hierarchy_depth_intra; + __u8 pcm_sample_bit_depth_luma_minus1; + __u8 pcm_sample_bit_depth_chroma_minus1; + __u8 log2_min_pcm_luma_coding_block_size_minus3; + __u8 log2_diff_max_min_pcm_luma_coding_block_size; + __u8 num_short_term_ref_pic_sets; + __u8 num_long_term_ref_pics_sps; + __u8 chroma_format_idc; + __u8 sps_max_sub_layers_minus1; + + __u64 flags; +}; + +#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 0) +#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) +#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) +#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) +#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) +#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) +#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) +#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) +#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) +#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) +#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) +#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) +#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) +#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) +#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) +#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) +#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) +#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) +#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) + +struct v4l2_ctrl_hevc_pps { + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */ + __u8 num_extra_slice_header_bits; + __s8 init_qp_minus26; + __u8 diff_cu_qp_delta_depth; + __s8 pps_cb_qp_offset; + __s8 pps_cr_qp_offset; + __u8 num_tile_columns_minus1; + __u8 num_tile_rows_minus1; + __u8 column_width_minus1[20]; + __u8 row_height_minus1[22]; + __s8 pps_beta_offset_div2; + __s8 pps_tc_offset_div2; + __u8 log2_parallel_merge_level_minus2; + + __u8 padding[4]; + __u64 flags; +}; + +#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE 0x01 +#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER 0x02 +#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR 0x03 + +#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 + +struct v4l2_hevc_dpb_entry { + __u64 timestamp; + __u8 rps; + __u8 field_pic; + __u16 pic_order_cnt[2]; + __u8 padding[2]; +}; + +struct v4l2_hevc_pred_weight_table { + __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + + __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + + __u8 padding[6]; + + __u8 luma_log2_weight_denom; + __s8 delta_chroma_log2_weight_denom; +}; + +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) + +struct v4l2_ctrl_hevc_slice_params { + __u32 bit_size; + __u32 data_bit_offset; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + __u32 slice_segment_addr; + __u32 num_entry_point_offsets; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ + __u8 nal_unit_type; + __u8 nuh_temporal_id_plus1; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + __u8 slice_type; + __u8 colour_plane_id; + __u16 slice_pic_order_cnt; + __u8 num_ref_idx_l0_active_minus1; + __u8 num_ref_idx_l1_active_minus1; + __u8 collocated_ref_idx; + __u8 five_minus_max_num_merge_cand; + __s8 slice_qp_delta; + __s8 slice_cb_qp_offset; + __s8 slice_cr_qp_offset; + __s8 slice_act_y_qp_offset; + __s8 slice_act_cb_qp_offset; + __s8 slice_act_cr_qp_offset; + __s8 slice_beta_offset_div2; + __s8 slice_tc_offset_div2; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ + __u8 pic_struct; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + __u8 num_active_dpb_entries; + __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + + __u8 num_rps_poc_st_curr_before; + __u8 num_rps_poc_st_curr_after; + __u8 num_rps_poc_lt_curr; + + __u8 padding; + + __u32 entry_point_offset_minus1[256]; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ + struct v4l2_hevc_pred_weight_table pred_weight_table; + + __u64 flags; +}; + +struct v4l2_ctrl_hevc_scaling_matrix { + __u8 scaling_list_4x4[6][16]; + __u8 scaling_list_8x8[6][64]; + __u8 scaling_list_16x16[6][64]; + __u8 scaling_list_32x32[2][64]; + __u8 scaling_list_dc_coef_16x16[6]; + __u8 scaling_list_dc_coef_32x32[2]; +}; + +#endif diff --git a/libavcodec/hevc-ctrls-v2.h b/libavcodec/hevc-ctrls-v2.h new file mode 100644 index 0000000000..7cbbbf055f --- /dev/null +++ b/libavcodec/hevc-ctrls-v2.h @@ -0,0 +1,257 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * These are the HEVC state controls for use with stateless HEVC + * codec drivers. + * + * It turns out that these structs are not stable yet and will undergo + * more changes. So keep them private until they are stable and ready to + * become part of the official public API. + */ + +#ifndef _HEVC_CTRLS_H_ +#define _HEVC_CTRLS_H_ + +#include + +/* The pixel format isn't stable at the moment and will likely be renamed. */ +#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ + +#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_CODEC_BASE + 1008) +#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_CODEC_BASE + 1009) +#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_BASE + 1010) +#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_BASE + 1011) +#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_BASE + 1012) +#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_CODEC_BASE + 1015) +#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_CODEC_BASE + 1016) + +/* enum v4l2_ctrl_type type values */ +#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120 +#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121 +#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122 +#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123 +#define V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS 0x0124 + +enum v4l2_mpeg_video_hevc_decode_mode { + V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, + V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED, +}; + +enum v4l2_mpeg_video_hevc_start_code { + V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, + V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B, +}; + +#define V4L2_HEVC_SLICE_TYPE_B 0 +#define V4L2_HEVC_SLICE_TYPE_P 1 +#define V4L2_HEVC_SLICE_TYPE_I 2 + +#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) +#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) +#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) +#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) +#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) +#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) +#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) +#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) +#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) + +/* The controls are not stable at the moment and will likely be reworked. */ +struct v4l2_ctrl_hevc_sps { + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ + __u16 pic_width_in_luma_samples; + __u16 pic_height_in_luma_samples; + __u8 bit_depth_luma_minus8; + __u8 bit_depth_chroma_minus8; + __u8 log2_max_pic_order_cnt_lsb_minus4; + __u8 sps_max_dec_pic_buffering_minus1; + __u8 sps_max_num_reorder_pics; + __u8 sps_max_latency_increase_plus1; + __u8 log2_min_luma_coding_block_size_minus3; + __u8 log2_diff_max_min_luma_coding_block_size; + __u8 log2_min_luma_transform_block_size_minus2; + __u8 log2_diff_max_min_luma_transform_block_size; + __u8 max_transform_hierarchy_depth_inter; + __u8 max_transform_hierarchy_depth_intra; + __u8 pcm_sample_bit_depth_luma_minus1; + __u8 pcm_sample_bit_depth_chroma_minus1; + __u8 log2_min_pcm_luma_coding_block_size_minus3; + __u8 log2_diff_max_min_pcm_luma_coding_block_size; + __u8 num_short_term_ref_pic_sets; + __u8 num_long_term_ref_pics_sps; + __u8 chroma_format_idc; + __u8 sps_max_sub_layers_minus1; + + __u64 flags; +}; + +#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) +#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) +#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) +#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) +#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) +#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) +#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) +#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) +#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) +#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) +#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) +#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) +#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) +#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) +#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) +#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) +#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) +#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) +#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) +#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19) +#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20) + +struct v4l2_ctrl_hevc_pps { + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */ + __u8 num_extra_slice_header_bits; + __u8 num_ref_idx_l0_default_active_minus1; + __u8 num_ref_idx_l1_default_active_minus1; + __s8 init_qp_minus26; + __u8 diff_cu_qp_delta_depth; + __s8 pps_cb_qp_offset; + __s8 pps_cr_qp_offset; + __u8 num_tile_columns_minus1; + __u8 num_tile_rows_minus1; + __u8 column_width_minus1[20]; + __u8 row_height_minus1[22]; + __s8 pps_beta_offset_div2; + __s8 pps_tc_offset_div2; + __u8 log2_parallel_merge_level_minus2; + + __u8 padding[4]; + __u64 flags; +}; + +#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE 0x01 +#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER 0x02 +#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR 0x03 + +#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 + +struct v4l2_hevc_dpb_entry { + __u64 timestamp; + __u8 rps; + __u8 field_pic; + __u16 pic_order_cnt[2]; + __u8 padding[2]; +}; + +struct v4l2_hevc_pred_weight_table { + __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + + __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + + __u8 padding[6]; + + __u8 luma_log2_weight_denom; + __s8 delta_chroma_log2_weight_denom; +}; + +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) + +struct v4l2_ctrl_hevc_slice_params { + __u32 bit_size; + __u32 data_bit_offset; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + __u32 slice_segment_addr; + __u32 num_entry_point_offsets; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ + __u8 nal_unit_type; + __u8 nuh_temporal_id_plus1; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + __u8 slice_type; + __u8 colour_plane_id; + __u16 slice_pic_order_cnt; + __u8 num_ref_idx_l0_active_minus1; + __u8 num_ref_idx_l1_active_minus1; + __u8 collocated_ref_idx; + __u8 five_minus_max_num_merge_cand; + __s8 slice_qp_delta; + __s8 slice_cb_qp_offset; + __s8 slice_cr_qp_offset; + __s8 slice_act_y_qp_offset; + __s8 slice_act_cb_qp_offset; + __s8 slice_act_cr_qp_offset; + __s8 slice_beta_offset_div2; + __s8 slice_tc_offset_div2; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ + __u8 pic_struct; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + + __u8 padding[5]; + + __u32 entry_point_offset_minus1[256]; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ + struct v4l2_hevc_pred_weight_table pred_weight_table; + + __u64 flags; +}; + +#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1 +#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2 +#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4 + +struct v4l2_ctrl_hevc_decode_params { + __s32 pic_order_cnt_val; + __u8 num_active_dpb_entries; + struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 num_poc_st_curr_before; + __u8 num_poc_st_curr_after; + __u8 num_poc_lt_curr; + __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u64 flags; +}; + +/* MPEG-class control IDs specific to the Hantro driver as defined by V4L2 */ +#define V4L2_CID_CODEC_HANTRO_BASE (V4L2_CTRL_CLASS_CODEC | 0x1200) +/* + * V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP - + * the number of data (in bits) to skip in the + * slice segment header. + * If non-IDR, the bits to be skipped go from syntax element "pic_output_flag" + * to before syntax element "slice_temporal_mvp_enabled_flag". + * If IDR, the skipped bits are just "pic_output_flag" + * (separate_colour_plane_flag is not supported). + */ +#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP (V4L2_CID_CODEC_HANTRO_BASE + 0) + +struct v4l2_ctrl_hevc_scaling_matrix { + __u8 scaling_list_4x4[6][16]; + __u8 scaling_list_8x8[6][64]; + __u8 scaling_list_16x16[6][64]; + __u8 scaling_list_32x32[2][64]; + __u8 scaling_list_dc_coef_16x16[6]; + __u8 scaling_list_dc_coef_32x32[2]; +}; + +#endif diff --git a/libavcodec/hevc-ctrls-v3.h b/libavcodec/hevc-ctrls-v3.h new file mode 100644 index 0000000000..4e35bd583d --- /dev/null +++ b/libavcodec/hevc-ctrls-v3.h @@ -0,0 +1,255 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * These are the HEVC state controls for use with stateless HEVC + * codec drivers. + * + * It turns out that these structs are not stable yet and will undergo + * more changes. So keep them private until they are stable and ready to + * become part of the official public API. + */ + +#ifndef _HEVC_CTRLS_H_ +#define _HEVC_CTRLS_H_ + +#include + +/* The pixel format isn't stable at the moment and will likely be renamed. */ +#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ + +#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_CODEC_BASE + 1008) +#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_CODEC_BASE + 1009) +#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_BASE + 1010) +#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_BASE + 1011) +#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_BASE + 1012) +#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_CODEC_BASE + 1015) +#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_CODEC_BASE + 1016) + +/* enum v4l2_ctrl_type type values */ +#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120 +#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121 +#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122 +#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123 +#define V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS 0x0124 + +enum v4l2_mpeg_video_hevc_decode_mode { + V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, + V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED, +}; + +enum v4l2_mpeg_video_hevc_start_code { + V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, + V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B, +}; + +#define V4L2_HEVC_SLICE_TYPE_B 0 +#define V4L2_HEVC_SLICE_TYPE_P 1 +#define V4L2_HEVC_SLICE_TYPE_I 2 + +#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) +#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) +#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) +#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) +#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) +#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) +#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) +#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) +#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) + +/* The controls are not stable at the moment and will likely be reworked. */ +struct v4l2_ctrl_hevc_sps { + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ + __u16 pic_width_in_luma_samples; + __u16 pic_height_in_luma_samples; + __u8 bit_depth_luma_minus8; + __u8 bit_depth_chroma_minus8; + __u8 log2_max_pic_order_cnt_lsb_minus4; + __u8 sps_max_dec_pic_buffering_minus1; + __u8 sps_max_num_reorder_pics; + __u8 sps_max_latency_increase_plus1; + __u8 log2_min_luma_coding_block_size_minus3; + __u8 log2_diff_max_min_luma_coding_block_size; + __u8 log2_min_luma_transform_block_size_minus2; + __u8 log2_diff_max_min_luma_transform_block_size; + __u8 max_transform_hierarchy_depth_inter; + __u8 max_transform_hierarchy_depth_intra; + __u8 pcm_sample_bit_depth_luma_minus1; + __u8 pcm_sample_bit_depth_chroma_minus1; + __u8 log2_min_pcm_luma_coding_block_size_minus3; + __u8 log2_diff_max_min_pcm_luma_coding_block_size; + __u8 num_short_term_ref_pic_sets; + __u8 num_long_term_ref_pics_sps; + __u8 chroma_format_idc; + __u8 sps_max_sub_layers_minus1; + + __u64 flags; +}; + +#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) +#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) +#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) +#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) +#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) +#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) +#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) +#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) +#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) +#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) +#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) +#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) +#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) +#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) +#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) +#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) +#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) +#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) +#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) +#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19) +#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20) + +struct v4l2_ctrl_hevc_pps { + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */ + __u8 num_extra_slice_header_bits; + __u8 num_ref_idx_l0_default_active_minus1; + __u8 num_ref_idx_l1_default_active_minus1; + __s8 init_qp_minus26; + __u8 diff_cu_qp_delta_depth; + __s8 pps_cb_qp_offset; + __s8 pps_cr_qp_offset; + __u8 num_tile_columns_minus1; + __u8 num_tile_rows_minus1; + __u8 column_width_minus1[20]; + __u8 row_height_minus1[22]; + __s8 pps_beta_offset_div2; + __s8 pps_tc_offset_div2; + __u8 log2_parallel_merge_level_minus2; + + __u8 padding[4]; + __u64 flags; +}; + +#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE 0x01 + +#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 + +struct v4l2_hevc_dpb_entry { + __u64 timestamp; + __u8 flags; + __u8 field_pic; + __u16 pic_order_cnt[2]; + __u8 padding[2]; +}; + +struct v4l2_hevc_pred_weight_table { + __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + + __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + + __u8 padding[6]; + + __u8 luma_log2_weight_denom; + __s8 delta_chroma_log2_weight_denom; +}; + +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) + +struct v4l2_ctrl_hevc_slice_params { + __u32 bit_size; + __u32 data_bit_offset; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + __u32 slice_segment_addr; + __u32 num_entry_point_offsets; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ + __u8 nal_unit_type; + __u8 nuh_temporal_id_plus1; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + __u8 slice_type; + __u8 colour_plane_id; + __u16 slice_pic_order_cnt; + __u8 num_ref_idx_l0_active_minus1; + __u8 num_ref_idx_l1_active_minus1; + __u8 collocated_ref_idx; + __u8 five_minus_max_num_merge_cand; + __s8 slice_qp_delta; + __s8 slice_cb_qp_offset; + __s8 slice_cr_qp_offset; + __s8 slice_act_y_qp_offset; + __s8 slice_act_cb_qp_offset; + __s8 slice_act_cr_qp_offset; + __s8 slice_beta_offset_div2; + __s8 slice_tc_offset_div2; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ + __u8 pic_struct; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + + __u8 padding[5]; + + __u32 entry_point_offset_minus1[256]; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ + struct v4l2_hevc_pred_weight_table pred_weight_table; + + __u64 flags; +}; + +#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1 +#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2 +#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4 + +struct v4l2_ctrl_hevc_decode_params { + __s32 pic_order_cnt_val; + __u8 num_active_dpb_entries; + struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 num_poc_st_curr_before; + __u8 num_poc_st_curr_after; + __u8 num_poc_lt_curr; + __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u64 flags; +}; + +struct v4l2_ctrl_hevc_scaling_matrix { + __u8 scaling_list_4x4[6][16]; + __u8 scaling_list_8x8[6][64]; + __u8 scaling_list_16x16[6][64]; + __u8 scaling_list_32x32[2][64]; + __u8 scaling_list_dc_coef_16x16[6]; + __u8 scaling_list_dc_coef_32x32[2]; +}; + +/* MPEG-class control IDs specific to the Hantro driver as defined by V4L2 */ +#define V4L2_CID_CODEC_HANTRO_BASE (V4L2_CTRL_CLASS_CODEC | 0x1200) +/* + * V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP - + * the number of data (in bits) to skip in the + * slice segment header. + * If non-IDR, the bits to be skipped go from syntax element "pic_output_flag" + * to before syntax element "slice_temporal_mvp_enabled_flag". + * If IDR, the skipped bits are just "pic_output_flag" + * (separate_colour_plane_flag is not supported). + */ +#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP (V4L2_CID_CODEC_HANTRO_BASE + 0) + +#endif diff --git a/libavcodec/hevc-ctrls-v4.h b/libavcodec/hevc-ctrls-v4.h new file mode 100644 index 0000000000..c02fdbe5a8 --- /dev/null +++ b/libavcodec/hevc-ctrls-v4.h @@ -0,0 +1,524 @@ +/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* + * Video for Linux Two controls header file + * + * Copyright (C) 1999-2012 the contributors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Alternatively you can redistribute this file under the terms of the + * BSD license as stated below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The names of its contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * The contents of this header was split off from videodev2.h. All control + * definitions should be added to this header, which is included by + * videodev2.h. + */ + +#ifndef AVCODEC_HEVC_CTRLS_V4_H +#define AVCODEC_HEVC_CTRLS_V4_H + +#include +#include + +#ifndef V4L2_CTRL_CLASS_CODEC_STATELESS +#define V4L2_CTRL_CLASS_CODEC_STATELESS 0x00a40000 /* Stateless codecs controls */ +#endif +#ifndef V4L2_CID_CODEC_STATELESS_BASE +#define V4L2_CID_CODEC_STATELESS_BASE (V4L2_CTRL_CLASS_CODEC_STATELESS | 0x900) +#endif + +#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ + +#define V4L2_CID_STATELESS_HEVC_SPS (V4L2_CID_CODEC_STATELESS_BASE + 400) +#define V4L2_CID_STATELESS_HEVC_PPS (V4L2_CID_CODEC_STATELESS_BASE + 401) +#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 402) +#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_STATELESS_BASE + 403) +#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 404) +#define V4L2_CID_STATELESS_HEVC_DECODE_MODE (V4L2_CID_CODEC_STATELESS_BASE + 405) +#define V4L2_CID_STATELESS_HEVC_START_CODE (V4L2_CID_CODEC_STATELESS_BASE + 406) +#define V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS (V4L2_CID_CODEC_STATELESS_BASE + 407) + +enum v4l2_stateless_hevc_decode_mode { + V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED, + V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, +}; + +enum v4l2_stateless_hevc_start_code { + V4L2_STATELESS_HEVC_START_CODE_NONE, + V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, +}; + +#define V4L2_HEVC_SLICE_TYPE_B 0 +#define V4L2_HEVC_SLICE_TYPE_P 1 +#define V4L2_HEVC_SLICE_TYPE_I 2 + +#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) +#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) +#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) +#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) +#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) +#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) +#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) +#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) +#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) + +/** + * struct v4l2_ctrl_hevc_sps - ITU-T Rec. H.265: Sequence parameter set + * + * @video_parameter_set_id: specifies the value of the + * vps_video_parameter_set_id of the active VPS + * @seq_parameter_set_id: provides an identifier for the SPS for + * reference by other syntax elements + * @pic_width_in_luma_samples: specifies the width of each decoded picture + * in units of luma samples + * @pic_height_in_luma_samples: specifies the height of each decoded picture + * in units of luma samples + * @bit_depth_luma_minus8: this value plus 8specifies the bit depth of the + * samples of the luma array + * @bit_depth_chroma_minus8: this value plus 8 specifies the bit depth of the + * samples of the chroma arrays + * @log2_max_pic_order_cnt_lsb_minus4: this value plus 4 specifies the value of + * the variable MaxPicOrderCntLsb + * @sps_max_dec_pic_buffering_minus1: this value plus 1 specifies the maximum + * required size of the decoded picture + * buffer for the codec video sequence + * @sps_max_num_reorder_pics: indicates the maximum allowed number of pictures + * @sps_max_latency_increase_plus1: not equal to 0 is used to compute the + * value of SpsMaxLatencyPictures array + * @log2_min_luma_coding_block_size_minus3: plus 3 specifies the minimum + * luma coding block size + * @log2_diff_max_min_luma_coding_block_size: specifies the difference between + * the maximum and minimum luma + * coding block size + * @log2_min_luma_transform_block_size_minus2: plus 2 specifies the minimum luma + * transform block size + * @log2_diff_max_min_luma_transform_block_size: specifies the difference between + * the maximum and minimum luma + * transform block size + * @max_transform_hierarchy_depth_inter: specifies the maximum hierarchy + * depth for transform units of + * coding units coded in inter + * prediction mode + * @max_transform_hierarchy_depth_intra: specifies the maximum hierarchy + * depth for transform units of + * coding units coded in intra + * prediction mode + * @pcm_sample_bit_depth_luma_minus1: this value plus 1 specifies the number of + * bits used to represent each of PCM sample + * values of the luma component + * @pcm_sample_bit_depth_chroma_minus1: this value plus 1 specifies the number + * of bits used to represent each of PCM + * sample values of the chroma components + * @log2_min_pcm_luma_coding_block_size_minus3: this value plus 3 specifies the + * minimum size of coding blocks + * @log2_diff_max_min_pcm_luma_coding_block_size: specifies the difference between + * the maximum and minimum size of + * coding blocks + * @num_short_term_ref_pic_sets: specifies the number of st_ref_pic_set() + * syntax structures included in the SPS + * @num_long_term_ref_pics_sps: specifies the number of candidate long-term + * reference pictures that are specified in the SPS + * @chroma_format_idc: specifies the chroma sampling + * @sps_max_sub_layers_minus1: this value plus 1 specifies the maximum number + * of temporal sub-layers + * @reserved: padding field. Should be zeroed by applications. + * @flags: see V4L2_HEVC_SPS_FLAG_{} + */ +struct v4l2_ctrl_hevc_sps { + __u8 video_parameter_set_id; + __u8 seq_parameter_set_id; + __u16 pic_width_in_luma_samples; + __u16 pic_height_in_luma_samples; + __u8 bit_depth_luma_minus8; + __u8 bit_depth_chroma_minus8; + __u8 log2_max_pic_order_cnt_lsb_minus4; + __u8 sps_max_dec_pic_buffering_minus1; + __u8 sps_max_num_reorder_pics; + __u8 sps_max_latency_increase_plus1; + __u8 log2_min_luma_coding_block_size_minus3; + __u8 log2_diff_max_min_luma_coding_block_size; + __u8 log2_min_luma_transform_block_size_minus2; + __u8 log2_diff_max_min_luma_transform_block_size; + __u8 max_transform_hierarchy_depth_inter; + __u8 max_transform_hierarchy_depth_intra; + __u8 pcm_sample_bit_depth_luma_minus1; + __u8 pcm_sample_bit_depth_chroma_minus1; + __u8 log2_min_pcm_luma_coding_block_size_minus3; + __u8 log2_diff_max_min_pcm_luma_coding_block_size; + __u8 num_short_term_ref_pic_sets; + __u8 num_long_term_ref_pics_sps; + __u8 chroma_format_idc; + __u8 sps_max_sub_layers_minus1; + + __u8 reserved[6]; + __u64 flags; +}; + +#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) +#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) +#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) +#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) +#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) +#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) +#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) +#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) +#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) +#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) +#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) +#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) +#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) +#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) +#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) +#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) +#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) +#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) +#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) +#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19) +#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20) + +/** + * struct v4l2_ctrl_hevc_pps - ITU-T Rec. H.265: Picture parameter set + * + * @pic_parameter_set_id: identifies the PPS for reference by other + * syntax elements + * @num_extra_slice_header_bits: specifies the number of extra slice header + * bits that are present in the slice header RBSP + * for coded pictures referring to the PPS. + * @num_ref_idx_l0_default_active_minus1: this value plus 1 specifies the + * inferred value of num_ref_idx_l0_active_minus1 + * @num_ref_idx_l1_default_active_minus1: this value plus 1 specifies the + * inferred value of num_ref_idx_l1_active_minus1 + * @init_qp_minus26: this value plus 26 specifies the initial value of SliceQp Y for + * each slice referring to the PPS + * @diff_cu_qp_delta_depth: specifies the difference between the luma coding + * tree block size and the minimum luma coding block + * size of coding units that convey cu_qp_delta_abs + * and cu_qp_delta_sign_flag + * @pps_cb_qp_offset: specify the offsets to the luma quantization parameter Cb + * @pps_cr_qp_offset: specify the offsets to the luma quantization parameter Cr + * @num_tile_columns_minus1: this value plus 1 specifies the number of tile columns + * partitioning the picture + * @num_tile_rows_minus1: this value plus 1 specifies the number of tile rows partitioning + * the picture + * @column_width_minus1: this value plus 1 specifies the width of the each tile column in + * units of coding tree blocks + * @row_height_minus1: this value plus 1 specifies the height of the each tile row in + * units of coding tree blocks + * @pps_beta_offset_div2: specify the default deblocking parameter offsets for + * beta divided by 2 + * @pps_tc_offset_div2: specify the default deblocking parameter offsets for tC + * divided by 2 + * @log2_parallel_merge_level_minus2: this value plus 2 specifies the value of + * the variable Log2ParMrgLevel + * @reserved: padding field. Should be zeroed by applications. + * @flags: see V4L2_HEVC_PPS_FLAG_{} + */ +struct v4l2_ctrl_hevc_pps { + __u8 pic_parameter_set_id; + __u8 num_extra_slice_header_bits; + __u8 num_ref_idx_l0_default_active_minus1; + __u8 num_ref_idx_l1_default_active_minus1; + __s8 init_qp_minus26; + __u8 diff_cu_qp_delta_depth; + __s8 pps_cb_qp_offset; + __s8 pps_cr_qp_offset; + __u8 num_tile_columns_minus1; + __u8 num_tile_rows_minus1; + __u8 column_width_minus1[20]; + __u8 row_height_minus1[22]; + __s8 pps_beta_offset_div2; + __s8 pps_tc_offset_div2; + __u8 log2_parallel_merge_level_minus2; + __u8 reserved; + __u64 flags; +}; + +#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE 0x01 + +#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME 0 +#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_FIELD 1 +#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_FIELD 2 +#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM 3 +#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP 4 +#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM_TOP 5 +#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM 6 +#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_DOUBLING 7 +#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_TRIPLING 8 +#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_PREVIOUS_BOTTOM 9 +#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_PREVIOUS_TOP 10 +#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_NEXT_BOTTOM 11 +#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_NEXT_TOP 12 + +#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 + +/** + * struct v4l2_hevc_dpb_entry - HEVC decoded picture buffer entry + * + * @timestamp: timestamp of the V4L2 capture buffer to use as reference. + * @flags: long term flag for the reference frame + * @field_pic: whether the reference is a field picture or a frame. + * @reserved: padding field. Should be zeroed by applications. + * @pic_order_cnt_val: the picture order count of the current picture. + */ +struct v4l2_hevc_dpb_entry { + __u64 timestamp; + __u8 flags; + __u8 field_pic; + __u16 reserved; + __s32 pic_order_cnt_val; +}; + +/** + * struct v4l2_hevc_pred_weight_table - HEVC weighted prediction parameters + * + * @delta_luma_weight_l0: the difference of the weighting factor applied + * to the luma prediction value for list 0 + * @luma_offset_l0: the additive offset applied to the luma prediction value + * for list 0 + * @delta_chroma_weight_l0: the difference of the weighting factor applied + * to the chroma prediction values for list 0 + * @chroma_offset_l0: the difference of the additive offset applied to + * the chroma prediction values for list 0 + * @delta_luma_weight_l1: the difference of the weighting factor applied + * to the luma prediction value for list 1 + * @luma_offset_l1: the additive offset applied to the luma prediction value + * for list 1 + * @delta_chroma_weight_l1: the difference of the weighting factor applied + * to the chroma prediction values for list 1 + * @chroma_offset_l1: the difference of the additive offset applied to + * the chroma prediction values for list 1 + * @luma_log2_weight_denom: the base 2 logarithm of the denominator for + * all luma weighting factors + * @delta_chroma_log2_weight_denom: the difference of the base 2 logarithm + * of the denominator for all chroma + * weighting factors + */ +struct v4l2_hevc_pred_weight_table { + __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + + __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + + __u8 luma_log2_weight_denom; + __s8 delta_chroma_log2_weight_denom; +}; + +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) + +/** + * struct v4l2_ctrl_hevc_slice_params - HEVC slice parameters + * + * This control is a dynamically sized 1-dimensional array, + * V4L2_CTRL_FLAG_DYNAMIC_ARRAY flag must be set when using it. + * + * @bit_size: size (in bits) of the current slice data + * @data_byte_offset: offset (in bytes) to the video data in the current slice data + * @num_entry_point_offsets: specifies the number of entry point offset syntax + * elements in the slice header. + * @nal_unit_type: specifies the coding type of the slice (B, P or I) + * @nuh_temporal_id_plus1: minus 1 specifies a temporal identifier for the NAL unit + * @slice_type: see V4L2_HEVC_SLICE_TYPE_{} + * @colour_plane_id: specifies the colour plane associated with the current slice + * @slice_pic_order_cnt: specifies the picture order count + * @num_ref_idx_l0_active_minus1: this value plus 1 specifies the maximum + * reference index for reference picture list 0 + * that may be used to decode the slice + * @num_ref_idx_l1_active_minus1: this value plus 1 specifies the maximum + * reference index for reference picture list 1 + * that may be used to decode the slice + * @collocated_ref_idx: specifies the reference index of the collocated picture used + * for temporal motion vector prediction + * @five_minus_max_num_merge_cand: specifies the maximum number of merging + * motion vector prediction candidates supported in + * the slice subtracted from 5 + * @slice_qp_delta: specifies the initial value of QpY to be used for the coding + * blocks in the slice + * @slice_cb_qp_offset: specifies a difference to be added to the value of pps_cb_qp_offset + * @slice_cr_qp_offset: specifies a difference to be added to the value of pps_cr_qp_offset + * @slice_act_y_qp_offset: screen content extension parameters + * @slice_act_cb_qp_offset: screen content extension parameters + * @slice_act_cr_qp_offset: screen content extension parameters + * @slice_beta_offset_div2: specify the deblocking parameter offsets for beta divided by 2 + * @slice_tc_offset_div2: specify the deblocking parameter offsets for tC divided by 2 + * @pic_struct: indicates whether a picture should be displayed as a frame or as one or + * more fields + * @reserved0: padding field. Should be zeroed by applications. + * @slice_segment_addr: specifies the address of the first coding tree block in + * the slice segment + * @ref_idx_l0: the list of L0 reference elements as indices in the DPB + * @ref_idx_l1: the list of L1 reference elements as indices in the DPB + * @short_term_ref_pic_set_size: specifies the size of short-term reference + * pictures set included in the SPS + * @long_term_ref_pic_set_size: specifies the size of long-term reference + * pictures set include in the SPS + * @pred_weight_table: the prediction weight coefficients for inter-picture + * prediction + * @reserved1: padding field. Should be zeroed by applications. + * @flags: see V4L2_HEVC_SLICE_PARAMS_FLAG_{} + */ +struct v4l2_ctrl_hevc_slice_params { + __u32 bit_size; + __u32 data_byte_offset; + __u32 num_entry_point_offsets; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ + __u8 nal_unit_type; + __u8 nuh_temporal_id_plus1; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + __u8 slice_type; + __u8 colour_plane_id; + __s32 slice_pic_order_cnt; + __u8 num_ref_idx_l0_active_minus1; + __u8 num_ref_idx_l1_active_minus1; + __u8 collocated_ref_idx; + __u8 five_minus_max_num_merge_cand; + __s8 slice_qp_delta; + __s8 slice_cb_qp_offset; + __s8 slice_cr_qp_offset; + __s8 slice_act_y_qp_offset; + __s8 slice_act_cb_qp_offset; + __s8 slice_act_cr_qp_offset; + __s8 slice_beta_offset_div2; + __s8 slice_tc_offset_div2; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ + __u8 pic_struct; + + __u8 reserved0[3]; + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + __u32 slice_segment_addr; + __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u16 short_term_ref_pic_set_size; + __u16 long_term_ref_pic_set_size; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ + struct v4l2_hevc_pred_weight_table pred_weight_table; + + __u8 reserved1[2]; + __u64 flags; +}; + +#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1 +#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2 +#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4 + +/** + * struct v4l2_ctrl_hevc_decode_params - HEVC decode parameters + * + * @pic_order_cnt_val: picture order count + * @short_term_ref_pic_set_size: specifies the size of short-term reference + * pictures set included in the SPS of the first slice + * @long_term_ref_pic_set_size: specifies the size of long-term reference + * pictures set include in the SPS of the first slice + * @num_active_dpb_entries: the number of entries in dpb + * @num_poc_st_curr_before: the number of reference pictures in the short-term + * set that come before the current frame + * @num_poc_st_curr_after: the number of reference pictures in the short-term + * set that come after the current frame + * @num_poc_lt_curr: the number of reference pictures in the long-term set + * @poc_st_curr_before: provides the index of the short term before references + * in DPB array + * @poc_st_curr_after: provides the index of the short term after references + * in DPB array + * @poc_lt_curr: provides the index of the long term references in DPB array + * @reserved: padding field. Should be zeroed by applications. + * @dpb: the decoded picture buffer, for meta-data about reference frames + * @flags: see V4L2_HEVC_DECODE_PARAM_FLAG_{} + */ +struct v4l2_ctrl_hevc_decode_params { + __s32 pic_order_cnt_val; + __u16 short_term_ref_pic_set_size; + __u16 long_term_ref_pic_set_size; + __u8 num_active_dpb_entries; + __u8 num_poc_st_curr_before; + __u8 num_poc_st_curr_after; + __u8 num_poc_lt_curr; + __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 reserved[4]; + struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u64 flags; +}; + +/** + * struct v4l2_ctrl_hevc_scaling_matrix - HEVC scaling lists parameters + * + * @scaling_list_4x4: scaling list is used for the scaling process for + * transform coefficients. The values on each scaling + * list are expected in raster scan order + * @scaling_list_8x8: scaling list is used for the scaling process for + * transform coefficients. The values on each scaling + * list are expected in raster scan order + * @scaling_list_16x16: scaling list is used for the scaling process for + * transform coefficients. The values on each scaling + * list are expected in raster scan order + * @scaling_list_32x32: scaling list is used for the scaling process for + * transform coefficients. The values on each scaling + * list are expected in raster scan order + * @scaling_list_dc_coef_16x16: scaling list is used for the scaling process + * for transform coefficients. The values on each + * scaling list are expected in raster scan order. + * @scaling_list_dc_coef_32x32: scaling list is used for the scaling process + * for transform coefficients. The values on each + * scaling list are expected in raster scan order. + */ +struct v4l2_ctrl_hevc_scaling_matrix { + __u8 scaling_list_4x4[6][16]; + __u8 scaling_list_8x8[6][64]; + __u8 scaling_list_16x16[6][64]; + __u8 scaling_list_32x32[2][64]; + __u8 scaling_list_dc_coef_16x16[6]; + __u8 scaling_list_dc_coef_32x32[2]; +}; + +#endif diff --git a/libavcodec/hevc_parser.c b/libavcodec/hevc_parser.c index 59f9a0ff3e..4ae7222e8b 100644 --- a/libavcodec/hevc_parser.c +++ b/libavcodec/hevc_parser.c @@ -97,6 +97,19 @@ static int hevc_parse_slice_header(AVCodecParserContext *s, H2645NAL *nal, avctx->profile = ps->sps->ptl.general_ptl.profile_idc; avctx->level = ps->sps->ptl.general_ptl.level_idc; + if (ps->sps->chroma_format_idc == 1) { + avctx->chroma_sample_location = ps->sps->vui.common.chroma_loc_info_present_flag ? + ps->sps->vui.common.chroma_sample_loc_type_top_field + 1 : + AVCHROMA_LOC_LEFT; + } + else if (ps->sps->chroma_format_idc == 2 || + ps->sps->chroma_format_idc == 3) { + avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;; + } + else { + avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED; + } + if (ps->vps->vps_timing_info_present_flag) { num = ps->vps->vps_num_units_in_tick; den = ps->vps->vps_time_scale; diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c index 811e8feff8..f7cf14eabc 100644 --- a/libavcodec/hevc_refs.c +++ b/libavcodec/hevc_refs.c @@ -98,18 +98,22 @@ static HEVCFrame *alloc_frame(HEVCContext *s) if (!frame->rpl_buf) goto fail; - frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool); - if (!frame->tab_mvf_buf) - goto fail; - frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data; + if (s->tab_mvf_pool) { + frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool); + if (!frame->tab_mvf_buf) + goto fail; + frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data; + } - frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool); - if (!frame->rpl_tab_buf) - goto fail; - frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data; - frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height; - for (j = 0; j < frame->ctb_count; j++) - frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data; + if (s->rpl_tab_pool) { + frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool); + if (!frame->rpl_tab_buf) + goto fail; + frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data; + frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height; + for (j = 0; j < frame->ctb_count; j++) + frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data; + } frame->frame->top_field_first = s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD; frame->frame->interlaced_frame = (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) || (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD); @@ -297,14 +301,17 @@ static int init_slice_rpl(HEVCContext *s) int ctb_count = frame->ctb_count; int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr]; int i; + RefPicListTab * const tab = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx; if (s->slice_idx >= frame->rpl_buf->size / sizeof(RefPicListTab)) return AVERROR_INVALIDDATA; - for (i = ctb_addr_ts; i < ctb_count; i++) - frame->rpl_tab[i] = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx; + if (frame->rpl_tab) { + for (i = ctb_addr_ts; i < ctb_count; i++) + frame->rpl_tab[i] = tab; + } - frame->refPicList = (RefPicList *)frame->rpl_tab[ctb_addr_ts]; + frame->refPicList = tab->refPicList; return 0; } diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index 567e8d81d4..17f53322fb 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -347,6 +347,19 @@ static void export_stream_params(HEVCContext *s, const HEVCSPS *sps) else avctx->color_range = AVCOL_RANGE_MPEG; + if (sps->chroma_format_idc == 1) { + avctx->chroma_sample_location = sps->vui.common.chroma_loc_info_present_flag ? + sps->vui.common.chroma_sample_loc_type_top_field + 1 : + AVCHROMA_LOC_LEFT; + } + else if (sps->chroma_format_idc == 2 || + sps->chroma_format_idc == 3) { + avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;; + } + else { + avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED; + } + if (sps->vui.common.colour_description_present_flag) { avctx->color_primaries = sps->vui.common.colour_primaries; avctx->color_trc = sps->vui.common.transfer_characteristics; @@ -403,6 +416,7 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \ CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \ CONFIG_HEVC_NVDEC_HWACCEL + \ + CONFIG_HEVC_V4L2REQUEST_HWACCEL + \ CONFIG_HEVC_VAAPI_HWACCEL + \ CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \ CONFIG_HEVC_VDPAU_HWACCEL) @@ -429,6 +443,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_HEVC_V4L2REQUEST_HWACCEL + *fmt++ = AV_PIX_FMT_DRM_PRIME; #endif break; case AV_PIX_FMT_YUV420P10: @@ -450,6 +467,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif #if CONFIG_HEVC_NVDEC_HWACCEL *fmt++ = AV_PIX_FMT_CUDA; +#endif +#if CONFIG_HEVC_V4L2REQUEST_HWACCEL + *fmt++ = AV_PIX_FMT_DRM_PRIME; #endif break; case AV_PIX_FMT_YUV444P: @@ -516,6 +536,16 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps, if (!sps) return 0; + // If hwaccel then we don't need all the s/w decode helper arrays + if (s->avctx->hwaccel) { + export_stream_params(s, sps); + + s->avctx->pix_fmt = pix_fmt; + s->ps.sps = sps; + s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data; + return 0; + } + ret = pic_arrays_init(s, sps); if (ret < 0) goto fail; @@ -2870,11 +2900,13 @@ static int hevc_frame_start(HEVCContext *s) ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1); int ret; - memset(s->horizontal_bs, 0, s->bs_width * s->bs_height); - memset(s->vertical_bs, 0, s->bs_width * s->bs_height); - memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height); - memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1)); - memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address)); + if (s->horizontal_bs) { + memset(s->horizontal_bs, 0, s->bs_width * s->bs_height); + memset(s->vertical_bs, 0, s->bs_width * s->bs_height); + memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height); + memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1)); + memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address)); + } s->is_decoded = 0; s->first_nal_type = s->nal_unit_type; @@ -3362,8 +3394,13 @@ static int hevc_decode_frame(AVCodecContext *avctx, AVFrame *rframe, s->ref = NULL; ret = decode_nal_units(s, avpkt->data, avpkt->size); - if (ret < 0) + if (ret < 0) { + // Ensure that hwaccel knows this frame is over + if (s->avctx->hwaccel && s->avctx->hwaccel->abort_frame) + s->avctx->hwaccel->abort_frame(s->avctx); + return ret; + } if (avctx->hwaccel) { if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) { @@ -3413,15 +3450,19 @@ static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src) dst->needs_fg = 1; } - dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf); - if (!dst->tab_mvf_buf) - goto fail; - dst->tab_mvf = src->tab_mvf; + if (src->tab_mvf_buf) { + dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf); + if (!dst->tab_mvf_buf) + goto fail; + dst->tab_mvf = src->tab_mvf; + } - dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf); - if (!dst->rpl_tab_buf) - goto fail; - dst->rpl_tab = src->rpl_tab; + if (src->rpl_tab_buf) { + dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf); + if (!dst->rpl_tab_buf) + goto fail; + dst->rpl_tab = src->rpl_tab; + } dst->rpl_buf = av_buffer_ref(src->rpl_buf); if (!dst->rpl_buf) @@ -3731,6 +3772,9 @@ const FFCodec ff_hevc_decoder = { #if CONFIG_HEVC_NVDEC_HWACCEL HWACCEL_NVDEC(hevc), #endif +#if CONFIG_HEVC_V4L2REQUEST_HWACCEL + HWACCEL_V4L2REQUEST(hevc), +#endif #if CONFIG_HEVC_VAAPI_HWACCEL HWACCEL_VAAPI(hevc), #endif diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index aca55831f3..f32d1c4ec4 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -40,6 +40,7 @@ extern const AVHWAccel ff_hevc_d3d11va_hwaccel; extern const AVHWAccel ff_hevc_d3d11va2_hwaccel; extern const AVHWAccel ff_hevc_dxva2_hwaccel; extern const AVHWAccel ff_hevc_nvdec_hwaccel; +extern const AVHWAccel ff_hevc_v4l2request_hwaccel; extern const AVHWAccel ff_hevc_vaapi_hwaccel; extern const AVHWAccel ff_hevc_vdpau_hwaccel; extern const AVHWAccel ff_hevc_videotoolbox_hwaccel; diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h index 721424912c..b8aa383071 100644 --- a/libavcodec/hwconfig.h +++ b/libavcodec/hwconfig.h @@ -24,6 +24,7 @@ #define HWACCEL_CAP_ASYNC_SAFE (1 << 0) +#define HWACCEL_CAP_MT_SAFE (1 << 1) typedef struct AVCodecHWConfigInternal { @@ -70,6 +71,8 @@ typedef struct AVCodecHWConfigInternal { HW_CONFIG_HWACCEL(1, 1, 0, D3D11, D3D11VA, ff_ ## codec ## _d3d11va2_hwaccel) #define HWACCEL_NVDEC(codec) \ HW_CONFIG_HWACCEL(1, 1, 0, CUDA, CUDA, ff_ ## codec ## _nvdec_hwaccel) +#define HWACCEL_V4L2REQUEST(codec) \ + HW_CONFIG_HWACCEL(1, 0, 0, DRM_PRIME, DRM, ff_ ## codec ## _v4l2request_hwaccel) #define HWACCEL_VAAPI(codec) \ HW_CONFIG_HWACCEL(1, 1, 1, VAAPI, VAAPI, ff_ ## codec ## _vaapi_hwaccel) #define HWACCEL_VDPAU(codec) \ diff --git a/libavcodec/mmaldec.c b/libavcodec/mmaldec.c index 3092f58510..6f41b41ac4 100644 --- a/libavcodec/mmaldec.c +++ b/libavcodec/mmaldec.c @@ -24,6 +24,9 @@ * MMAL Video Decoder */ +#pragma GCC diagnostic push +// Many many redundant decls in the header files +#pragma GCC diagnostic ignored "-Wredundant-decls" #include #include #include @@ -31,6 +34,7 @@ #include #include #include +#pragma GCC diagnostic pop #include #include "avcodec.h" diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c index d9d5afaa82..b14f8e9360 100644 --- a/libavcodec/pthread_frame.c +++ b/libavcodec/pthread_frame.c @@ -204,7 +204,8 @@ static attribute_align_arg void *frame_worker_thread(void *arg) /* if the previous thread uses hwaccel then we take the lock to ensure * the threads don't run concurrently */ - if (avctx->hwaccel) { + if (avctx->hwaccel && + !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { pthread_mutex_lock(&p->parent->hwaccel_mutex); p->hwaccel_serializing = 1; } @@ -230,7 +231,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg) p->hwaccel_serializing = 0; pthread_mutex_unlock(&p->parent->hwaccel_mutex); } - av_assert0(!avctx->hwaccel); + av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)); if (p->async_serializing) { p->async_serializing = 0; @@ -318,6 +319,12 @@ FF_ENABLE_DEPRECATION_WARNINGS } dst->hwaccel_flags = src->hwaccel_flags; + if (src->hwaccel && + (src->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { + dst->hwaccel = src->hwaccel; + dst->hwaccel_context = src->hwaccel_context; + dst->internal->hwaccel_priv_data = src->internal->hwaccel_priv_data; + } err = av_buffer_replace(&dst->internal->pool, src->internal->pool); if (err < 0) @@ -433,10 +440,13 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx, } /* transfer the stashed hwaccel state, if any */ - av_assert0(!p->avctx->hwaccel); - FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel); - FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context); - FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); + av_assert0(!p->avctx->hwaccel || (p->avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)); + if (p->avctx->hwaccel && + !(p->avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { + FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel); + FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context); + FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); + } av_packet_unref(p->avpkt); ret = av_packet_ref(p->avpkt, avpkt); @@ -590,7 +600,9 @@ void ff_thread_finish_setup(AVCodecContext *avctx) { if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return; - if (avctx->hwaccel && !p->hwaccel_serializing) { + if (avctx->hwaccel && + !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) && + !p->hwaccel_serializing) { pthread_mutex_lock(&p->parent->hwaccel_mutex); p->hwaccel_serializing = 1; } @@ -607,9 +619,12 @@ void ff_thread_finish_setup(AVCodecContext *avctx) { * this is done here so that this worker thread can wipe its own hwaccel * state after decoding, without requiring synchronization */ av_assert0(!p->parent->stash_hwaccel); - p->parent->stash_hwaccel = avctx->hwaccel; - p->parent->stash_hwaccel_context = avctx->hwaccel_context; - p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data; + if (avctx->hwaccel && + !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { + p->parent->stash_hwaccel = avctx->hwaccel; + p->parent->stash_hwaccel_context = avctx->hwaccel_context; + p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data; + } pthread_mutex_lock(&p->progress_mutex); if(atomic_load(&p->state) == STATE_SETUP_FINISHED){ @@ -664,6 +679,15 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count) park_frame_worker_threads(fctx, thread_count); + if (fctx->prev_thread && + avctx->hwaccel && (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) && + avctx->internal->hwaccel_priv_data != + fctx->prev_thread->avctx->internal->hwaccel_priv_data) { + if (update_context_from_thread(avctx, fctx->prev_thread->avctx, 1) < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to update user thread.\n"); + } + } + for (i = 0; i < thread_count; i++) { PerThreadContext *p = &fctx->threads[i]; AVCodecContext *ctx = p->avctx; @@ -707,10 +731,13 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count) /* if we have stashed hwaccel state, move it to the user-facing context, * so it will be freed in avcodec_close() */ - av_assert0(!avctx->hwaccel); - FFSWAP(const AVHWAccel*, avctx->hwaccel, fctx->stash_hwaccel); - FFSWAP(void*, avctx->hwaccel_context, fctx->stash_hwaccel_context); - FFSWAP(void*, avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); + av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)); + if (avctx->hwaccel && + !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { + FFSWAP(const AVHWAccel*, avctx->hwaccel, fctx->stash_hwaccel); + FFSWAP(void*, avctx->hwaccel_context, fctx->stash_hwaccel_context); + FFSWAP(void*, avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); + } av_freep(&avctx->internal->thread_ctx); } diff --git a/libavcodec/raw.c b/libavcodec/raw.c index 1e5b48d1e0..1e689f9ee0 100644 --- a/libavcodec/raw.c +++ b/libavcodec/raw.c @@ -295,6 +295,12 @@ static const PixelFormatTag raw_pix_fmt_tags[] = { { AV_PIX_FMT_RGB565LE,MKTAG( 3 , 0 , 0 , 0 ) }, /* flipped RGB565LE */ { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */ + /* RPI (Might as well define for everything) */ + { AV_PIX_FMT_SAND128, MKTAG('S', 'A', 'N', 'D') }, + { AV_PIX_FMT_RPI4_8, MKTAG('S', 'A', 'N', 'D') }, + { AV_PIX_FMT_SAND64_10, MKTAG('S', 'N', 'D', 'A') }, + { AV_PIX_FMT_RPI4_10, MKTAG('S', 'N', 'D', 'B') }, + { AV_PIX_FMT_NONE, 0 }, }; diff --git a/libavcodec/rawenc.c b/libavcodec/rawenc.c index 8c577006d9..8ca0379e12 100644 --- a/libavcodec/rawenc.c +++ b/libavcodec/rawenc.c @@ -24,6 +24,7 @@ * Raw Video Encoder */ +#include "config.h" #include "avcodec.h" #include "codec_internal.h" #include "encode.h" @@ -33,6 +34,10 @@ #include "libavutil/intreadwrite.h" #include "libavutil/imgutils.h" #include "libavutil/internal.h" +#include "libavutil/avassert.h" +#if CONFIG_SAND +#include "libavutil/rpi_sand_fns.h" +#endif static av_cold int raw_encode_init(AVCodecContext *avctx) { @@ -46,22 +51,114 @@ static av_cold int raw_encode_init(AVCodecContext *avctx) return 0; } +#if CONFIG_SAND +static int raw_sand8_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, + const AVFrame *frame) +{ + const int width = av_frame_cropped_width(frame); + const int height = av_frame_cropped_height(frame); + const int x0 = frame->crop_left; + const int y0 = frame->crop_top; + const int size = width * height * 3 / 2; + uint8_t * dst; + int ret; + + if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) + return ret; + + dst = pkt->data; + + av_rpi_sand_to_planar_y8(dst, width, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height); + dst += width * height; + av_rpi_sand_to_planar_c8(dst, width / 2, dst + width * height / 4, width / 2, + frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0 / 2, y0 / 2, width / 2, height / 2); + return 0; +} + +static int raw_sand16_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, + const AVFrame *frame) +{ + const int width = av_frame_cropped_width(frame); + const int height = av_frame_cropped_height(frame); + const int x0 = frame->crop_left; + const int y0 = frame->crop_top; + const int size = width * height * 3; + uint8_t * dst; + int ret; + + if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) + return ret; + + dst = pkt->data; + + av_rpi_sand_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0 * 2, y0, width * 2, height); + dst += width * height * 2; + av_rpi_sand_to_planar_c16(dst, width, dst + width * height / 2, width, + frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0, y0 / 2, width, height / 2); + return 0; +} + +static int raw_sand30_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, + const AVFrame *frame) +{ + const int width = av_frame_cropped_width(frame); + const int height = av_frame_cropped_height(frame); + const int x0 = frame->crop_left; + const int y0 = frame->crop_top; + const int size = width * height * 3; + uint8_t * dst; + int ret; + + if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) + return ret; + + dst = pkt->data; + + av_rpi_sand30_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height); + dst += width * height * 2; + av_rpi_sand30_to_planar_c16(dst, width, dst + width * height / 2, width, + frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0/2, y0 / 2, width/2, height / 2); + return 0; +} +#endif + + static int raw_encode(AVCodecContext *avctx, AVPacket *pkt, - const AVFrame *frame, int *got_packet) + const AVFrame *src_frame, int *got_packet) { - int ret = av_image_get_buffer_size(frame->format, - frame->width, frame->height, 1); + int ret; + AVFrame * frame = NULL; - if (ret < 0) +#if CONFIG_SAND + if (av_rpi_is_sand_frame(src_frame)) { + ret = av_rpi_is_sand8_frame(src_frame) ? raw_sand8_as_yuv420(avctx, pkt, src_frame) : + av_rpi_is_sand16_frame(src_frame) ? raw_sand16_as_yuv420(avctx, pkt, src_frame) : + av_rpi_is_sand30_frame(src_frame) ? raw_sand30_as_yuv420(avctx, pkt, src_frame) : -1; + *got_packet = (ret == 0); return ret; + } +#endif + + if ((frame = av_frame_clone(src_frame)) == NULL) { + ret = AVERROR(ENOMEM); + goto fail; + } + + if ((ret = av_frame_apply_cropping(frame, AV_FRAME_CROP_UNALIGNED)) < 0) + goto fail; + + ret = av_image_get_buffer_size(frame->format, + frame->width, frame->height, 1); + if (ret < 0) + goto fail; if ((ret = ff_get_encode_buffer(avctx, pkt, ret, 0)) < 0) - return ret; + goto fail; if ((ret = av_image_copy_to_buffer(pkt->data, pkt->size, (const uint8_t **)frame->data, frame->linesize, frame->format, frame->width, frame->height, 1)) < 0) - return ret; + goto fail; if(avctx->codec_tag == AV_RL32("yuv2") && ret > 0 && frame->format == AV_PIX_FMT_YUYV422) { @@ -77,8 +174,15 @@ static int raw_encode(AVCodecContext *avctx, AVPacket *pkt, AV_WB64(&pkt->data[8 * x], v << 48 | v >> 16); } } + pkt->flags |= AV_PKT_FLAG_KEY; + av_frame_free(&frame); *got_packet = 1; return 0; + +fail: + av_frame_free(&frame); + *got_packet = 0; + return ret; } const FFCodec ff_rawvideo_encoder = { diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c index 3f5471067a..8d80d19788 100644 --- a/libavcodec/v4l2_buffers.c +++ b/libavcodec/v4l2_buffers.c @@ -21,6 +21,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include #include #include #include @@ -28,57 +29,89 @@ #include #include #include "libavcodec/avcodec.h" +#include "libavcodec/internal.h" +#include "libavutil/avassert.h" #include "libavutil/pixdesc.h" +#include "libavutil/hwcontext.h" #include "v4l2_context.h" #include "v4l2_buffers.h" #include "v4l2_m2m.h" +#include "v4l2_req_dmabufs.h" +#include "weak_link.h" #define USEC_PER_SEC 1000000 -static AVRational v4l2_timebase = { 1, USEC_PER_SEC }; +static const AVRational v4l2_timebase = { 1, USEC_PER_SEC }; -static inline V4L2m2mContext *buf_to_m2mctx(V4L2Buffer *buf) +static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) { - return V4L2_TYPE_IS_OUTPUT(buf->context->type) ? - container_of(buf->context, V4L2m2mContext, output) : - container_of(buf->context, V4L2m2mContext, capture); + return V4L2_TYPE_IS_OUTPUT(ctx->type) ? + container_of(ctx, V4L2m2mContext, output) : + container_of(ctx, V4L2m2mContext, capture); } -static inline AVCodecContext *logger(V4L2Buffer *buf) +static inline V4L2m2mContext *buf_to_m2mctx(const V4L2Buffer * const buf) { - return buf_to_m2mctx(buf)->avctx; + return ctx_to_m2mctx(buf->context); } -static inline AVRational v4l2_get_timebase(V4L2Buffer *avbuf) +static inline AVCodecContext *logger(const V4L2Buffer * const buf) { - V4L2m2mContext *s = buf_to_m2mctx(avbuf); + return buf_to_m2mctx(buf)->avctx; +} - if (s->avctx->pkt_timebase.num) - return s->avctx->pkt_timebase; - return s->avctx->time_base; +static inline AVRational v4l2_get_timebase(const V4L2Buffer * const avbuf) +{ + const V4L2m2mContext *s = buf_to_m2mctx(avbuf); + const AVRational tb = s->avctx->pkt_timebase.num ? + s->avctx->pkt_timebase : + s->avctx->time_base; + return tb.num && tb.den ? tb : v4l2_timebase; } -static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts) +static inline struct timeval tv_from_int(const int64_t t) { - int64_t v4l2_pts; + return (struct timeval){ + .tv_usec = t % USEC_PER_SEC, + .tv_sec = t / USEC_PER_SEC + }; +} - if (pts == AV_NOPTS_VALUE) - pts = 0; +static inline int64_t int_from_tv(const struct timeval t) +{ + return (int64_t)t.tv_sec * USEC_PER_SEC + t.tv_usec; +} +static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts) +{ /* convert pts to v4l2 timebase */ - v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); - out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC; - out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC; + const int64_t v4l2_pts = + pts == AV_NOPTS_VALUE ? 0 : + av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); + out->buf.timestamp = tv_from_int(v4l2_pts); } -static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf) +static inline int64_t v4l2_get_pts(const V4L2Buffer * const avbuf) { - int64_t v4l2_pts; - + const int64_t v4l2_pts = int_from_tv(avbuf->buf.timestamp); + return v4l2_pts != 0 ? v4l2_pts : AV_NOPTS_VALUE; +#if 0 /* convert pts back to encoder timebase */ - v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + - avbuf->buf.timestamp.tv_usec; + return + avbuf->context->no_pts_rescale ? v4l2_pts : + v4l2_pts == 0 ? AV_NOPTS_VALUE : + av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); +#endif +} - return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); +static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length) +{ + if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { + out->planes[plane].bytesused = bytesused; + out->planes[plane].length = length; + } else { + out->buf.bytesused = bytesused; + out->buf.length = length; + } } static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) @@ -115,6 +148,105 @@ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) return AVCOL_PRI_UNSPECIFIED; } +static void v4l2_set_color(V4L2Buffer *buf, + const enum AVColorPrimaries avcp, + const enum AVColorSpace avcs, + const enum AVColorTransferCharacteristic avxc) +{ + enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT; + enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT; + enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT; + + switch (avcp) { + case AVCOL_PRI_BT709: + cs = V4L2_COLORSPACE_REC709; + ycbcr = V4L2_YCBCR_ENC_709; + break; + case AVCOL_PRI_BT470M: + cs = V4L2_COLORSPACE_470_SYSTEM_M; + ycbcr = V4L2_YCBCR_ENC_601; + break; + case AVCOL_PRI_BT470BG: + cs = V4L2_COLORSPACE_470_SYSTEM_BG; + break; + case AVCOL_PRI_SMPTE170M: + cs = V4L2_COLORSPACE_SMPTE170M; + break; + case AVCOL_PRI_SMPTE240M: + cs = V4L2_COLORSPACE_SMPTE240M; + break; + case AVCOL_PRI_BT2020: + cs = V4L2_COLORSPACE_BT2020; + break; + case AVCOL_PRI_SMPTE428: + case AVCOL_PRI_SMPTE431: + case AVCOL_PRI_SMPTE432: + case AVCOL_PRI_EBU3213: + case AVCOL_PRI_RESERVED: + case AVCOL_PRI_FILM: + case AVCOL_PRI_UNSPECIFIED: + default: + break; + } + + switch (avcs) { + case AVCOL_SPC_RGB: + cs = V4L2_COLORSPACE_SRGB; + break; + case AVCOL_SPC_BT709: + cs = V4L2_COLORSPACE_REC709; + break; + case AVCOL_SPC_FCC: + cs = V4L2_COLORSPACE_470_SYSTEM_M; + break; + case AVCOL_SPC_BT470BG: + cs = V4L2_COLORSPACE_470_SYSTEM_BG; + break; + case AVCOL_SPC_SMPTE170M: + cs = V4L2_COLORSPACE_SMPTE170M; + break; + case AVCOL_SPC_SMPTE240M: + cs = V4L2_COLORSPACE_SMPTE240M; + break; + case AVCOL_SPC_BT2020_CL: + cs = V4L2_COLORSPACE_BT2020; + ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM; + break; + case AVCOL_SPC_BT2020_NCL: + cs = V4L2_COLORSPACE_BT2020; + break; + default: + break; + } + + switch (xfer) { + case AVCOL_TRC_BT709: + xfer = V4L2_XFER_FUNC_709; + break; + case AVCOL_TRC_IEC61966_2_1: + xfer = V4L2_XFER_FUNC_SRGB; + break; + case AVCOL_TRC_SMPTE240M: + xfer = V4L2_XFER_FUNC_SMPTE240M; + break; + case AVCOL_TRC_SMPTE2084: + xfer = V4L2_XFER_FUNC_SMPTE2084; + break; + default: + break; + } + + if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) { + buf->context->format.fmt.pix_mp.colorspace = cs; + buf->context->format.fmt.pix_mp.ycbcr_enc = ycbcr; + buf->context->format.fmt.pix_mp.xfer_func = xfer; + } else { + buf->context->format.fmt.pix.colorspace = cs; + buf->context->format.fmt.pix.ycbcr_enc = ycbcr; + buf->context->format.fmt.pix.xfer_func = xfer; + } +} + static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) { enum v4l2_quantization qt; @@ -133,6 +265,20 @@ static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) return AVCOL_RANGE_UNSPECIFIED; } +static void v4l2_set_color_range(V4L2Buffer *buf, const enum AVColorRange avcr) +{ + const enum v4l2_quantization q = + avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE : + avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE : + V4L2_QUANTIZATION_DEFAULT; + + if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) { + buf->context->format.fmt.pix_mp.quantization = q; + } else { + buf->context->format.fmt.pix.quantization = q; + } +} + static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf) { enum v4l2_ycbcr_encoding ycbcr; @@ -209,73 +355,178 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf) return AVCOL_TRC_UNSPECIFIED; } -static void v4l2_free_buffer(void *opaque, uint8_t *unused) +static int v4l2_buf_is_interlaced(const V4L2Buffer * const buf) { - V4L2Buffer* avbuf = opaque; - V4L2m2mContext *s = buf_to_m2mctx(avbuf); + return V4L2_FIELD_IS_INTERLACED(buf->buf.field); +} - if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) { - atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel); +static int v4l2_buf_is_top_first(const V4L2Buffer * const buf) +{ + return buf->buf.field == V4L2_FIELD_INTERLACED_TB; +} - if (s->reinit) { - if (!atomic_load(&s->refcount)) - sem_post(&s->refsync); - } else { - if (s->draining && V4L2_TYPE_IS_OUTPUT(avbuf->context->type)) { - /* no need to queue more buffers to the driver */ - avbuf->status = V4L2BUF_AVAILABLE; - } - else if (avbuf->context->streamon) - ff_v4l2_buffer_enqueue(avbuf); - } +static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff) +{ + buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE : + is_tff ? V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT; +} - av_buffer_unref(&avbuf->context_ref); +static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf) +{ + AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame; + AVDRMLayerDescriptor *layer; + + /* fill the DRM frame descriptor */ + drm_desc->nb_objects = avbuf->num_planes; + drm_desc->nb_layers = 1; + + layer = &drm_desc->layers[0]; + layer->nb_planes = avbuf->num_planes; + + for (int i = 0; i < avbuf->num_planes; i++) { + layer->planes[i].object_index = i; + layer->planes[i].offset = avbuf->plane_info[i].offset; + layer->planes[i].pitch = avbuf->plane_info[i].bytesperline; } + + switch (avbuf->context->av_pix_fmt) { + case AV_PIX_FMT_YUYV422: + + layer->format = DRM_FORMAT_YUYV; + layer->nb_planes = 1; + + break; + + case AV_PIX_FMT_NV12: + case AV_PIX_FMT_NV21: + + layer->format = avbuf->context->av_pix_fmt == AV_PIX_FMT_NV12 ? + DRM_FORMAT_NV12 : DRM_FORMAT_NV21; + + if (avbuf->num_planes > 1) + break; + + layer->nb_planes = 2; + + layer->planes[1].object_index = 0; + layer->planes[1].offset = avbuf->plane_info[0].bytesperline * + avbuf->context->format.fmt.pix.height; + layer->planes[1].pitch = avbuf->plane_info[0].bytesperline; + break; + + case AV_PIX_FMT_YUV420P: + + layer->format = DRM_FORMAT_YUV420; + + if (avbuf->num_planes > 1) + break; + + layer->nb_planes = 3; + + layer->planes[1].object_index = 0; + layer->planes[1].offset = avbuf->plane_info[0].bytesperline * + avbuf->context->format.fmt.pix.height; + layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1; + + layer->planes[2].object_index = 0; + layer->planes[2].offset = layer->planes[1].offset + + ((avbuf->plane_info[0].bytesperline * + avbuf->context->format.fmt.pix.height) >> 2); + layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1; + break; + + default: + drm_desc->nb_layers = 0; + break; + } + + return (uint8_t *) drm_desc; } -static int v4l2_buf_increase_ref(V4L2Buffer *in) +static void v4l2_free_bufref(void *opaque, uint8_t *data) { - V4L2m2mContext *s = buf_to_m2mctx(in); + AVBufferRef * bufref = (AVBufferRef *)data; + V4L2Buffer *avbuf = (V4L2Buffer *)bufref->data; + struct V4L2Context *ctx = ff_weak_link_lock(&avbuf->context_wl); - if (in->context_ref) - atomic_fetch_add(&in->context_refcount, 1); - else { - in->context_ref = av_buffer_ref(s->self_ref); - if (!in->context_ref) - return AVERROR(ENOMEM); + if (ctx != NULL) { + // Buffer still attached to context + V4L2m2mContext *s = buf_to_m2mctx(avbuf); - in->context_refcount = 1; - } + ff_mutex_lock(&ctx->lock); - in->status = V4L2BUF_RET_USER; - atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed); + ff_v4l2_buffer_set_avail(avbuf); - return 0; + if (s->draining && V4L2_TYPE_IS_OUTPUT(ctx->type)) { + av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name); + /* no need to queue more buffers to the driver */ + } + else if (ctx->streamon) { + av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer requeue\n", ctx->name); + avbuf->buf.timestamp.tv_sec = 0; + avbuf->buf.timestamp.tv_usec = 0; + ff_v4l2_buffer_enqueue(avbuf); // will set to IN_DRIVER + } + else { + av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer freed but streamoff\n", ctx->name); + } + + ff_mutex_unlock(&ctx->lock); + } + + ff_weak_link_unlock(avbuf->context_wl); + av_buffer_unref(&bufref); } -static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf) +static inline uint32_t ff_v4l2_buf_len(const struct v4l2_buffer * b, unsigned int i) { - int ret; + return V4L2_TYPE_IS_MULTIPLANAR(b->type) ? b->m.planes[i].length : b->length; +} - if (plane >= in->num_planes) - return AVERROR(EINVAL); +static int v4l2_buffer_export_drm(V4L2Buffer* avbuf) +{ + int i, ret; + const V4L2m2mContext * const s = buf_to_m2mctx(avbuf); - /* even though most encoders return 0 in data_offset encoding vp8 does require this value */ - *buf = av_buffer_create((char *)in->plane_info[plane].mm_addr + in->planes[plane].data_offset, - in->plane_info[plane].length, v4l2_free_buffer, in, 0); - if (!*buf) - return AVERROR(ENOMEM); + for (i = 0; i < avbuf->num_planes; i++) { + int dma_fd = -1; + const uint32_t blen = ff_v4l2_buf_len(&avbuf->buf, i); + + if (s->db_ctl != NULL) { + if ((avbuf->dmabuf[i] = dmabuf_alloc(s->db_ctl, blen)) == NULL) + return AVERROR(ENOMEM); + dma_fd = dmabuf_fd(avbuf->dmabuf[i]); + if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type)) + avbuf->buf.m.planes[i].m.fd = dma_fd; + else + avbuf->buf.m.fd = dma_fd; + } + else { + struct v4l2_exportbuffer expbuf; + memset(&expbuf, 0, sizeof(expbuf)); + + expbuf.index = avbuf->buf.index; + expbuf.type = avbuf->buf.type; + expbuf.plane = i; + + ret = ioctl(s->fd, VIDIOC_EXPBUF, &expbuf); + if (ret < 0) + return AVERROR(errno); + dma_fd = expbuf.fd; + } - ret = v4l2_buf_increase_ref(in); - if (ret) - av_buffer_unref(buf); + avbuf->drm_frame.objects[i].size = blen; + avbuf->drm_frame.objects[i].fd = dma_fd; + avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR; + } - return ret; + return 0; } static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset) { unsigned int bytesused, length; + int rv = 0; if (plane >= out->num_planes) return AVERROR(EINVAL); @@ -283,32 +534,57 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i length = out->plane_info[plane].length; bytesused = FFMIN(size+offset, length); - memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, FFMIN(size, length-offset)); - - if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { - out->planes[plane].bytesused = bytesused; - out->planes[plane].length = length; - } else { - out->buf.bytesused = bytesused; - out->buf.length = length; + if (size > length - offset) { + size = length - offset; + rv = AVERROR(ENOMEM); } - return 0; + memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, size); + + set_buf_length(out, plane, bytesused, length); + + return rv; +} + +static AVBufferRef * wrap_avbuf(V4L2Buffer * const avbuf) +{ + AVBufferRef * bufref = av_buffer_ref(avbuf->context->bufrefs[avbuf->buf.index]); + AVBufferRef * newbuf; + + if (!bufref) + return NULL; + + newbuf = av_buffer_create((uint8_t *)bufref, sizeof(*bufref), v4l2_free_bufref, NULL, 0); + if (newbuf == NULL) + av_buffer_unref(&bufref); + + avbuf->status = V4L2BUF_RET_USER; + return newbuf; } static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) { - int i, ret; + int i; frame->format = avbuf->context->av_pix_fmt; - for (i = 0; i < avbuf->num_planes; i++) { - ret = v4l2_buf_to_bufref(avbuf, i, &frame->buf[i]); - if (ret) - return ret; + frame->buf[0] = wrap_avbuf(avbuf); + if (frame->buf[0] == NULL) + return AVERROR(ENOMEM); + if (buf_to_m2mctx(avbuf)->output_drm) { + /* 1. get references to the actual data */ + frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf); + frame->format = AV_PIX_FMT_DRM_PRIME; + frame->hw_frames_ctx = av_buffer_ref(avbuf->context->frames_ref); + return 0; + } + + + /* 1. get references to the actual data */ + for (i = 0; i < avbuf->num_planes; i++) { + frame->data[i] = (uint8_t *)avbuf->plane_info[i].mm_addr + avbuf->planes[i].data_offset; frame->linesize[i] = avbuf->plane_info[i].bytesperline; - frame->data[i] = frame->buf[i]->data; } /* fixup special cases */ @@ -317,17 +593,17 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) case AV_PIX_FMT_NV21: if (avbuf->num_planes > 1) break; - frame->linesize[1] = avbuf->plane_info[0].bytesperline; - frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height; + frame->linesize[1] = frame->linesize[0]; + frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format); break; case AV_PIX_FMT_YUV420P: if (avbuf->num_planes > 1) break; - frame->linesize[1] = avbuf->plane_info[0].bytesperline >> 1; - frame->linesize[2] = avbuf->plane_info[0].bytesperline >> 1; - frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height; - frame->data[2] = frame->data[1] + ((avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height) >> 2); + frame->linesize[1] = frame->linesize[0] / 2; + frame->linesize[2] = frame->linesize[1]; + frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format); + frame->data[2] = frame->data[1] + frame->linesize[1] * ff_v4l2_get_format_height(&avbuf->context->format) / 2; break; default: @@ -337,68 +613,127 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) return 0; } +static void cpy_2d(uint8_t * dst, int dst_stride, const uint8_t * src, int src_stride, int w, int h) +{ + if (dst_stride == src_stride && w + 32 >= dst_stride) { + memcpy(dst, src, dst_stride * h); + } + else { + while (--h >= 0) { + memcpy(dst, src, w); + dst += dst_stride; + src += src_stride; + } + } +} + +static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes) +{ + return i != 0 && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA)); +} + +static int v4l2_buffer_primeframe_to_buf(const AVFrame *frame, V4L2Buffer *out) +{ + const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; + + if (frame->format != AV_PIX_FMT_DRM_PRIME || !src) + return AVERROR(EINVAL); + + av_assert0(out->buf.memory == V4L2_MEMORY_DMABUF); + + if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { + // Only currently cope with single buffer types + if (out->buf.length != 1) + return AVERROR_PATCHWELCOME; + if (src->nb_objects != 1) + return AVERROR(EINVAL); + + out->planes[0].m.fd = src->objects[0].fd; + } + else { + if (src->nb_objects != 1) + return AVERROR(EINVAL); + + out->buf.m.fd = src->objects[0].fd; + } + + // No need to copy src AVDescriptor and if we did then we may confuse + // fd close on free + out->ref_buf = av_buffer_ref(frame->buf[0]); + + return 0; +} + static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) { - int i, ret; - struct v4l2_format fmt = out->context->format; - int pixel_format = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ? - fmt.fmt.pix_mp.pixelformat : fmt.fmt.pix.pixelformat; - int height = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ? - fmt.fmt.pix_mp.height : fmt.fmt.pix.height; - int is_planar_format = 0; - - switch (pixel_format) { - case V4L2_PIX_FMT_YUV420M: - case V4L2_PIX_FMT_YVU420M: -#ifdef V4L2_PIX_FMT_YUV422M - case V4L2_PIX_FMT_YUV422M: -#endif -#ifdef V4L2_PIX_FMT_YVU422M - case V4L2_PIX_FMT_YVU422M: -#endif -#ifdef V4L2_PIX_FMT_YUV444M - case V4L2_PIX_FMT_YUV444M: -#endif -#ifdef V4L2_PIX_FMT_YVU444M - case V4L2_PIX_FMT_YVU444M: -#endif - case V4L2_PIX_FMT_NV12M: - case V4L2_PIX_FMT_NV21M: - case V4L2_PIX_FMT_NV12MT_16X16: - case V4L2_PIX_FMT_NV12MT: - case V4L2_PIX_FMT_NV16M: - case V4L2_PIX_FMT_NV61M: - is_planar_format = 1; - } - - if (!is_planar_format) { - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); - int planes_nb = 0; - int offset = 0; - - for (i = 0; i < desc->nb_components; i++) - planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1); - - for (i = 0; i < planes_nb; i++) { - int size, h = height; - if (i == 1 || i == 2) { + int i; + int num_planes = 0; + int pel_strides[4] = {0}; + + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); + + if ((desc->flags & AV_PIX_FMT_FLAG_HWACCEL) != 0) { + av_log(NULL, AV_LOG_ERROR, "%s: HWACCEL cannot be copied\n", __func__); + return -1; + } + + for (i = 0; i != desc->nb_components; ++i) { + if (desc->comp[i].plane >= num_planes) + num_planes = desc->comp[i].plane + 1; + pel_strides[desc->comp[i].plane] = desc->comp[i].step; + } + + if (out->num_planes > 1) { + if (num_planes != out->num_planes) { + av_log(NULL, AV_LOG_ERROR, "%s: Num planes mismatch: %d != %d\n", __func__, num_planes, out->num_planes); + return -1; + } + for (i = 0; i != num_planes; ++i) { + int w = frame->width; + int h = frame->height; + if (is_chroma(desc, i, num_planes)) { + w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w); h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h); } - size = frame->linesize[i] * h; - ret = v4l2_bufref_to_buf(out, 0, frame->data[i], size, offset); - if (ret) - return ret; - offset += size; + + cpy_2d(out->plane_info[i].mm_addr, out->plane_info[i].bytesperline, + frame->data[i], frame->linesize[i], + w * pel_strides[i], h); + set_buf_length(out, i, out->plane_info[i].bytesperline * h, out->plane_info[i].length); } - return 0; } + else + { + unsigned int offset = 0; + + for (i = 0; i != num_planes; ++i) { + int w = frame->width; + int h = frame->height; + int dst_stride = out->plane_info[0].bytesperline; + uint8_t * const dst = (uint8_t *)out->plane_info[0].mm_addr + offset; + + if (is_chroma(desc, i, num_planes)) { + // Is chroma + dst_stride >>= desc->log2_chroma_w; + offset += dst_stride * (out->context->height >> desc->log2_chroma_h); + w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w); + h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h); + } + else { + // Is luma or alpha + offset += dst_stride * out->context->height; + } + if (offset > out->plane_info[0].length) { + av_log(NULL, AV_LOG_ERROR, "%s: Plane total %u > buffer size %zu\n", __func__, offset, out->plane_info[0].length); + return -1; + } - for (i = 0; i < out->num_planes; i++) { - ret = v4l2_bufref_to_buf(out, i, frame->buf[i]->data, frame->buf[i]->size, 0); - if (ret) - return ret; + cpy_2d(dst, dst_stride, + frame->data[i], frame->linesize[i], + w * pel_strides[i], h); + } + set_buf_length(out, 0, offset, out->plane_info[0].length); } - return 0; } @@ -408,16 +743,31 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) * ******************************************************************************/ -int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) +int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts) { - v4l2_set_pts(out, frame->pts); - - return v4l2_buffer_swframe_to_buf(frame, out); + out->buf.flags = frame->key_frame ? + (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : + (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); + // Beware that colour info is held in format rather than the actual + // v4l2 buffer struct so this may not be as useful as you might hope + v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc); + v4l2_set_color_range(out, frame->color_range); + // PTS & interlace are buffer vars + if (track_ts) + out->buf.timestamp = tv_from_int(track_ts); + else + v4l2_set_pts(out, frame->pts); + v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first); + + return frame->format == AV_PIX_FMT_DRM_PRIME ? + v4l2_buffer_primeframe_to_buf(frame, out) : + v4l2_buffer_swframe_to_buf(frame, out); } int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) { int ret; + V4L2Context * const ctx = avbuf->context; av_frame_unref(frame); @@ -428,17 +778,32 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) /* 2. get frame information */ frame->key_frame = !!(avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME); + frame->pict_type = frame->key_frame ? AV_PICTURE_TYPE_I : + (avbuf->buf.flags & V4L2_BUF_FLAG_PFRAME) != 0 ? AV_PICTURE_TYPE_P : + (avbuf->buf.flags & V4L2_BUF_FLAG_BFRAME) != 0 ? AV_PICTURE_TYPE_B : + AV_PICTURE_TYPE_NONE; frame->color_primaries = v4l2_get_color_primaries(avbuf); frame->colorspace = v4l2_get_color_space(avbuf); frame->color_range = v4l2_get_color_range(avbuf); frame->color_trc = v4l2_get_color_trc(avbuf); frame->pts = v4l2_get_pts(avbuf); frame->pkt_dts = AV_NOPTS_VALUE; + frame->interlaced_frame = v4l2_buf_is_interlaced(avbuf); + frame->top_field_first = v4l2_buf_is_top_first(avbuf); /* these values are updated also during re-init in v4l2_process_driver_event */ - frame->height = avbuf->context->height; - frame->width = avbuf->context->width; - frame->sample_aspect_ratio = avbuf->context->sample_aspect_ratio; + frame->height = ctx->height; + frame->width = ctx->width; + frame->sample_aspect_ratio = ctx->sample_aspect_ratio; + + if (ctx->selection.height && ctx->selection.width) { + frame->crop_left = ctx->selection.left < frame->width ? ctx->selection.left : 0; + frame->crop_top = ctx->selection.top < frame->height ? ctx->selection.top : 0; + frame->crop_right = ctx->selection.left + ctx->selection.width < frame->width ? + frame->width - (ctx->selection.left + ctx->selection.width) : 0; + frame->crop_bottom = ctx->selection.top + ctx->selection.height < frame->height ? + frame->height - (ctx->selection.top + ctx->selection.height) : 0; + } /* 3. report errors upstream */ if (avbuf->buf.flags & V4L2_BUF_FLAG_ERROR) { @@ -451,15 +816,15 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) { - int ret; - av_packet_unref(pkt); - ret = v4l2_buf_to_bufref(avbuf, 0, &pkt->buf); - if (ret) - return ret; + + pkt->buf = wrap_avbuf(avbuf); + if (pkt->buf == NULL) + return AVERROR(ENOMEM); pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused; - pkt->data = pkt->buf->data; + pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset; + pkt->flags = 0; if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME) pkt->flags |= AV_PKT_FLAG_KEY; @@ -474,39 +839,107 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) return 0; } -int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) +int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, + const void *extdata, size_t extlen, + const int64_t timestamp) { int ret; - ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, 0); - if (ret) + if (extlen) { + ret = v4l2_bufref_to_buf(out, 0, extdata, extlen, 0); + if (ret) + return ret; + } + + ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, extlen); + if (ret && ret != AVERROR(ENOMEM)) return ret; - v4l2_set_pts(out, pkt->pts); + if (timestamp) + out->buf.timestamp = tv_from_int(timestamp); + else + v4l2_set_pts(out, pkt->pts); - if (pkt->flags & AV_PKT_FLAG_KEY) - out->flags = V4L2_BUF_FLAG_KEYFRAME; + out->buf.flags = (pkt->flags & AV_PKT_FLAG_KEY) != 0 ? + (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : + (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); - return 0; + return ret; +} + +int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) +{ + return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0); } -int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) + +static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data) +{ + V4L2Buffer * const avbuf = (V4L2Buffer *)data; + int i; + + for (i = 0; i != FF_ARRAY_ELEMS(avbuf->plane_info); ++i) { + struct V4L2Plane_info *p = avbuf->plane_info + i; + if (p->mm_addr != NULL) + munmap(p->mm_addr, p->length); + } + + if (avbuf->dmabuf[0] == NULL) { + for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { + if (avbuf->drm_frame.objects[i].fd != -1) + close(avbuf->drm_frame.objects[i].fd); + } + } + else { + for (i = 0; i != FF_ARRAY_ELEMS(avbuf->dmabuf); ++i) { + dmabuf_free(avbuf->dmabuf[i]); + } + } + + av_buffer_unref(&avbuf->ref_buf); + + ff_weak_link_unref(&avbuf->context_wl); + + av_free(avbuf); +} + + +int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx, enum v4l2_memory mem) { - V4L2Context *ctx = avbuf->context; int ret, i; + V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf)); + AVBufferRef * bufref; + V4L2m2mContext * const s = ctx_to_m2mctx(ctx); - avbuf->buf.memory = V4L2_MEMORY_MMAP; + *pbufref = NULL; + if (avbuf == NULL) + return AVERROR(ENOMEM); + + bufref = av_buffer_create((uint8_t*)avbuf, sizeof(*avbuf), v4l2_buffer_buffer_free, NULL, 0); + if (bufref == NULL) { + av_free(avbuf); + return AVERROR(ENOMEM); + } + + avbuf->context = ctx; + avbuf->buf.memory = mem; avbuf->buf.type = ctx->type; avbuf->buf.index = index; + for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { + avbuf->drm_frame.objects[i].fd = -1; + } + + avbuf->context_wl = ff_weak_link_ref(ctx->wl_master); + if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->buf.length = VIDEO_MAX_PLANES; avbuf->buf.m.planes = avbuf->planes; } - ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf); + ret = ioctl(s->fd, VIDIOC_QUERYBUF, &avbuf->buf); if (ret < 0) - return AVERROR(errno); + goto fail; if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->num_planes = 0; @@ -519,6 +952,8 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) avbuf->num_planes = 1; for (i = 0; i < avbuf->num_planes; i++) { + const int want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP && + (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm); avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline : @@ -526,25 +961,31 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length; - avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, - PROT_READ | PROT_WRITE, MAP_SHARED, - buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); + avbuf->plane_info[i].offset = avbuf->buf.m.planes[i].data_offset; + + if (want_mmap) + avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, + PROT_READ | PROT_WRITE, MAP_SHARED, + buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); } else { avbuf->plane_info[i].length = avbuf->buf.length; - avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, - PROT_READ | PROT_WRITE, MAP_SHARED, - buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); + avbuf->plane_info[i].offset = 0; + + if (want_mmap) + avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, + PROT_READ | PROT_WRITE, MAP_SHARED, + buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); } - if (avbuf->plane_info[i].mm_addr == MAP_FAILED) - return AVERROR(ENOMEM); + if (avbuf->plane_info[i].mm_addr == MAP_FAILED) { + avbuf->plane_info[i].mm_addr = NULL; + ret = AVERROR(ENOMEM); + goto fail; + } } avbuf->status = V4L2BUF_AVAILABLE; - if (V4L2_TYPE_IS_OUTPUT(ctx->type)) - return 0; - if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->buf.m.planes = avbuf->planes; avbuf->buf.length = avbuf->num_planes; @@ -554,20 +995,53 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) avbuf->buf.length = avbuf->planes[0].length; } - return ff_v4l2_buffer_enqueue(avbuf); + if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) { + if (s->output_drm) { + ret = v4l2_buffer_export_drm(avbuf); + if (ret) { + av_log(logger(avbuf), AV_LOG_ERROR, "Failed to get exported drm handles\n"); + goto fail; + } + } + } + + *pbufref = bufref; + return 0; + +fail: + av_buffer_unref(&bufref); + return ret; } int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf) { int ret; + int qc; - avbuf->buf.flags = avbuf->flags; + if (avbuf->buf.timestamp.tv_sec || avbuf->buf.timestamp.tv_usec) { + av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s pre VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", + avbuf->context->name, avbuf->buf.index, + avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, + avbuf->context->q_count); + } ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QBUF, &avbuf->buf); - if (ret < 0) - return AVERROR(errno); + if (ret < 0) { + int err = errno; + av_log(logger(avbuf), AV_LOG_ERROR, "--- %s VIDIOC_QBUF: index %d FAIL err %d (%s)\n", + avbuf->context->name, avbuf->buf.index, + err, strerror(err)); + return AVERROR(err); + } + // Lock not wanted - if called from buffer free then lock already obtained + qc = atomic_fetch_add(&avbuf->context->q_count, 1) + 1; avbuf->status = V4L2BUF_IN_DRIVER; + pthread_cond_broadcast(&avbuf->context->cond); + + av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", + avbuf->context->name, avbuf->buf.index, + avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, qc); return 0; } diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h index 3d2ff1b9a5..444ad94b14 100644 --- a/libavcodec/v4l2_buffers.h +++ b/libavcodec/v4l2_buffers.h @@ -28,31 +28,47 @@ #include #include +#include "avcodec.h" #include "libavutil/buffer.h" #include "libavutil/frame.h" +#include "libavutil/hwcontext_drm.h" #include "packet.h" enum V4L2Buffer_status { V4L2BUF_AVAILABLE, V4L2BUF_IN_DRIVER, + V4L2BUF_IN_USE, V4L2BUF_RET_USER, }; /** * V4L2Buffer (wrapper for v4l2_buffer management) */ +struct V4L2Context; +struct ff_weak_link_client; +struct dmabuf_h; + typedef struct V4L2Buffer { - /* each buffer needs to have a reference to its context */ + /* each buffer needs to have a reference to its context + * The pointer is good enough for most operation but once the buffer has + * been passed to the user the buffer may become orphaned so for free ops + * the weak link must be used to ensure that the context is actually + * there + */ struct V4L2Context *context; + struct ff_weak_link_client *context_wl; - /* This object is refcounted per-plane, so we need to keep track - * of how many context-refs we are holding. */ - AVBufferRef *context_ref; - atomic_uint context_refcount; + /* DRM descriptor */ + AVDRMFrameDescriptor drm_frame; + /* For DRM_PRIME encode - need to keep a ref to the source buffer till we + * are done + */ + AVBufferRef * ref_buf; /* keep track of the mmap address and mmap length */ struct V4L2Plane_info { - int bytesperline; + size_t bytesperline; + size_t offset; void * mm_addr; size_t length; } plane_info[VIDEO_MAX_PLANES]; @@ -63,9 +79,9 @@ typedef struct V4L2Buffer { struct v4l2_buffer buf; struct v4l2_plane planes[VIDEO_MAX_PLANES]; - int flags; enum V4L2Buffer_status status; + struct dmabuf_h * dmabuf[VIDEO_MAX_PLANES]; // If externally alloced dmabufs - stash other info here } V4L2Buffer; /** @@ -101,6 +117,10 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf); */ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); +int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, + const void *extdata, size_t extlen, + const int64_t timestamp); + /** * Extracts the data from an AVFrame to a V4L2Buffer * @@ -109,7 +129,7 @@ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); * * @returns 0 in case of success, a negative AVERROR code otherwise */ -int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); +int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts); /** * Initializes a V4L2Buffer @@ -119,7 +139,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); * * @returns 0 in case of success, a negative AVERROR code otherwise */ -int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index); +int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx, enum v4l2_memory mem); /** * Enqueues a V4L2Buffer @@ -130,5 +150,12 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index); */ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf); +static inline void +ff_v4l2_buffer_set_avail(V4L2Buffer* const avbuf) +{ + avbuf->status = V4L2BUF_AVAILABLE; + av_buffer_unref(&avbuf->ref_buf); +} + #endif // AVCODEC_V4L2_BUFFERS_H diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c index a40be94690..79a31cf930 100644 --- a/libavcodec/v4l2_context.c +++ b/libavcodec/v4l2_context.c @@ -27,11 +27,13 @@ #include #include #include +#include "libavutil/avassert.h" #include "libavcodec/avcodec.h" #include "decode.h" #include "v4l2_buffers.h" #include "v4l2_fmt.h" #include "v4l2_m2m.h" +#include "weak_link.h" struct v4l2_format_update { uint32_t v4l2_fmt; @@ -41,26 +43,168 @@ struct v4l2_format_update { int update_avfmt; }; -static inline V4L2m2mContext *ctx_to_m2mctx(V4L2Context *ctx) + +static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n) { - return V4L2_TYPE_IS_OUTPUT(ctx->type) ? - container_of(ctx, V4L2m2mContext, output) : - container_of(ctx, V4L2m2mContext, capture); + return (int64_t)n; } -static inline AVCodecContext *logger(V4L2Context *ctx) +static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts) { - return ctx_to_m2mctx(ctx)->avctx; + return (unsigned int)pts; +} + +// FFmpeg requires us to propagate a number of vars from the coded pkt into +// the decoded frame. The only thing that tracks like that in V4L2 stateful +// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no +// guarantees about PTS being unique or specified for every frame so replace +// the supplied PTS with a simple incrementing number and keep a circular +// buffer of all the things we want preserved (including the original PTS) +// indexed by the tracking no. +static int64_t +xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPacket *const avpkt) +{ + int64_t track_pts; + + // Avoid 0 + if (++x->track_no == 0) + x->track_no = 1; + + track_pts = track_to_pts(avctx, x->track_no); + + av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no); + x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ + .discard = 0, + .pending = 1, + .pkt_size = avpkt->size, + .pts = avpkt->pts, + .dts = avpkt->dts, + .reordered_opaque = avctx->reordered_opaque, + .pkt_pos = avpkt->pos, + .pkt_duration = avpkt->duration, + .track_pts = track_pts + }; + return track_pts; +} + +static int64_t +xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFrame *const frame) +{ + int64_t track_pts; + + // Avoid 0 + if (++x->track_no == 0) + x->track_no = 1; + + track_pts = track_to_pts(avctx, x->track_no); + + av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, track_pts, x->track_no); + x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ + .discard = 0, + .pending = 1, + .pkt_size = 0, + .pts = frame->pts, + .dts = AV_NOPTS_VALUE, + .reordered_opaque = frame->reordered_opaque, + .pkt_pos = frame->pkt_pos, + .pkt_duration = frame->pkt_duration, + .track_pts = track_pts + }; + return track_pts; +} + + +// Returns -1 if we should discard the frame +static int +xlat_pts_frame_out(AVCodecContext *const avctx, + xlat_track_t * const x, + AVFrame *const frame) +{ + unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE; + V4L2m2mTrackEl *const t = x->track_els + n; + if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts) + { + av_log(avctx, frame->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, + "Frame tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); + frame->pts = AV_NOPTS_VALUE; + frame->pkt_dts = AV_NOPTS_VALUE; + frame->reordered_opaque = x->last_opaque; + frame->pkt_pos = -1; + frame->pkt_duration = 0; + frame->pkt_size = -1; + } + else if (!t->discard) + { + frame->pts = t->pending ? t->pts : AV_NOPTS_VALUE; + frame->pkt_dts = t->dts; + frame->reordered_opaque = t->reordered_opaque; + frame->pkt_pos = t->pkt_pos; + frame->pkt_duration = t->pkt_duration; + frame->pkt_size = t->pkt_size; + + x->last_opaque = x->track_els[n].reordered_opaque; + if (frame->pts != AV_NOPTS_VALUE) + x->last_pts = frame->pts; + t->pending = 0; + } + else + { + av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); + return -1; + } + + av_log(avctx, AV_LOG_TRACE, "Out frame PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n", + frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n); + return 0; } -static inline unsigned int v4l2_get_width(struct v4l2_format *fmt) +// Returns -1 if we should discard the frame +static int +xlat_pts_pkt_out(AVCodecContext *const avctx, + xlat_track_t * const x, + AVPacket *const pkt) { - return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; + unsigned int n = pts_to_track(avctx, pkt->pts) % FF_V4L2_M2M_TRACK_SIZE; + V4L2m2mTrackEl *const t = x->track_els + n; + if (pkt->pts == AV_NOPTS_VALUE || pkt->pts != t->track_pts) + { + av_log(avctx, pkt->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, + "Pkt tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); + pkt->pts = AV_NOPTS_VALUE; + } + else if (!t->discard) + { + pkt->pts = t->pending ? t->pts : AV_NOPTS_VALUE; + + x->last_opaque = x->track_els[n].reordered_opaque; + if (pkt->pts != AV_NOPTS_VALUE) + x->last_pts = pkt->pts; + t->pending = 0; + } + else + { + av_log(avctx, AV_LOG_DEBUG, "Discard packet (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); + return -1; + } + + // * Would like something much better than this...xlat(offset + out_count)? + pkt->dts = pkt->pts; + av_log(avctx, AV_LOG_TRACE, "Out pkt PTS=%" PRId64 ", track=%"PRId64", n=%d\n", + pkt->pts, t->track_pts, n); + return 0; } -static inline unsigned int v4l2_get_height(struct v4l2_format *fmt) + +static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) { - return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; + return V4L2_TYPE_IS_OUTPUT(ctx->type) ? + container_of(ctx, V4L2m2mContext, output) : + container_of(ctx, V4L2m2mContext, capture); +} + +static inline AVCodecContext *logger(const V4L2Context *ctx) +{ + return ctx_to_m2mctx(ctx)->avctx; } static AVRational v4l2_get_sar(V4L2Context *ctx) @@ -81,21 +225,29 @@ static AVRational v4l2_get_sar(V4L2Context *ctx) return sar; } -static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2_format *fmt2) +static inline int ctx_buffers_alloced(const V4L2Context * const ctx) +{ + return ctx->bufrefs != NULL; +} + +// Width/Height changed or we don't have an alloc in the first place? +static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2) { - struct v4l2_format *fmt1 = &ctx->format; - int ret = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? - fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width || - fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height - : - fmt1->fmt.pix.width != fmt2->fmt.pix.width || - fmt1->fmt.pix.height != fmt2->fmt.pix.height; + const struct v4l2_format *fmt1 = &ctx->format; + int ret = !ctx_buffers_alloced(ctx) || + (V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? + fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width || + fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height + : + fmt1->fmt.pix.width != fmt2->fmt.pix.width || + fmt1->fmt.pix.height != fmt2->fmt.pix.height); if (ret) - av_log(logger(ctx), AV_LOG_DEBUG, "%s changed (%dx%d) -> (%dx%d)\n", + av_log(logger(ctx), AV_LOG_DEBUG, "V4L2 %s changed: alloc=%d (%dx%d) -> (%dx%d)\n", ctx->name, - v4l2_get_width(fmt1), v4l2_get_height(fmt1), - v4l2_get_width(fmt2), v4l2_get_height(fmt2)); + ctx_buffers_alloced(ctx), + ff_v4l2_get_format_width(fmt1), ff_v4l2_get_format_height(fmt1), + ff_v4l2_get_format_width(fmt2), ff_v4l2_get_format_height(fmt2)); return ret; } @@ -153,76 +305,110 @@ static inline void v4l2_save_to_context(V4L2Context* ctx, struct v4l2_format_upd } } -static int v4l2_start_decode(V4L2Context *ctx) +static int get_default_selection(V4L2Context * const ctx, struct v4l2_rect *r) { - struct v4l2_decoder_cmd cmd = { - .cmd = V4L2_DEC_CMD_START, - .flags = 0, + V4L2m2mContext * const s = ctx_to_m2mctx(ctx); + struct v4l2_selection selection = { + .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, + .target = V4L2_SEL_TGT_COMPOSE }; - int ret; - ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DECODER_CMD, &cmd); - if (ret) + memset(r, 0, sizeof(*r)); + if (ioctl(s->fd, VIDIOC_G_SELECTION, &selection)) return AVERROR(errno); + *r = selection.r; return 0; } -/** - * handle resolution change event and end of stream event - * returns 1 if reinit was successful, negative if it failed - * returns 0 if reinit was not executed - */ -static int v4l2_handle_event(V4L2Context *ctx) +static int do_source_change(V4L2m2mContext * const s) { - V4L2m2mContext *s = ctx_to_m2mctx(ctx); - struct v4l2_format cap_fmt = s->capture.format; - struct v4l2_event evt = { 0 }; + AVCodecContext *const avctx = s->avctx; + int ret; + int reinit; + struct v4l2_format cap_fmt = s->capture.format; - ret = ioctl(s->fd, VIDIOC_DQEVENT, &evt); - if (ret < 0) { - av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_DQEVENT\n", ctx->name); - return 0; - } + s->capture.done = 0; - if (evt.type == V4L2_EVENT_EOS) { - ctx->done = 1; + ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt); + if (ret) { + av_log(avctx, AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", s->capture.name); return 0; } - if (evt.type != V4L2_EVENT_SOURCE_CHANGE) - return 0; + get_default_selection(&s->capture, &s->capture.selection); - ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt); - if (ret) { - av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT\n", s->capture.name); - return 0; + reinit = ctx_resolution_changed(&s->capture, &cap_fmt); + if ((s->quirks & FF_V4L2_QUIRK_REINIT_ALWAYS) != 0) + reinit = 1; + + s->capture.format = cap_fmt; + if (reinit) { + s->capture.height = ff_v4l2_get_format_height(&cap_fmt); + s->capture.width = ff_v4l2_get_format_width(&cap_fmt); } - if (v4l2_resolution_changed(&s->capture, &cap_fmt)) { - s->capture.height = v4l2_get_height(&cap_fmt); - s->capture.width = v4l2_get_width(&cap_fmt); - s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); - } else { - v4l2_start_decode(ctx); - return 0; + // If we don't support selection (or it is bust) and we obviously have HD then kludge + if ((s->capture.selection.width == 0 || s->capture.selection.height == 0) && + (s->capture.height == 1088 && s->capture.width == 1920)) { + s->capture.selection = (struct v4l2_rect){.width = 1920, .height = 1080}; } - s->reinit = 1; + s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); - if (s->avctx) - ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height); - if (ret < 0) - av_log(logger(ctx), AV_LOG_WARNING, "update avcodec height and width\n"); + av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, wxh %dx%d crop %dx%d @ %d,%d, reinit=%d\n", + s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den, + s->capture.width, s->capture.height, + s->capture.selection.width, s->capture.selection.height, + s->capture.selection.left, s->capture.selection.top, reinit); - ret = ff_v4l2_m2m_codec_reinit(s); - if (ret) { - av_log(logger(ctx), AV_LOG_ERROR, "v4l2_m2m_codec_reinit\n"); - return AVERROR(EINVAL); + if (reinit) { + if (avctx) + ret = ff_set_dimensions(s->avctx, + s->capture.selection.width != 0 ? s->capture.selection.width : s->capture.width, + s->capture.selection.height != 0 ? s->capture.selection.height : s->capture.height); + if (ret < 0) + av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n"); + + ret = ff_v4l2_m2m_codec_reinit(s); + if (ret) { + av_log(avctx, AV_LOG_ERROR, "v4l2_m2m_codec_reinit failed\n"); + return AVERROR(EINVAL); + } + + if (s->capture.width > ff_v4l2_get_format_width(&s->capture.format) || + s->capture.height > ff_v4l2_get_format_height(&s->capture.format)) { + av_log(avctx, AV_LOG_ERROR, "Format post reinit too small: wanted %dx%d > got %dx%d\n", + s->capture.width, s->capture.height, + ff_v4l2_get_format_width(&s->capture.format), ff_v4l2_get_format_height(&s->capture.format)); + return AVERROR(EINVAL); + } + + // Update pixel format - should only actually do something on initial change + s->capture.av_pix_fmt = + ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO); + if (s->output_drm) { + avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; + avctx->sw_pix_fmt = s->capture.av_pix_fmt; + } + else + avctx->pix_fmt = s->capture.av_pix_fmt; + + goto reinit_run; } + /* Buffers are OK so just stream off to ack */ + av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only - restart decode\n", __func__); + + ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); + if (ret) + av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n"); + s->draining = 0; + /* reinit executed */ +reinit_run: + ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMON); return 1; } @@ -266,171 +452,293 @@ static int v4l2_stop_encode(V4L2Context *ctx) return 0; } -static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout) +// DQ a buffer +// Amalgamates all the various ways there are of signalling EOS/Event to +// generate a consistant EPIPE. +// +// Sets ctx->flag_last if next dq would produce EPIPE (i.e. stream has stopped) +// +// Returns: +// 0 Success +// AVERROR(EPIPE) Nothing more to read +// AVERROR(ENOSPC) No buffers in Q to put result in +// * AVERROR(..) + + static int +dq_buf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf) { - struct v4l2_plane planes[VIDEO_MAX_PLANES]; - struct v4l2_buffer buf = { 0 }; - V4L2Buffer *avbuf; - struct pollfd pfd = { - .events = POLLIN | POLLRDNORM | POLLPRI | POLLOUT | POLLWRNORM, /* default blocking capture */ - .fd = ctx_to_m2mctx(ctx)->fd, + V4L2m2mContext * const m = ctx_to_m2mctx(ctx); + AVCodecContext * const avctx = m->avctx; + V4L2Buffer * avbuf; + const int is_mp = V4L2_TYPE_IS_MULTIPLANAR(ctx->type); + + struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; + + struct v4l2_buffer buf = { + .type = ctx->type, + .memory = V4L2_MEMORY_MMAP, }; - int i, ret; - if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx->buffers) { - for (i = 0; i < ctx->num_buffers; i++) { - if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER) - break; - } - if (i == ctx->num_buffers) - av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers returned to " - "userspace. Increase num_capture_buffers " - "to prevent device deadlock or dropped " - "packets/frames.\n"); - } - - /* if we are draining and there are no more capture buffers queued in the driver we are done */ - if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx_to_m2mctx(ctx)->draining) { - for (i = 0; i < ctx->num_buffers; i++) { - /* capture buffer initialization happens during decode hence - * detection happens at runtime - */ - if (!ctx->buffers) - break; - - if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER) - goto start; + *ppavbuf = NULL; + + if (ctx->flag_last) + return AVERROR(EPIPE); + + if (is_mp) { + buf.length = VIDEO_MAX_PLANES; + buf.m.planes = planes; + } + + while (ioctl(m->fd, VIDIOC_DQBUF, &buf) != 0) { + const int err = errno; + av_assert0(AVERROR(err) < 0); + if (err != EINTR) { + av_log(avctx, AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n", + ctx->name, av_err2str(AVERROR(err))); + + if (err == EPIPE) + ctx->flag_last = 1; + + return AVERROR(err); } - ctx->done = 1; - return NULL; } + atomic_fetch_sub(&ctx->q_count, 1); + + avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data; + ff_v4l2_buffer_set_avail(avbuf); + avbuf->buf = buf; + if (is_mp) { + memcpy(avbuf->planes, planes, sizeof(planes)); + avbuf->buf.m.planes = avbuf->planes; + } + // Done with any attached buffer + av_buffer_unref(&avbuf->ref_buf); -start: - if (V4L2_TYPE_IS_OUTPUT(ctx->type)) - pfd.events = POLLOUT | POLLWRNORM; - else { - /* no need to listen to requests for more input while draining */ - if (ctx_to_m2mctx(ctx)->draining) - pfd.events = POLLIN | POLLRDNORM | POLLPRI; + if (V4L2_TYPE_IS_CAPTURE(ctx->type)) { + // Zero length cap buffer return == EOS + if ((is_mp ? buf.m.planes[0].bytesused : buf.bytesused) == 0) { + av_log(avctx, AV_LOG_DEBUG, "Buffer empty - reQ\n"); + + // Must reQ so we don't leak + // May not matter if the next thing we do is release all the + // buffers but better to be tidy. + ff_v4l2_buffer_enqueue(avbuf); + + ctx->flag_last = 1; + return AVERROR(EPIPE); + } + +#ifdef V4L2_BUF_FLAG_LAST + // If flag_last set then this contains data but is the last frame + // so remember that but return OK + if ((buf.flags & V4L2_BUF_FLAG_LAST) != 0) + ctx->flag_last = 1; +#endif } - for (;;) { - ret = poll(&pfd, 1, timeout); - if (ret > 0) - break; - if (errno == EINTR) + *ppavbuf = avbuf; + return 0; +} + +/** + * handle resolution change event and end of stream event + * Expects to be called after the stream has stopped + * + * returns 1 if reinit was successful, negative if it failed + * returns 0 if reinit was not executed + */ +static int +get_event(V4L2m2mContext * const m) +{ + AVCodecContext * const avctx = m->avctx; + struct v4l2_event evt = { 0 }; + + while (ioctl(m->fd, VIDIOC_DQEVENT, &evt) != 0) { + const int rv = AVERROR(errno); + if (rv == AVERROR(EINTR)) continue; - return NULL; + if (rv == AVERROR(EAGAIN)) { + av_log(avctx, AV_LOG_WARNING, "V4L2 failed to get expected event - assume EOS\n"); + return AVERROR_EOF; + } + av_log(avctx, AV_LOG_ERROR, "V4L2 VIDIOC_DQEVENT: %s\n", av_err2str(rv)); + return rv; } - /* 0. handle errors */ - if (pfd.revents & POLLERR) { - /* if we are trying to get free buffers but none have been queued yet - no need to raise a warning */ - if (timeout == 0) { - for (i = 0; i < ctx->num_buffers; i++) { - if (ctx->buffers[i].status != V4L2BUF_AVAILABLE) - av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name); - } - } - else - av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name); + av_log(avctx, AV_LOG_DEBUG, "Dq event %d\n", evt.type); - return NULL; + if (evt.type == V4L2_EVENT_EOS) { + av_log(avctx, AV_LOG_TRACE, "V4L2 VIDIOC_EVENT_EOS\n"); + return AVERROR_EOF; } - /* 1. handle resolution changes */ - if (pfd.revents & POLLPRI) { - ret = v4l2_handle_event(ctx); - if (ret < 0) { - /* if re-init failed, abort */ - ctx->done = 1; - return NULL; - } - if (ret) { - /* if re-init was successful drop the buffer (if there was one) - * since we had to reconfigure capture (unmap all buffers) - */ - return NULL; + if (evt.type == V4L2_EVENT_SOURCE_CHANGE) + return do_source_change(m); + + return 0; +} + +static inline int +dq_ok(const V4L2Context * const c) +{ + return c->streamon && atomic_load(&c->q_count) != 0; +} + +// Get a buffer +// If output then just gets the buffer in the expected way +// If capture then runs the capture state m/c to deal with res change etc. +// If return value == 0 then *ppavbuf != NULL + +static int +get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout) +{ + V4L2m2mContext * const m = ctx_to_m2mctx(ctx); + AVCodecContext * const avctx = m->avctx; + const int is_cap = V4L2_TYPE_IS_CAPTURE(ctx->type); + + const unsigned int poll_cap = (POLLIN | POLLRDNORM); + const unsigned int poll_out = (POLLOUT | POLLWRNORM); + const unsigned int poll_event = POLLPRI; + + *ppavbuf = NULL; + + for (;;) { + struct pollfd pfd = { + .fd = m->fd, + // If capture && stream not started then assume we are waiting for the initial event + .events = !is_cap ? poll_out : + !ff_v4l2_ctx_eos(ctx) && ctx->streamon ? poll_cap : + poll_event, + }; + int ret; + + if (ctx->done) { + av_log(avctx, AV_LOG_TRACE, "V4L2 %s already done\n", ctx->name); + return AVERROR_EOF; } - } - /* 2. dequeue the buffer */ - if (pfd.revents & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) { + // If capture && timeout == -1 then also wait for rx buffer free + if (is_cap && timeout == -1 && dq_ok(&m->output) && !m->draining) + pfd.events |= poll_out; - if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) { - /* there is a capture buffer ready */ - if (pfd.revents & (POLLIN | POLLRDNORM)) - goto dequeue; + // If nothing Qed all we will get is POLLERR - avoid that + if ((pfd.events == poll_out && !dq_ok(&m->output)) || + (pfd.events == poll_cap && !dq_ok(&m->capture)) || + (pfd.events == (poll_cap | poll_out) && !dq_ok(&m->capture) && !dq_ok(&m->output))) { + av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s empty\n", ctx->name); + return AVERROR(ENOSPC); + } - /* the driver is ready to accept more input; instead of waiting for the capture - * buffer to complete we return NULL so input can proceed (we are single threaded) - */ - if (pfd.revents & (POLLOUT | POLLWRNORM)) - return NULL; + // Timeout kludged s.t. "forever" eventually gives up & produces logging + // If waiting for an event when we have seen a last_frame then we expect + // it to be ready already so force a short timeout + ret = poll(&pfd, 1, + ff_v4l2_ctx_eos(ctx) ? 10 : + timeout == -1 ? 3000 : timeout); + if (ret < 0) { + ret = AVERROR(errno); // Remember errno before logging etc. + av_assert0(ret < 0); } -dequeue: - memset(&buf, 0, sizeof(buf)); - buf.memory = V4L2_MEMORY_MMAP; - buf.type = ctx->type; - if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { - memset(planes, 0, sizeof(planes)); - buf.length = VIDEO_MAX_PLANES; - buf.m.planes = planes; + av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s ret=%d, timeout=%d, events=%#x, revents=%#x\n", + ctx->name, ret, timeout, pfd.events, pfd.revents); + + if (ret < 0) { + if (ret == AVERROR(EINTR)) + continue; + av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll error %d (%s)\n", ctx->name, AVUNERROR(ret), av_err2str(ret)); + return ret; } - ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf); - if (ret) { - if (errno != EAGAIN) { - ctx->done = 1; - if (errno != EPIPE) - av_log(logger(ctx), AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n", - ctx->name, av_err2str(AVERROR(errno))); + if (ret == 0) { + if (timeout == -1) + av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll unexpected timeout: events=%#x\n", ctx->name, pfd.events); + if (ff_v4l2_ctx_eos(ctx)) { + av_log(avctx, AV_LOG_WARNING, "V4L2 %s poll event timeout\n", ctx->name); + ret = get_event(m); + if (ret < 0) { + ctx->done = 1; + return ret; + } } - return NULL; + return AVERROR(EAGAIN); + } + + if ((pfd.revents & POLLERR) != 0) { + av_log(avctx, AV_LOG_WARNING, "V4L2 %s POLLERR\n", ctx->name); + return AVERROR_UNKNOWN; } - if (ctx_to_m2mctx(ctx)->draining && !V4L2_TYPE_IS_OUTPUT(ctx->type)) { - int bytesused = V4L2_TYPE_IS_MULTIPLANAR(buf.type) ? - buf.m.planes[0].bytesused : buf.bytesused; - if (bytesused == 0) { + if ((pfd.revents & poll_event) != 0) { + ret = get_event(m); + if (ret < 0) { ctx->done = 1; - return NULL; + return ret; } -#ifdef V4L2_BUF_FLAG_LAST - if (buf.flags & V4L2_BUF_FLAG_LAST) - ctx->done = 1; -#endif + continue; + } + + if ((pfd.revents & poll_cap) != 0) { + ret = dq_buf(ctx, ppavbuf); + if (ret == AVERROR(EPIPE)) + continue; + return ret; } - avbuf = &ctx->buffers[buf.index]; - avbuf->status = V4L2BUF_AVAILABLE; - avbuf->buf = buf; - if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { - memcpy(avbuf->planes, planes, sizeof(planes)); - avbuf->buf.m.planes = avbuf->planes; + if ((pfd.revents & poll_out) != 0) { + if (is_cap) + return AVERROR(EAGAIN); + return dq_buf(ctx, ppavbuf); } - return avbuf; + + av_log(avctx, AV_LOG_ERROR, "V4L2 poll unexpected events=%#x, revents=%#x\n", pfd.events, pfd.revents); + return AVERROR_UNKNOWN; } +} - return NULL; +// Clear out flags and timestamps that should should be set by the user +// Returns the passed avbuf +static V4L2Buffer * +clean_v4l2_buffer(V4L2Buffer * const avbuf) +{ + struct v4l2_buffer *const buf = &avbuf->buf; + + buf->flags = 0; + buf->field = V4L2_FIELD_ANY; + buf->timestamp = (struct timeval){0}; + buf->timecode = (struct v4l2_timecode){0}; + buf->sequence = 0; + + return avbuf; +} + +int +ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1) +{ + V4L2Buffer * avbuf; + if (timeout1 != 0) { + int rv = get_qbuf(ctx, &avbuf, timeout1); + if (rv != 0) + return rv; + } + do { + get_qbuf(ctx, &avbuf, 0); + } while (avbuf); + return 0; } static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) { - int timeout = 0; /* return when no more buffers to dequeue */ int i; /* get back as many output buffers as possible */ - if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { - do { - } while (v4l2_dequeue_v4l2buf(ctx, timeout)); - } + if (V4L2_TYPE_IS_OUTPUT(ctx->type)) + ff_v4l2_dq_all(ctx, 0); for (i = 0; i < ctx->num_buffers; i++) { - if (ctx->buffers[i].status == V4L2BUF_AVAILABLE) - return &ctx->buffers[i]; + V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; + if (avbuf->status == V4L2BUF_AVAILABLE) + return clean_v4l2_buffer(avbuf); } return NULL; @@ -438,25 +746,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) static int v4l2_release_buffers(V4L2Context* ctx) { - struct v4l2_requestbuffers req = { - .memory = V4L2_MEMORY_MMAP, - .type = ctx->type, - .count = 0, /* 0 -> unmaps buffers from the driver */ - }; - int i, j; + int i; + int ret = 0; + const int fd = ctx_to_m2mctx(ctx)->fd; - for (i = 0; i < ctx->num_buffers; i++) { - V4L2Buffer *buffer = &ctx->buffers[i]; + // Orphan any buffers in the wild + ff_weak_link_break(&ctx->wl_master); + + if (ctx->bufrefs) { + for (i = 0; i < ctx->num_buffers; i++) + av_buffer_unref(ctx->bufrefs + i); + } + + if (fd != -1) { + struct v4l2_requestbuffers req = { + .memory = V4L2_MEMORY_MMAP, + .type = ctx->type, + .count = 0, /* 0 -> unmap all buffers from the driver */ + }; + + while ((ret = ioctl(fd, VIDIOC_REQBUFS, &req)) == -1) { + if (errno == EINTR) + continue; - for (j = 0; j < buffer->num_planes; j++) { - struct V4L2Plane_info *p = &buffer->plane_info[j]; - if (p->mm_addr && p->length) - if (munmap(p->mm_addr, p->length) < 0) - av_log(logger(ctx), AV_LOG_ERROR, "%s unmap plane (%s))\n", ctx->name, av_err2str(AVERROR(errno))); + ret = AVERROR(errno); + + av_log(logger(ctx), AV_LOG_ERROR, "release all %s buffers (%s)\n", + ctx->name, av_err2str(AVERROR(errno))); + + if (ctx_to_m2mctx(ctx)->output_drm) + av_log(logger(ctx), AV_LOG_ERROR, + "Make sure the DRM client releases all FB/GEM objects before closing the codec (ie):\n" + "for all buffers: \n" + " 1. drmModeRmFB(..)\n" + " 2. drmIoctl(.., DRM_IOCTL_GEM_CLOSE,... )\n"); } } + atomic_store(&ctx->q_count, 0); - return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_REQBUFS, &req); + return ret; } static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt) @@ -485,6 +813,8 @@ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfm static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) { + V4L2m2mContext* s = ctx_to_m2mctx(ctx); + V4L2m2mPriv *priv = s->avctx->priv_data; enum AVPixelFormat pixfmt = ctx->av_pix_fmt; struct v4l2_fmtdesc fdesc; int ret; @@ -498,21 +828,22 @@ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) return 0; } - for (;;) { + for (;; ++fdesc.index) { ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc); if (ret) return AVERROR(EINVAL); + if (priv->pix_fmt != AV_PIX_FMT_NONE) { + if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt)) + continue; + } + pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO); ret = v4l2_try_raw_format(ctx, pixfmt); - if (ret){ - fdesc.index++; - continue; + if (ret == 0) { + *p = pixfmt; + return 0; } - - *p = pixfmt; - - return 0; } return AVERROR(EINVAL); @@ -555,30 +886,99 @@ static int v4l2_get_coded_format(V4L2Context* ctx, uint32_t *p) * *****************************************************************************/ + +static void flush_all_buffers_status(V4L2Context* const ctx) +{ + int i; + + if (!ctx->bufrefs) + return; + + for (i = 0; i < ctx->num_buffers; ++i) { + struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; + if (buf->status == V4L2BUF_IN_DRIVER) + ff_v4l2_buffer_set_avail(buf); + } + atomic_store(&ctx->q_count, 0); +} + +static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx) +{ + int i; + int rv; + + if (!ctx->bufrefs) { + rv = ff_v4l2_context_init(ctx); + if (rv) { + av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); + return rv; + } + } + + for (i = 0; i < ctx->num_buffers; ++i) { + struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; + if (buf->status == V4L2BUF_AVAILABLE) { + rv = ff_v4l2_buffer_enqueue(buf); + if (rv < 0) + return rv; + } + } + return 0; +} + int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) { int type = ctx->type; - int ret; + int ret = 0; + AVCodecContext * const avctx = logger(ctx); - ret = ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type); - if (ret < 0) - return AVERROR(errno); + // Avoid doing anything if there is nothing we can do + if (cmd == VIDIOC_STREAMOFF && !ctx_buffers_alloced(ctx) && !ctx->streamon) + return 0; - ctx->streamon = (cmd == VIDIOC_STREAMON); + ff_mutex_lock(&ctx->lock); - return 0; + if (cmd == VIDIOC_STREAMON && !V4L2_TYPE_IS_OUTPUT(ctx->type)) + stuff_all_buffers(avctx, ctx); + + if (ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type) < 0) { + const int err = errno; + av_log(avctx, AV_LOG_ERROR, "%s set status %d (%s) failed: err=%d\n", ctx->name, + cmd, (cmd == VIDIOC_STREAMON) ? "ON" : "OFF", err); + ret = AVERROR(err); + } + else + { + if (cmd == VIDIOC_STREAMOFF) + flush_all_buffers_status(ctx); + else + ctx->first_buf = 1; + + ctx->streamon = (cmd == VIDIOC_STREAMON); + av_log(avctx, AV_LOG_DEBUG, "%s set status %d (%s) OK\n", ctx->name, + cmd, (cmd == VIDIOC_STREAMON) ? "ON" : "OFF"); + } + + // Both stream off & on effectively clear flag_last + ctx->flag_last = 0; + + ff_mutex_unlock(&ctx->lock); + + return ret; } int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) { - V4L2m2mContext *s = ctx_to_m2mctx(ctx); + V4L2m2mContext *const s = ctx_to_m2mctx(ctx); + AVCodecContext *const avctx = s->avctx; + int64_t track_ts; V4L2Buffer* avbuf; int ret; if (!frame) { ret = v4l2_stop_encode(ctx); if (ret) - av_log(logger(ctx), AV_LOG_ERROR, "%s stop_encode\n", ctx->name); + av_log(avctx, AV_LOG_ERROR, "%s stop_encode\n", ctx->name); s->draining= 1; return 0; } @@ -587,23 +987,29 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) if (!avbuf) return AVERROR(EAGAIN); - ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf); + track_ts = xlat_pts_frame_in(avctx, &s->xlat, frame); + + ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf, track_ts); if (ret) return ret; return ff_v4l2_buffer_enqueue(avbuf); } -int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) +int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, + const void * extdata, size_t extlen) { V4L2m2mContext *s = ctx_to_m2mctx(ctx); + AVCodecContext *const avctx = s->avctx; V4L2Buffer* avbuf; int ret; + int64_t track_ts; if (!pkt->size) { ret = v4l2_stop_decode(ctx); + // Log but otherwise ignore stop failure if (ret) - av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode\n", ctx->name); + av_log(avctx, AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret); s->draining = 1; return 0; } @@ -612,8 +1018,13 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) if (!avbuf) return AVERROR(EAGAIN); - ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf); - if (ret) + track_ts = xlat_pts_pkt_in(avctx, &s->xlat, pkt); + + ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, track_ts); + if (ret == AVERROR(ENOMEM)) + av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n", + __func__, pkt->size, avbuf->planes[0].length); + else if (ret) return ret; return ff_v4l2_buffer_enqueue(avbuf); @@ -621,42 +1032,36 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) { + V4L2m2mContext *s = ctx_to_m2mctx(ctx); + AVCodecContext *const avctx = s->avctx; V4L2Buffer *avbuf; + int rv; - /* - * timeout=-1 blocks until: - * 1. decoded frame available - * 2. an input buffer is ready to be dequeued - */ - avbuf = v4l2_dequeue_v4l2buf(ctx, timeout); - if (!avbuf) { - if (ctx->done) - return AVERROR_EOF; - - return AVERROR(EAGAIN); - } + do { + if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) + return rv; + if ((rv = ff_v4l2_buffer_buf_to_avframe(frame, avbuf)) != 0) + return rv; + } while (xlat_pts_frame_out(avctx, &s->xlat, frame) != 0); - return ff_v4l2_buffer_buf_to_avframe(frame, avbuf); + return 0; } -int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) +int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout) { + V4L2m2mContext *s = ctx_to_m2mctx(ctx); + AVCodecContext *const avctx = s->avctx; V4L2Buffer *avbuf; + int rv; - /* - * blocks until: - * 1. encoded packet available - * 2. an input buffer ready to be dequeued - */ - avbuf = v4l2_dequeue_v4l2buf(ctx, -1); - if (!avbuf) { - if (ctx->done) - return AVERROR_EOF; + do { + if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) + return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv; // Caller not currently expecting ENOSPC + if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0) + return rv; + } while (xlat_pts_pkt_out(avctx, &s->xlat, pkt) != 0); - return AVERROR(EAGAIN); - } - - return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf); + return 0; } int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) @@ -688,78 +1093,179 @@ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) int ff_v4l2_context_set_format(V4L2Context* ctx) { - return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); + int ret; + + ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); + if (ret != 0) + return ret; + + // Check returned size against min size and if smaller have another go + // Only worry about plane[0] as this is meant to enforce limits for + // encoded streams where we might know a bit more about the shape + // than the driver + if (V4L2_TYPE_IS_MULTIPLANAR(ctx->format.type)) { + if (ctx->min_buf_size <= ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage) + return 0; + ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage = ctx->min_buf_size; + } + else { + if (ctx->min_buf_size <= ctx->format.fmt.pix.sizeimage) + return 0; + ctx->format.fmt.pix.sizeimage = ctx->min_buf_size; + } + + ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); + return ret; } void ff_v4l2_context_release(V4L2Context* ctx) { int ret; - if (!ctx->buffers) + if (!ctx->bufrefs) return; ret = v4l2_release_buffers(ctx); if (ret) av_log(logger(ctx), AV_LOG_WARNING, "V4L2 failed to unmap the %s buffers\n", ctx->name); - av_freep(&ctx->buffers); + av_freep(&ctx->bufrefs); + av_buffer_unref(&ctx->frames_ref); + + ff_mutex_destroy(&ctx->lock); + pthread_cond_destroy(&ctx->cond); } -int ff_v4l2_context_init(V4L2Context* ctx) + +static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers, const enum v4l2_memory mem) { - V4L2m2mContext *s = ctx_to_m2mctx(ctx); + V4L2m2mContext * const s = ctx_to_m2mctx(ctx); struct v4l2_requestbuffers req; - int ret, i; - - if (!v4l2_type_supported(ctx)) { - av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); - return AVERROR_PATCHWELCOME; - } + int ret; + int i; - ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); - if (ret) - av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", ctx->name); + av_assert0(ctx->bufrefs == NULL); memset(&req, 0, sizeof(req)); - req.count = ctx->num_buffers; - req.memory = V4L2_MEMORY_MMAP; + req.count = req_buffers; + req.memory = mem; req.type = ctx->type; - ret = ioctl(s->fd, VIDIOC_REQBUFS, &req); - if (ret < 0) { - av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, strerror(errno)); - return AVERROR(errno); + while ((ret = ioctl(s->fd, VIDIOC_REQBUFS, &req)) == -1) { + if (errno != EINTR) { + ret = AVERROR(errno); + av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, av_err2str(ret)); + return ret; + } } ctx->num_buffers = req.count; - ctx->buffers = av_mallocz(ctx->num_buffers * sizeof(V4L2Buffer)); - if (!ctx->buffers) { + ctx->bufrefs = av_mallocz(ctx->num_buffers * sizeof(*ctx->bufrefs)); + if (!ctx->bufrefs) { av_log(logger(ctx), AV_LOG_ERROR, "%s malloc enomem\n", ctx->name); - return AVERROR(ENOMEM); + goto fail_release; } - for (i = 0; i < req.count; i++) { - ctx->buffers[i].context = ctx; - ret = ff_v4l2_buffer_initialize(&ctx->buffers[i], i); - if (ret < 0) { + ctx->wl_master = ff_weak_link_new(ctx); + if (!ctx->wl_master) { + ret = AVERROR(ENOMEM); + goto fail_release; + } + + for (i = 0; i < ctx->num_buffers; i++) { + ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx, mem); + if (ret) { av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret)); - goto error; + goto fail_release; } } av_log(logger(ctx), AV_LOG_DEBUG, "%s: %s %02d buffers initialized: %04ux%04u, sizeimage %08u, bytesperline %08u\n", ctx->name, V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? av_fourcc2str(ctx->format.fmt.pix_mp.pixelformat) : av_fourcc2str(ctx->format.fmt.pix.pixelformat), req.count, - v4l2_get_width(&ctx->format), - v4l2_get_height(&ctx->format), + ff_v4l2_get_format_width(&ctx->format), + ff_v4l2_get_format_height(&ctx->format), V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage : ctx->format.fmt.pix.sizeimage, V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].bytesperline : ctx->format.fmt.pix.bytesperline); return 0; -error: +fail_release: v4l2_release_buffers(ctx); + av_freep(&ctx->bufrefs); + return ret; +} + +int ff_v4l2_context_init(V4L2Context* ctx) +{ + struct v4l2_queryctrl qctrl; + V4L2m2mContext * const s = ctx_to_m2mctx(ctx); + int ret; + + // It is not valid to reinit a context without a previous release + av_assert0(ctx->bufrefs == NULL); + + if (!v4l2_type_supported(ctx)) { + av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); + return AVERROR_PATCHWELCOME; + } + + ff_mutex_init(&ctx->lock, NULL); + pthread_cond_init(&ctx->cond, NULL); + atomic_init(&ctx->q_count, 0); + + if (s->output_drm) { + AVHWFramesContext *hwframes; + + ctx->frames_ref = av_hwframe_ctx_alloc(s->device_ref); + if (!ctx->frames_ref) { + ret = AVERROR(ENOMEM); + goto fail_unlock; + } - av_freep(&ctx->buffers); + hwframes = (AVHWFramesContext*)ctx->frames_ref->data; + hwframes->format = AV_PIX_FMT_DRM_PRIME; + hwframes->sw_format = ctx->av_pix_fmt; + hwframes->width = ctx->width != 0 ? ctx->width : s->avctx->width; + hwframes->height = ctx->height != 0 ? ctx->height : s->avctx->height; + ret = av_hwframe_ctx_init(ctx->frames_ref); + if (ret < 0) + goto fail_unref_hwframes; + } + + ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); + if (ret) { + ret = AVERROR(errno); + av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed: %s\n", ctx->name, av_err2str(ret)); + goto fail_unref_hwframes; + } + + memset(&qctrl, 0, sizeof(qctrl)); + qctrl.id = V4L2_CID_MIN_BUFFERS_FOR_OUTPUT; + if (ioctl(s->fd, VIDIOC_QUERYCTRL, &qctrl) != 0) { + ret = AVERROR(errno); + if (ret != AVERROR(EINVAL)) { + av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_QUERCTRL failed: %s\n", ctx->name, av_err2str(ret)); + goto fail_unref_hwframes; + } + // Control unsupported - set default if wanted + if (ctx->num_buffers < 2) + ctx->num_buffers = 4; + } + else { + if (ctx->num_buffers < 2) + ctx->num_buffers = qctrl.minimum + 2; + ctx->num_buffers = av_clip(ctx->num_buffers, qctrl.minimum, qctrl.maximum); + } + + ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem); + if (ret < 0) + goto fail_unref_hwframes; + + return 0; +fail_unref_hwframes: + av_buffer_unref(&ctx->frames_ref); +fail_unlock: + ff_mutex_destroy(&ctx->lock); return ret; } diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h index 6f7460c89a..5afed3e6ec 100644 --- a/libavcodec/v4l2_context.h +++ b/libavcodec/v4l2_context.h @@ -32,6 +32,8 @@ #include "libavutil/rational.h" #include "codec_id.h" #include "packet.h" +#include "libavutil/buffer.h" +#include "libavutil/thread.h" #include "v4l2_buffers.h" typedef struct V4L2Context { @@ -71,28 +73,57 @@ typedef struct V4L2Context { */ int width, height; AVRational sample_aspect_ratio; + struct v4l2_rect selection; /** - * Indexed array of V4L2Buffers + * If the default size of buffer is less than this then try to + * set to this. */ - V4L2Buffer *buffers; + uint32_t min_buf_size; + + /** + * Indexed array of pointers to V4L2Buffers + */ + AVBufferRef **bufrefs; /** * Readonly after init. */ int num_buffers; + /** + * Buffer memory type V4L2_MEMORY_MMAP or V4L2_MEMORY_DMABUF + */ + enum v4l2_memory buf_mem; + /** * Whether the stream has been started (VIDIOC_STREAMON has been sent). */ int streamon; + /* 1st buffer after stream on */ + int first_buf; + /** * Either no more buffers available or an unrecoverable error was notified * by the V4L2 kernel driver: once set the context has to be exited. */ int done; + int flag_last; + + /** + * If NZ then when Qing frame/pkt use this rather than the + * "real" PTS + */ + uint64_t track_ts; + + AVBufferRef *frames_ref; + atomic_int q_count; + struct ff_weak_link_master *wl_master; + + AVMutex lock; + pthread_cond_t cond; } V4L2Context; /** @@ -148,7 +179,7 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd); * @param[inout] pkt The AVPacket to dequeue to. * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. */ -int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); +int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout); /** * Dequeues a buffer from a V4L2Context to an AVFrame. @@ -157,7 +188,10 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); * @param[in] ctx The V4L2Context to dequeue from. * @param[inout] f The AVFrame to dequeue to. * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds) + * * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. + * AVERROR(ENOSPC) if no buffer availible to put + * the frame in */ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); @@ -171,7 +205,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); * @param[in] pkt A pointer to an AVPacket. * @return 0 in case of success, a negative error otherwise. */ -int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt); +int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size); /** * Enqueues a buffer to a V4L2Context from an AVFrame @@ -184,4 +218,28 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt); */ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f); +/** + * Dequeue all buffers on this queue + * + * Used to recycle output buffers + * + * @param[in] ctx The V4L2Context to dequeue from. + * @param[in] timeout1 A timeout on dequeuing the 1st buffer, + * all others have a timeout of zero + * @return AVERROR(EAGAIN) if timeout1 non-zero then the return + * of the first dequeue operation, 0 otherwise. + */ +int ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1); + +/** + * Returns the number of buffers currently queued + * + * @param[in] ctx The V4L2Context to evaluate + */ +static inline int +ff_v4l2_context_q_count(const V4L2Context* const ctx) +{ + return atomic_load(&ctx->q_count); +} + #endif // AVCODEC_V4L2_CONTEXT_H diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c index 602efb7a16..28d9ed4988 100644 --- a/libavcodec/v4l2_m2m.c +++ b/libavcodec/v4l2_m2m.c @@ -34,6 +34,15 @@ #include "v4l2_context.h" #include "v4l2_fmt.h" #include "v4l2_m2m.h" +#include "v4l2_req_dmabufs.h" + +static void +xlat_init(xlat_track_t * const x) +{ + memset(x, 0, sizeof(*x)); + x->last_pts = AV_NOPTS_VALUE; +} + static inline int v4l2_splane_video(struct v4l2_capability *cap) { @@ -67,7 +76,9 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe) s->capture.done = s->output.done = 0; s->capture.name = "capture"; + s->capture.buf_mem = s->db_ctl != NULL ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; s->output.name = "output"; + s->output.buf_mem = s->input_drm ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; atomic_init(&s->refcount, 0); sem_init(&s->refsync, 0, 0); @@ -84,18 +95,58 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe) if (v4l2_mplane_video(&cap)) { s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; + s->output.format.type = s->output.type; return 0; } if (v4l2_splane_video(&cap)) { s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; + s->output.format.type = s->output.type; return 0; } return AVERROR(EINVAL); } +static int check_size(AVCodecContext * const avctx, V4L2m2mContext * const s) +{ + struct v4l2_format fmt = {.type = s->output.type}; + int rv; + uint32_t pixfmt = ff_v4l2_format_avfmt_to_v4l2(avctx->pix_fmt); + unsigned int w; + unsigned int h; + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { + fmt.fmt.pix_mp.pixelformat = pixfmt; + fmt.fmt.pix_mp.width = avctx->width; + fmt.fmt.pix_mp.height = avctx->height; + } + else { + fmt.fmt.pix.pixelformat = pixfmt; + fmt.fmt.pix.width = avctx->width; + fmt.fmt.pix.height = avctx->height; + } + + rv = ioctl(s->fd, VIDIOC_TRY_FMT, &fmt); + + if (rv != 0) { + rv = AVERROR(errno); + av_log(avctx, AV_LOG_ERROR, "%s: Tryfmt failed: %s\n", __func__, av_err2str(rv)); + return rv; + } + + w = ff_v4l2_get_format_width(&fmt); + h = ff_v4l2_get_format_height(&fmt); + + if (w < avctx->width || h < avctx->height) { + av_log(avctx, AV_LOG_WARNING, "%s: Size check failed: asked for %dx%d, got: %dx%d\n", __func__, avctx->width, avctx->height, w, h); + return AVERROR(EINVAL); + } + + return 0; +} + static int v4l2_probe_driver(V4L2m2mContext *s) { void *log_ctx = s->avctx; @@ -115,6 +166,11 @@ static int v4l2_probe_driver(V4L2m2mContext *s) goto done; } + // If being given frames (encode) check that V4L2 can cope with the size + if (s->output.av_codec_id == AV_CODEC_ID_RAWVIDEO && + (ret = check_size(s->avctx, s)) != 0) + goto done; + ret = ff_v4l2_context_get_format(&s->capture, 1); if (ret) { av_log(log_ctx, AV_LOG_DEBUG, "v4l2 capture format not supported\n"); @@ -216,13 +272,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) av_log(log_ctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n"); /* 2. unmap the capture buffers (v4l2 and ffmpeg): - * we must wait for all references to be released before being allowed - * to queue new buffers. */ - av_log(log_ctx, AV_LOG_DEBUG, "waiting for user to release AVBufferRefs\n"); - if (atomic_load(&s->refcount)) - while(sem_wait(&s->refsync) == -1 && errno == EINTR); - ff_v4l2_context_release(&s->capture); /* 3. get the new capture format */ @@ -241,7 +291,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) /* 5. complete reinit */ s->draining = 0; - s->reinit = 0; return 0; } @@ -258,6 +307,9 @@ static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context) av_frame_unref(s->frame); av_frame_free(&s->frame); av_packet_unref(&s->buf_pkt); + av_freep(&s->extdata_data); + + av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Context destroyed\n"); av_free(s); } @@ -270,6 +322,11 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) if (!s) return 0; + av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Codec end\n"); + + if (s->avctx && av_codec_is_decoder(s->avctx->codec)) + av_packet_unref(&s->buf_pkt); + if (s->fd >= 0) { ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF); if (ret) @@ -282,7 +339,15 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) ff_v4l2_context_release(&s->output); + dmabufs_ctl_unref(&s->db_ctl); + close(s->fd); + s->fd = -1; + s->self_ref = NULL; + // This is only called on avctx close so after this point we don't have that + // Crash sooner if we find we are using it (can still log with avctx = NULL) + s->avctx = NULL; + priv->context = NULL; av_buffer_unref(&priv->context_ref); return 0; @@ -326,35 +391,38 @@ int ff_v4l2_m2m_codec_init(V4L2m2mPriv *priv) return v4l2_configure_contexts(s); } -int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s) +int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **pps) { - *s = av_mallocz(sizeof(V4L2m2mContext)); - if (!*s) + V4L2m2mContext * const s = av_mallocz(sizeof(V4L2m2mContext)); + + *pps = NULL; + if (!s) return AVERROR(ENOMEM); - priv->context_ref = av_buffer_create((uint8_t *) *s, sizeof(V4L2m2mContext), + priv->context_ref = av_buffer_create((uint8_t *)s, sizeof(*s), &v4l2_m2m_destroy_context, NULL, 0); if (!priv->context_ref) { - av_freep(s); + av_free(s); return AVERROR(ENOMEM); } /* assign the context */ - priv->context = *s; - (*s)->priv = priv; + priv->context = s; + s->priv = priv; /* populate it */ - priv->context->capture.num_buffers = priv->num_capture_buffers; - priv->context->output.num_buffers = priv->num_output_buffers; - priv->context->self_ref = priv->context_ref; - priv->context->fd = -1; + s->capture.num_buffers = priv->num_capture_buffers; + s->output.num_buffers = priv->num_output_buffers; + s->self_ref = priv->context_ref; + s->fd = -1; + xlat_init(&s->xlat); priv->context->frame = av_frame_alloc(); if (!priv->context->frame) { av_buffer_unref(&priv->context_ref); - *s = NULL; /* freed when unreferencing context_ref */ return AVERROR(ENOMEM); } + *pps = s; return 0; } diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h index 04d86d7b92..a506e69d67 100644 --- a/libavcodec/v4l2_m2m.h +++ b/libavcodec/v4l2_m2m.h @@ -30,6 +30,7 @@ #include #include "libavcodec/avcodec.h" +#include "libavutil/pixfmt.h" #include "v4l2_context.h" #define container_of(ptr, type, member) ({ \ @@ -40,6 +41,38 @@ { "num_output_buffers", "Number of buffers in the output context",\ OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 2, INT_MAX, FLAGS } +#define FF_V4L2_M2M_TRACK_SIZE 128 +typedef struct V4L2m2mTrackEl { + int discard; // If we see this buffer its been flushed, so discard + int pending; + int pkt_size; + int64_t pts; + int64_t dts; + int64_t reordered_opaque; + int64_t pkt_pos; + int64_t pkt_duration; + int64_t track_pts; +} V4L2m2mTrackEl; + +typedef struct pts_stats_s +{ + void * logctx; + const char * name; // For debug + unsigned int last_count; + unsigned int last_interval; + int64_t last_pts; + int64_t guess; +} pts_stats_t; + +typedef struct xlat_track_s { + unsigned int track_no; + int64_t last_pts; // Last valid PTS decoded + int64_t last_opaque; + V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE]; +} xlat_track_t; + +struct dmabufs_ctl; + typedef struct V4L2m2mContext { char devname[PATH_MAX]; int fd; @@ -52,10 +85,10 @@ typedef struct V4L2m2mContext { AVCodecContext *avctx; sem_t refsync; atomic_uint refcount; - int reinit; /* null frame/packet received */ int draining; + int running; AVPacket buf_pkt; /* Reference to a frame. Only used during encoding */ @@ -66,6 +99,36 @@ typedef struct V4L2m2mContext { /* reference back to V4L2m2mPriv */ void *priv; + + AVBufferRef *device_ref; + + /* generate DRM frames */ + int output_drm; + + /* input frames are drmprime */ + int input_drm; + + /* Frame tracking */ + xlat_track_t xlat; + + pts_stats_t pts_stat; + + /* req pkt */ + int req_pkt; + int reorder_size; + + /* Ext data sent */ + int extdata_sent; + /* Ext data sent in packet - overrides ctx */ + void * extdata_data; + size_t extdata_size; + +#define FF_V4L2_QUIRK_REINIT_ALWAYS 1 +#define FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN 2 + /* Quirks */ + unsigned int quirks; + + struct dmabufs_ctl * db_ctl; } V4L2m2mContext; typedef struct V4L2m2mPriv { @@ -76,6 +139,8 @@ typedef struct V4L2m2mPriv { int num_output_buffers; int num_capture_buffers; + const char * dmabuf_alloc; + enum AVPixelFormat pix_fmt; } V4L2m2mPriv; /** @@ -129,4 +194,26 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx); */ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx); + +static inline unsigned int ff_v4l2_get_format_width(const struct v4l2_format * const fmt) +{ + return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; +} + +static inline unsigned int ff_v4l2_get_format_height(const struct v4l2_format * const fmt) +{ + return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; +} + +static inline uint32_t ff_v4l2_get_format_pixelformat(const struct v4l2_format * const fmt) +{ + return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat; +} + +static inline int ff_v4l2_ctx_eos(const V4L2Context * const ctx) +{ + return ctx->flag_last; +} + + #endif /* AVCODEC_V4L2_M2M_H */ diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c index 4944d08511..11c83b2d66 100644 --- a/libavcodec/v4l2_m2m_dec.c +++ b/libavcodec/v4l2_m2m_dec.c @@ -21,8 +21,14 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "config_components.h" + #include #include + +#include "libavutil/avassert.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_drm.h" #include "libavutil/pixfmt.h" #include "libavutil/pixdesc.h" #include "libavutil/opt.h" @@ -30,75 +36,279 @@ #include "codec_internal.h" #include "libavcodec/decode.h" +#include "libavcodec/hwaccels.h" +#include "libavcodec/internal.h" +#include "libavcodec/hwconfig.h" + #include "v4l2_context.h" #include "v4l2_m2m.h" #include "v4l2_fmt.h" +#include "v4l2_req_dmabufs.h" -static int v4l2_try_start(AVCodecContext *avctx) +#if CONFIG_H264_DECODER +#include "h264_parse.h" +#endif +#if CONFIG_HEVC_DECODER +#include "hevc_parse.h" +#endif + +// Pick 64 for max last count - that is >1sec at 60fps +#define STATS_LAST_COUNT_MAX 64 +#define STATS_INTERVAL_MAX (1 << 30) + +#ifndef FF_API_BUFFER_SIZE_T +#define FF_API_BUFFER_SIZE_T 1 +#endif + +#define DUMP_FAILED_EXTRADATA 0 + +#if DUMP_FAILED_EXTRADATA +static inline char hex1(unsigned int x) { - V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; - V4L2Context *const capture = &s->capture; - V4L2Context *const output = &s->output; - struct v4l2_selection selection = { 0 }; - int ret; + x &= 0xf; + return x <= 9 ? '0' + x : 'a' + x - 10; +} - /* 1. start the output process */ - if (!output->streamon) { - ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMON); - if (ret < 0) { - av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON on output context\n"); - return ret; - } +static inline char * hex2(char * s, unsigned int x) +{ + *s++ = hex1(x >> 4); + *s++ = hex1(x); + return s; +} + +static inline char * hex4(char * s, unsigned int x) +{ + s = hex2(s, x >> 8); + s = hex2(s, x); + return s; +} + +static inline char * dash2(char * s) +{ + *s++ = '-'; + *s++ = '-'; + return s; +} + +static void +data16(char * s, const unsigned int offset, const uint8_t * m, const size_t len) +{ + size_t i; + s = hex4(s, offset); + m += offset; + for (i = 0; i != 8; ++i) { + *s++ = ' '; + s = len > i + offset ? hex2(s, *m++) : dash2(s); } + *s++ = ' '; + *s++ = ':'; + for (; i != 16; ++i) { + *s++ = ' '; + s = len > i + offset ? hex2(s, *m++) : dash2(s); + } + *s++ = 0; +} - if (capture->streamon) - return 0; +static void +log_dump(void * logctx, int lvl, const void * const data, const size_t len) +{ + size_t i; + for (i = 0; i < len; i += 16) { + char buf[80]; + data16(buf, i, data, len); + av_log(logctx, lvl, "%s\n", buf); + } +} +#endif - /* 2. get the capture format */ - capture->format.type = capture->type; - ret = ioctl(s->fd, VIDIOC_G_FMT, &capture->format); - if (ret) { - av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_FMT ioctl\n"); - return ret; +static unsigned int pts_stats_interval(const pts_stats_t * const stats) +{ + return stats->last_interval; +} + +static int64_t pts_stats_guess(const pts_stats_t * const stats, const int fail_bad_guess) +{ + if (stats->last_count <= 1) + return stats->last_pts; + if (stats->last_pts == AV_NOPTS_VALUE || + fail_bad_guess && (stats->last_interval == 0 || + stats->last_count >= STATS_LAST_COUNT_MAX)) + return AV_NOPTS_VALUE; + return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval; +} + +static void pts_stats_add(pts_stats_t * const stats, int64_t pts) +{ + if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { + if (stats->last_count < STATS_LAST_COUNT_MAX) + ++stats->last_count; + return; } - /* 2.1 update the AVCodecContext */ - avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO); - capture->av_pix_fmt = avctx->pix_fmt; + if (stats->last_pts != AV_NOPTS_VALUE) { + const int64_t interval = pts - stats->last_pts; - /* 3. set the crop parameters */ - selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - selection.r.height = avctx->coded_height; - selection.r.width = avctx->coded_width; - ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection); - if (!ret) { - ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection); - if (ret) { - av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n"); - } else { - av_log(avctx, AV_LOG_DEBUG, "crop output %dx%d\n", selection.r.width, selection.r.height); - /* update the size of the resulting frame */ - capture->height = selection.r.height; - capture->width = selection.r.width; + if (interval < 0 || interval >= STATS_INTERVAL_MAX || + stats->last_count >= STATS_LAST_COUNT_MAX) { + if (stats->last_interval != 0) + av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n", + __func__, stats->name, interval, stats->last_count); + stats->last_interval = 0; + } + else { + const int64_t frame_time = interval / (int64_t)stats->last_count; + + if (frame_time != stats->last_interval) + av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n", + __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time); + stats->last_interval = frame_time; } } - /* 4. init the capture context now that we have the capture format */ - if (!capture->buffers) { - ret = ff_v4l2_context_init(capture); - if (ret) { - av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); - return AVERROR(ENOMEM); + stats->last_pts = pts; + stats->last_count = 1; +} + +static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name) +{ + *stats = (pts_stats_t){ + .logctx = logctx, + .name = name, + .last_count = 1, + .last_interval = 0, + .last_pts = AV_NOPTS_VALUE + }; +} + +// If abdata == NULL then this just counts space required +// Unpacks avcC if detected +static int +h264_xd_copy(const uint8_t * const extradata, const int extrasize, uint8_t * abdata) +{ + const uint8_t * const xdend = extradata + extrasize; + const uint8_t * p = extradata; + uint8_t * d = abdata; + unsigned int n; + unsigned int len; + const unsigned int hdrlen = 4; + unsigned int need_pps = 1; + + if (extrasize < 8) + return AVERROR(EINVAL); + + if (p[0] == 0 && p[1] == 0) { + // Assume a couple of leading zeros are good enough to indicate NAL + if (abdata) + memcpy(d, p, extrasize); + return extrasize; + } + + // avcC starts with a 1 + if (p[0] != 1) + return AVERROR(EINVAL); + + p += 5; + n = *p++ & 0x1f; + +doxps: + while (n--) { + if (xdend - p < 2) + return AVERROR(EINVAL); + len = (p[0] << 8) | p[1]; + p += 2; + if (xdend - p < (ptrdiff_t)len) + return AVERROR(EINVAL); + if (abdata) { + d[0] = 0; + d[1] = 0; + d[2] = 0; + d[3] = 1; + memcpy(d + 4, p, len); } + d += len + hdrlen; + p += len; + } + if (need_pps) { + need_pps = 0; + if (p >= xdend) + return AVERROR(EINVAL); + n = *p++; + goto doxps; } - /* 5. start the capture process */ - ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); - if (ret) { - av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON, on capture context\n"); + return d - abdata; +} + +static int +copy_extradata(AVCodecContext * const avctx, + const void * const src_data, const int src_len, + void ** const pdst_data, size_t * const pdst_len) +{ + int len; + + *pdst_len = 0; + av_freep(pdst_data); + + if (avctx->codec_id == AV_CODEC_ID_H264) + len = h264_xd_copy(src_data, src_len, NULL); + else + len = src_len < 0 ? AVERROR(EINVAL) : src_len; + + // Zero length is OK but we want to stop - -ve is error val + if (len <= 0) + return len; + + if ((*pdst_data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) + return AVERROR(ENOMEM); + + if (avctx->codec_id == AV_CODEC_ID_H264) + h264_xd_copy(src_data, src_len, *pdst_data); + else + memcpy(*pdst_data, src_data, len); + *pdst_len = len; + + return 0; +} + + + +static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s) +{ + int ret; + struct v4l2_decoder_cmd cmd = { + .cmd = V4L2_DEC_CMD_START, + .flags = 0, + }; + + if (s->output.streamon) + return 0; + + ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON); + if (ret != 0) { + av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context: %s\n", av_err2str(ret)); return ret; } + // STREAMON should do implicit START so this just for those that don't. + // It is optional so don't worry if it fails + if (ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd) < 0) { + ret = AVERROR(errno); + av_log(avctx, AV_LOG_WARNING, "VIDIOC_DECODER_CMD start error: %s\n", av_err2str(ret)); + } + else { + av_log(avctx, AV_LOG_TRACE, "VIDIOC_DECODER_CMD start OK\n"); + } + return 0; +} + +static int v4l2_try_start(AVCodecContext *avctx) +{ + V4L2m2mContext * const s = ((V4L2m2mPriv*)avctx->priv_data)->context; + int ret; + + /* 1. start the output process */ + if ((ret = check_output_streamon(avctx, s)) != 0) + return ret; return 0; } @@ -133,62 +343,760 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s) return 0; } -static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) +static void +set_best_effort_pts(AVCodecContext *const avctx, + pts_stats_t * const ps, + AVFrame *const frame) +{ + pts_stats_add(ps, frame->pts); + + frame->best_effort_timestamp = pts_stats_guess(ps, 1); + // If we can't guess from just PTS - try DTS + if (frame->best_effort_timestamp == AV_NOPTS_VALUE) + frame->best_effort_timestamp = frame->pkt_dts; + + // We can't emulate what s/w does in a useful manner and using the + // "correct" answer seems to just confuse things. + frame->pkt_dts = frame->pts; + av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", + frame->pts, frame->best_effort_timestamp, frame->pkt_dts); +} + +static void +xlat_flush(xlat_track_t * const x) +{ + unsigned int i; + // Do not reset track_no - this ensures that any frames left in the decoder + // that turn up later get discarded. + + x->last_pts = AV_NOPTS_VALUE; + x->last_opaque = 0; + for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i) { + x->track_els[i].pending = 0; + x->track_els[i].discard = 1; + } +} + +static void +xlat_init(xlat_track_t * const x) +{ + memset(x, 0, sizeof(*x)); + xlat_flush(x); +} + +static int +xlat_pending(const V4L2m2mContext * const s) +{ + const xlat_track_t *const x = &s->xlat; + unsigned int n = x->track_no % FF_V4L2_M2M_TRACK_SIZE; + int i; + const int64_t now = pts_stats_guess(&s->pts_stat, 0); + int64_t first_dts = AV_NOPTS_VALUE; + int no_dts_count = 0; + unsigned int interval = pts_stats_interval(&s->pts_stat); + + for (i = 0; i < FF_V4L2_M2M_TRACK_SIZE; ++i, n = (n - 1) & (FF_V4L2_M2M_TRACK_SIZE - 1)) { + const V4L2m2mTrackEl * const t = x->track_els + n; + + if (first_dts == AV_NOPTS_VALUE) + if (t->dts == AV_NOPTS_VALUE) + ++no_dts_count; + else + first_dts = t->dts; + + // Discard only set on never-set or flushed entries + // So if we get here we've never successfully decoded a frame so allow + // more frames into the buffer before stalling + if (t->discard) + return i - 16; + + // If we've got this frame out then everything before this point + // must have entered the decoder + if (!t->pending) + break; + + // If we've never seen a pts all we can do is count frames + if (now == AV_NOPTS_VALUE) + continue; + + if (t->dts != AV_NOPTS_VALUE && now >= t->dts) + break; + } + + if (first_dts != AV_NOPTS_VALUE && now != AV_NOPTS_VALUE && interval != 0 && s->reorder_size != 0) { + const int iframes = (first_dts - now) / (int)interval; + const int t = iframes - s->reorder_size + no_dts_count; + +// av_log(s->avctx, AV_LOG_DEBUG, "Last:%"PRId64", Now:%"PRId64", First:%"PRId64", delta=%"PRId64", frames=%d, nodts=%d\n", +// x->last_dts, now, first_dts, first_dts - now, iframes, no_dts_count); + + if (iframes > 0 && iframes < 64 && t < i) { + return t; + } + } + + return i; +} + +static inline int stream_started(const V4L2m2mContext * const s) { + return s->output.streamon; +} + +#define NQ_OK 0 +#define NQ_Q_FULL 1 +#define NQ_SRC_EMPTY 2 +#define NQ_NONE 3 +#define NQ_DRAINING 4 +#define NQ_DEAD 5 + +#define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING) +#define RETRY_NQ(nq_status) ((nq_status) == NQ_Q_FULL || (nq_status) == NQ_NONE) + +// do_not_get If true then no new packet will be got but status will +// be set appropriately + +// AVERROR_EOF Flushing an already flushed stream +// -ve Error (all errors except EOF are unexpected) +// NQ_OK (0) OK +// NQ_Q_FULL Dst full (retry if we think V4L2 Q has space now) +// NQ_SRC_EMPTY Src empty (do not retry) +// NQ_NONE Enqueue not attempted +// NQ_DRAINING At EOS, dQ dest until EOS there too +// NQ_DEAD Not running (do not retry, do not attempt capture dQ) + +static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s, const int do_not_get) { - V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; - V4L2Context *const capture = &s->capture; - V4L2Context *const output = &s->output; int ret; - if (!s->buf_pkt.size) { - ret = ff_decode_get_packet(avctx, &s->buf_pkt); + // If we don't already have a coded packet - get a new one + // We will already have a coded pkt if the output Q was full last time we + // tried to Q it + if (!s->buf_pkt.size && !do_not_get) { + unsigned int i; + + for (i = 0; i < 256; ++i) { + uint8_t * side_data; + size_t side_size; + + ret = ff_decode_get_packet(avctx, &s->buf_pkt); + if (ret != 0) + break; + + // New extradata is the only side-data we undertand + side_data = av_packet_get_side_data(&s->buf_pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); + if (side_data) { + av_log(avctx, AV_LOG_DEBUG, "New extradata\n"); + if ((ret = copy_extradata(avctx, side_data, (int)side_size, &s->extdata_data, &s->extdata_size)) < 0) + av_log(avctx, AV_LOG_WARNING, "Failed to copy new extra data: %s\n", av_err2str(ret)); + s->extdata_sent = 0; + } + + if (s->buf_pkt.size != 0) + break; + + if (s->buf_pkt.side_data_elems == 0) { + av_log(avctx, AV_LOG_WARNING, "Empty pkt from ff_decode_get_packet - treating as EOF\n"); + ret = AVERROR_EOF; + break; + } + + // Retry a side-data only pkt + } + // If i >= 256 something has gone wrong + if (i >= 256) { + av_log(avctx, AV_LOG_ERROR, "Too many side-data only packets\n"); + return AVERROR(EIO); + } + + if (ret == AVERROR(EAGAIN)) { + if (!stream_started(s)) { + av_log(avctx, AV_LOG_TRACE, "%s: receive_frame before 1st coded packet\n", __func__); + return NQ_DEAD; + } + return NQ_SRC_EMPTY; + } + + if (ret == AVERROR_EOF) { + // EOF - enter drain mode + av_log(avctx, AV_LOG_TRACE, "--- EOS req: ret=%d, size=%d, started=%d, drain=%d\n", + ret, s->buf_pkt.size, stream_started(s), s->draining); + if (!stream_started(s)) { + av_log(avctx, AV_LOG_DEBUG, "EOS on flushed stream\n"); + s->draining = 1; + s->capture.done = 1; + return AVERROR_EOF; + } + + if (!s->draining) { + // Calling enqueue with an empty pkt starts drain + av_assert0(s->buf_pkt.size == 0); + ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); + if (ret) { + av_log(avctx, AV_LOG_ERROR, "Failed to start drain: ret=%d\n", ret); + return ret; + } + } + return NQ_DRAINING; + } + if (ret < 0) { - if (ret == AVERROR(EAGAIN)) - return ff_v4l2_context_dequeue_frame(capture, frame, 0); - else if (ret != AVERROR_EOF) - return ret; + av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret); + return ret; } } - if (s->draining) - goto dequeue; + if (s->draining) { + if (s->buf_pkt.size) { + av_log(avctx, AV_LOG_WARNING, "Unexpected input whilst draining\n"); + av_packet_unref(&s->buf_pkt); + } + return NQ_DRAINING; + } + + if (!s->buf_pkt.size) + return NQ_NONE; - ret = ff_v4l2_context_enqueue_packet(output, &s->buf_pkt); - if (ret < 0 && ret != AVERROR(EAGAIN)) - goto fail; + if ((ret = check_output_streamon(avctx, s)) != 0) + return ret; - /* if EAGAIN don't unref packet and try to enqueue in the next iteration */ - if (ret != AVERROR(EAGAIN)) + if (s->extdata_sent) + ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); + else + ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size); + + if (ret == AVERROR(EAGAIN)) { + // Out of input buffers - keep packet + ret = NQ_Q_FULL; + } + else { + // In all other cases we are done with this packet av_packet_unref(&s->buf_pkt); + s->extdata_sent = 1; - if (!s->draining) { - ret = v4l2_try_start(avctx); if (ret) { - /* cant recover */ - if (ret != AVERROR(ENOMEM)) - ret = 0; - goto fail; + av_log(avctx, AV_LOG_ERROR, "Packet enqueue failure: err=%d\n", ret); + return ret; } } -dequeue: - return ff_v4l2_context_dequeue_frame(capture, frame, -1); -fail: - av_packet_unref(&s->buf_pkt); + // Start if we haven't + { + const int ret2 = v4l2_try_start(avctx); + if (ret2) { + av_log(avctx, AV_LOG_DEBUG, "Start failure: err=%d\n", ret2); + ret = (ret2 == AVERROR(ENOMEM)) ? ret2 : NQ_DEAD; + } + } + + return ret; +} + +static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx) +{ + int rv = 0; + + ff_mutex_lock(&ctx->lock); + + while (atomic_load(&ctx->q_count) == 0 && ctx->streamon) { + if (pthread_cond_wait(&ctx->cond, &ctx->lock) != 0) { + rv = AVERROR(errno); + av_log(avctx, AV_LOG_ERROR, "Cond wait failure: %s\n", av_err2str(rv)); + break; + } + } + + ff_mutex_unlock(&ctx->lock); + return rv; +} + +static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) +{ + V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context; + int src_rv = -1; + int dst_rv = 1; // Non-zero (done), non-negative (error) number + unsigned int i = 0; + + do { + const int pending = xlat_pending(s); + const int prefer_dq = (pending > 4); + const int last_src_rv = src_rv; + + av_log(avctx, AV_LOG_TRACE, "Pending=%d, src_rv=%d, req_pkt=%d\n", pending, src_rv, s->req_pkt); + + // Enqueue another pkt for decode if + // (a) We don't have a lot of stuff in the buffer already OR + // (b) ... we (think we) do but we've failed to get a frame already OR + // (c) We've dequeued a lot of frames without asking for input + src_rv = try_enqueue_src(avctx, s, !(!prefer_dq || i != 0 || s->req_pkt > 2)); + + // If we got a frame last time or we've already tried to get a frame and + // we have nothing to enqueue then return now. rv will be AVERROR(EAGAIN) + // indicating that we want more input. + // This should mean that once decode starts we enter a stable state where + // we alternately ask for input and produce output + if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY) + break; + + if (src_rv == NQ_Q_FULL && last_src_rv == NQ_Q_FULL) { + av_log(avctx, AV_LOG_WARNING, "Poll thinks src Q has space; none found\n"); + break; + } + + // Try to get a new frame if + // (a) we haven't already got one AND + // (b) enqueue returned a status indicating that decode should be attempted + if (dst_rv != 0 && TRY_DQ(src_rv)) { + // Pick a timeout depending on state + // The pending count isn't completely reliable so it is good enough + // hint that we want a frame but not good enough to require it in + // all cases; however if it has got > 31 that exceeds its margin of + // error so require a frame to prevent ridiculous levels of latency + const int t = + src_rv == NQ_Q_FULL ? -1 : + src_rv == NQ_DRAINING ? 300 : + prefer_dq ? (s->running && pending > 31 ? 100 : 5) : 0; + + // Dequeue frame will unref any previous contents of frame + // if it returns success so we don't need an explicit unref + // when discarding + // This returns AVERROR(EAGAIN) on timeout or if + // there is room in the input Q and timeout == -1 + dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); + + // Failure due to no buffer in Q? + if (dst_rv == AVERROR(ENOSPC)) { + // Wait & retry + if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) { + dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); + } + } + + if (dst_rv == 0) { + set_best_effort_pts(avctx, &s->pts_stat, frame); + if (!s->running) { + s->running = 1; + av_log(avctx, AV_LOG_VERBOSE, "Decode running\n"); + } + } + + if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { + av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); + dst_rv = AVERROR_EOF; + s->capture.done = 1; + } + else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) + av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", + s->draining, s->capture.done); + else if (dst_rv && dst_rv != AVERROR(EAGAIN)) + av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n", + s->draining, s->capture.done, dst_rv); + } + + ++i; + if (i >= 256) { + av_log(avctx, AV_LOG_ERROR, "Unexpectedly large retry count: %d\n", i); + src_rv = AVERROR(EIO); + } + + // Continue trying to enqueue packets if either + // (a) we succeeded last time OR + // (b) we didn't ret a frame and we can retry the input + } while (src_rv == NQ_OK || (dst_rv == AVERROR(EAGAIN) && RETRY_NQ(src_rv))); + + // Ensure that the frame contains nothing if we aren't returning a frame + // (might happen when discarding) + if (dst_rv) + av_frame_unref(frame); + + // If we got a frame this time ask for a pkt next time + s->req_pkt = (dst_rv == 0) ? s->req_pkt + 1 : 0; + +#if 0 + if (dst_rv == 0) + { + static int z = 0; + if (++z > 50) { + av_log(avctx, AV_LOG_ERROR, "Streamoff and die?\n"); + ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); + return -1; + } + } +#endif + + return dst_rv == 0 ? 0 : + src_rv < 0 ? src_rv : + dst_rv < 0 ? dst_rv : + AVERROR(EAGAIN); +} + +#if 0 +#include +static int64_t us_time(void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000; +} + +static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) +{ + int ret; + const int64_t now = us_time(); + int64_t done; + av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); + ret = v4l2_receive_frame2(avctx, frame); + done = us_time(); + av_log(avctx, AV_LOG_TRACE, ">>> %s: rx time=%" PRId64 ", rv=%d\n", __func__, done - now, ret); return ret; } +#endif + +static uint32_t +avprofile_to_v4l2(const enum AVCodecID codec_id, const int avprofile) +{ + switch (codec_id) { + case AV_CODEC_ID_H264: + switch (avprofile) { + case FF_PROFILE_H264_BASELINE: + return V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE; + case FF_PROFILE_H264_CONSTRAINED_BASELINE: + return V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE; + case FF_PROFILE_H264_MAIN: + return V4L2_MPEG_VIDEO_H264_PROFILE_MAIN; + case FF_PROFILE_H264_EXTENDED: + return V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED; + case FF_PROFILE_H264_HIGH: + return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH; + case FF_PROFILE_H264_HIGH_10: + return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10; + case FF_PROFILE_H264_HIGH_10_INTRA: + return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10_INTRA; + case FF_PROFILE_H264_MULTIVIEW_HIGH: + case FF_PROFILE_H264_HIGH_422: + return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422; + case FF_PROFILE_H264_HIGH_422_INTRA: + return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422_INTRA; + case FF_PROFILE_H264_STEREO_HIGH: + return V4L2_MPEG_VIDEO_H264_PROFILE_STEREO_HIGH; + case FF_PROFILE_H264_HIGH_444_PREDICTIVE: + return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_PREDICTIVE; + case FF_PROFILE_H264_HIGH_444_INTRA: + return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_INTRA; + case FF_PROFILE_H264_CAVLC_444: + return V4L2_MPEG_VIDEO_H264_PROFILE_CAVLC_444_INTRA; + case FF_PROFILE_H264_HIGH_444: + default: + break; +// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_BASELINE = 12, +// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH = 13, +// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH_INTRA = 14, +// V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH = 16, +// V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH = 17, + } + break; + case AV_CODEC_ID_MPEG2VIDEO: + case AV_CODEC_ID_MPEG4: + case AV_CODEC_ID_VC1: + case AV_CODEC_ID_VP8: + case AV_CODEC_ID_VP9: + case AV_CODEC_ID_AV1: + // Most profiles are a simple number that matches the V4L2 enum + return avprofile; + default: + break; + } + return ~(uint32_t)0; +} + +// This check mirrors Chrome's profile check by testing to see if the profile +// exists as a possible value for the V4L2 profile control +static int +check_profile(AVCodecContext *const avctx, V4L2m2mContext *const s) +{ + struct v4l2_queryctrl query_ctrl; + struct v4l2_querymenu query_menu; + uint32_t profile_id; + + // An unset profile is almost certainly zero or -99 - do not reject + if (avctx->profile <= 0) { + av_log(avctx, AV_LOG_VERBOSE, "Profile %d <= 0 - check skipped\n", avctx->profile); + return 0; + } + + memset(&query_ctrl, 0, sizeof(query_ctrl)); + switch (avctx->codec_id) { + case AV_CODEC_ID_MPEG2VIDEO: + profile_id = V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE; + break; + case AV_CODEC_ID_MPEG4: + profile_id = V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE; + break; + case AV_CODEC_ID_H264: + profile_id = V4L2_CID_MPEG_VIDEO_H264_PROFILE; + break; + case AV_CODEC_ID_VP8: + profile_id = V4L2_CID_MPEG_VIDEO_VP8_PROFILE; + break; + case AV_CODEC_ID_VP9: + profile_id = V4L2_CID_MPEG_VIDEO_VP9_PROFILE; + break; +#ifdef V4L2_CID_MPEG_VIDEO_AV1_PROFILE + case AV_CODEC_ID_AV1: + profile_id = V4L2_CID_MPEG_VIDEO_AV1_PROFILE; + break; +#endif + default: + av_log(avctx, AV_LOG_VERBOSE, "Can't map profile for codec id %d; profile check skipped\n", avctx->codec_id); + return 0; + } + + query_ctrl = (struct v4l2_queryctrl){.id = profile_id}; + if (ioctl(s->fd, VIDIOC_QUERYCTRL, &query_ctrl) != 0) { + av_log(avctx, AV_LOG_VERBOSE, "Query profile ctrl (%#x) not supported: assume OK\n", query_ctrl.id); + } + else { + av_log(avctx, AV_LOG_DEBUG, "%s: Control supported: %#x\n", __func__, query_ctrl.id); + + query_menu = (struct v4l2_querymenu){ + .id = query_ctrl.id, + .index = avprofile_to_v4l2(avctx->codec_id, avctx->profile), + }; + + if (query_menu.index > query_ctrl.maximum || + query_menu.index < query_ctrl.minimum || + ioctl(s->fd, VIDIOC_QUERYMENU, &query_menu) != 0) { + return AVERROR(ENOENT); + } + } + + return 0; +}; + +static int +check_size(AVCodecContext * const avctx, V4L2m2mContext * const s) +{ + unsigned int i; + const uint32_t fcc = ff_v4l2_get_format_pixelformat(&s->capture.format); + const uint32_t w = avctx->coded_width; + const uint32_t h = avctx->coded_height; + + if (w == 0 || h == 0 || fcc == 0) { + av_log(avctx, AV_LOG_TRACE, "%s: Size %dx%d or fcc %s empty\n", __func__, w, h, av_fourcc2str(fcc)); + return 0; + } + if ((s->quirks & FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN) != 0) { + av_log(avctx, AV_LOG_TRACE, "%s: Skipped (quirk): Size %dx%d, fcc %s\n", __func__, w, h, av_fourcc2str(fcc)); + return 0; + } + + for (i = 0;; ++i) { + struct v4l2_frmsizeenum fs = { + .index = i, + .pixel_format = fcc, + }; + + while (ioctl(s->fd, VIDIOC_ENUM_FRAMESIZES, &fs) != 0) { + const int err = AVERROR(errno); + if (err == AVERROR(EINTR)) + continue; + if (i == 0 && err == AVERROR(ENOTTY)) { + av_log(avctx, AV_LOG_DEBUG, "Framesize enum not supported\n"); + return 0; + } + if (err != AVERROR(EINVAL)) { + av_log(avctx, AV_LOG_ERROR, "Failed to enum framesizes: %s", av_err2str(err)); + return err; + } + av_log(avctx, AV_LOG_WARNING, "Failed to find Size=%dx%d, fmt=%s in %u frame size enums\n", + w, h, av_fourcc2str(fcc), i); + return err; + } + + switch (fs.type) { + case V4L2_FRMSIZE_TYPE_DISCRETE: + av_log(avctx, AV_LOG_TRACE, "%s[%d]: Discrete: %dx%d\n", __func__, i, + fs.discrete.width,fs.discrete.height); + if (w == fs.discrete.width && h == fs.discrete.height) + return 0; + break; + case V4L2_FRMSIZE_TYPE_STEPWISE: + av_log(avctx, AV_LOG_TRACE, "%s[%d]: Stepwise: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i, + fs.stepwise.min_width, fs.stepwise.min_height, + fs.stepwise.max_width, fs.stepwise.max_height, + fs.stepwise.step_width,fs.stepwise.step_height); + if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width && + h >= fs.stepwise.min_height && h <= fs.stepwise.max_height && + (w - fs.stepwise.min_width) % fs.stepwise.step_width == 0 && + (h - fs.stepwise.min_height) % fs.stepwise.step_height == 0) + return 0; + break; + case V4L2_FRMSIZE_TYPE_CONTINUOUS: + av_log(avctx, AV_LOG_TRACE, "%s[%d]: Continuous: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i, + fs.stepwise.min_width, fs.stepwise.min_height, + fs.stepwise.max_width, fs.stepwise.max_height, + fs.stepwise.step_width,fs.stepwise.step_height); + if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width && + h >= fs.stepwise.min_height && h <= fs.stepwise.max_height) + return 0; + break; + default: + av_log(avctx, AV_LOG_ERROR, "Unexpected framesize enum: %d", fs.type); + return AVERROR(EINVAL); + } + } +} + +static int +get_quirks(AVCodecContext * const avctx, V4L2m2mContext * const s) +{ + struct v4l2_capability cap; + + memset(&cap, 0, sizeof(cap)); + while (ioctl(s->fd, VIDIOC_QUERYCAP, &cap) != 0) { + int err = errno; + if (err == EINTR) + continue; + av_log(avctx, AV_LOG_ERROR, "V4L2: Failed to get capabilities: %s\n", strerror(err)); + return AVERROR(err); + } + + // Could be made table driven if we have a few more but right now there + // seems no point + + // Meson (amlogic) always gives a resolution changed event after output + // streamon and userspace must (re)allocate capture buffers and streamon + // capture to clear the event even if the capture buffers were the right + // size in the first place. + if (strcmp(cap.driver, "meson-vdec") == 0) + s->quirks |= FF_V4L2_QUIRK_REINIT_ALWAYS | FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN; + + av_log(avctx, AV_LOG_DEBUG, "Driver '%s': Quirks=%#x\n", cap.driver, s->quirks); + return 0; +} + +// This heuristic is for H264 but use for everything +static uint32_t max_coded_size(const AVCodecContext * const avctx) +{ + uint32_t wxh = avctx->coded_width * avctx->coded_height; + uint32_t size; + + size = wxh * 3 / 2; + // H.264 Annex A table A-1 gives minCR which is either 2 or 4 + // unfortunately that doesn't yield an actually useful limit + // and it should be noted that frame 0 is special cased to allow + // a bigger number which really isn't helpful for us. So just pick + // frame_size / 2 + size /= 2; + // Add 64k to allow for any overheads and/or encoder hopefulness + // with small WxH + return size + (1 << 16); +} + +static void +parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s) +{ + s->reorder_size = 0; + + if (!avctx->extradata || !avctx->extradata_size) + return; + + switch (avctx->codec_id) { +#if CONFIG_H264_DECODER + case AV_CODEC_ID_H264: + { + H264ParamSets ps = {{NULL}}; + int is_avc = 0; + int nal_length_size = 0; + int ret; + + ret = ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size, + &ps, &is_avc, &nal_length_size, + avctx->err_recognition, avctx); + if (ret > 0) { + const SPS * sps = NULL; + unsigned int i; + for (i = 0; i != MAX_SPS_COUNT; ++i) { + if (ps.sps_list[i]) { + sps = (const SPS *)ps.sps_list[i]->data; + break; + } + } + if (sps) { + avctx->profile = ff_h264_get_profile(sps); + avctx->level = sps->level_idc; + s->reorder_size = sps->num_reorder_frames; + } + } + ff_h264_ps_uninit(&ps); + break; + } +#endif +#if CONFIG_HEVC_DECODER + case AV_CODEC_ID_HEVC: + { + HEVCParamSets ps = {{NULL}}; + HEVCSEI sei = {{{{0}}}}; + int is_nalff = 0; + int nal_length_size = 0; + int ret; + + ret = ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size, + &ps, &sei, &is_nalff, &nal_length_size, + avctx->err_recognition, 0, avctx); + if (ret > 0) { + const HEVCSPS * sps = NULL; + unsigned int i; + for (i = 0; i != HEVC_MAX_SPS_COUNT; ++i) { + if (ps.sps_list[i]) { + sps = (const HEVCSPS *)ps.sps_list[i]->data; + break; + } + } + if (sps) { + avctx->profile = sps->ptl.general_ptl.profile_idc; + avctx->level = sps->ptl.general_ptl.level_idc; + s->reorder_size = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering; + } + } + ff_hevc_ps_uninit(&ps); + ff_hevc_reset_sei(&sei); + break; + } +#endif + default: + break; + } +} static av_cold int v4l2_decode_init(AVCodecContext *avctx) { V4L2Context *capture, *output; V4L2m2mContext *s; V4L2m2mPriv *priv = avctx->priv_data; + int gf_pix_fmt; int ret; + av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); + + if (avctx->codec_id == AV_CODEC_ID_H264) { + if (avctx->ticks_per_frame == 1) { + if(avctx->time_base.den < INT_MAX/2) { + avctx->time_base.den *= 2; + } else + avctx->time_base.num /= 2; + } + avctx->ticks_per_frame = 2; + } + ret = ff_v4l2_m2m_create_context(priv, &s); if (ret < 0) return ret; + parse_extradata(avctx, s); + + xlat_init(&s->xlat); + pts_stats_init(&s->pts_stat, avctx, "decoder"); + capture = &s->capture; output = &s->output; @@ -196,14 +1104,65 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) * by the v4l2 driver; this event will trigger a full pipeline reconfig and * the proper values will be retrieved from the kernel driver. */ - output->height = capture->height = avctx->coded_height; - output->width = capture->width = avctx->coded_width; +// output->height = capture->height = avctx->coded_height; +// output->width = capture->width = avctx->coded_width; + output->height = capture->height = 0; + output->width = capture->width = 0; output->av_codec_id = avctx->codec_id; output->av_pix_fmt = AV_PIX_FMT_NONE; + output->min_buf_size = max_coded_size(avctx); capture->av_codec_id = AV_CODEC_ID_RAWVIDEO; capture->av_pix_fmt = avctx->pix_fmt; + capture->min_buf_size = 0; + + /* the client requests the codec to generate DRM frames: + * - data[0] will therefore point to the returned AVDRMFrameDescriptor + * check the ff_v4l2_buffer_to_avframe conversion function. + * - the DRM frame format is passed in the DRM frame descriptor layer. + * check the v4l2_get_drm_frame function. + */ + + avctx->sw_pix_fmt = avctx->pix_fmt; + gf_pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts); + av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n", + avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), + avctx->coded_width, avctx->coded_height, + gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt)); + + if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) { + avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; + s->output_drm = 1; + } + else { + capture->av_pix_fmt = gf_pix_fmt; + s->output_drm = 0; + } + + s->db_ctl = NULL; + if (priv->dmabuf_alloc != NULL && strcmp(priv->dmabuf_alloc, "v4l2") != 0) { + if (strcmp(priv->dmabuf_alloc, "cma") == 0) + s->db_ctl = dmabufs_ctl_new(); + else { + av_log(avctx, AV_LOG_ERROR, "Unknown dmabuf alloc method: '%s'\n", priv->dmabuf_alloc); + return AVERROR(EINVAL); + } + if (!s->db_ctl) { + av_log(avctx, AV_LOG_ERROR, "Can't open dmabuf provider '%s'\n", priv->dmabuf_alloc); + return AVERROR(ENOMEM); + } + } + + s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM); + if (!s->device_ref) { + ret = AVERROR(ENOMEM); + return ret; + } + + ret = av_hwdevice_ctx_init(s->device_ref); + if (ret < 0) + return ret; s->avctx = avctx; ret = ff_v4l2_m2m_codec_init(priv); @@ -212,12 +1171,88 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) return ret; } - return v4l2_prepare_decoder(s); + if (avctx->extradata && + (ret = copy_extradata(avctx, avctx->extradata, avctx->extradata_size, &s->extdata_data, &s->extdata_size)) != 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to copy extradata from context: %s\n", av_err2str(ret)); +#if DUMP_FAILED_EXTRADATA + log_dump(avctx, AV_LOG_INFO, avctx->extradata, avctx->extradata_size); +#endif + return ret; + } + + if ((ret = v4l2_prepare_decoder(s)) < 0) + return ret; + + if ((ret = get_quirks(avctx, s)) != 0) + return ret; + + if ((ret = check_size(avctx, s)) != 0) + return ret; + + if ((ret = check_profile(avctx, s)) != 0) { + av_log(avctx, AV_LOG_WARNING, "Profile %d not supported by decode\n", avctx->profile); + return ret; + } + return 0; } static av_cold int v4l2_decode_close(AVCodecContext *avctx) { - return ff_v4l2_m2m_codec_end(avctx->priv_data); + int rv; + av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); + rv = ff_v4l2_m2m_codec_end(avctx->priv_data); + av_log(avctx, AV_LOG_TRACE, ">>> %s: rv=%d\n", __func__, rv); + return rv; +} + +static void v4l2_decode_flush(AVCodecContext *avctx) +{ + // An alternatve and more drastic form of flush is to simply do this: + // v4l2_decode_close(avctx); + // v4l2_decode_init(avctx); + // The downside is that this keeps a decoder open until all the frames + // associated with it have been returned. This is a bit wasteful on + // possibly limited h/w resources and fails on a Pi for this reason unless + // more GPU mem is allocated than is the default. + + V4L2m2mPriv * const priv = avctx->priv_data; + V4L2m2mContext * const s = priv->context; + V4L2Context * const output = &s->output; + V4L2Context * const capture = &s->capture; + + av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon); + + // Reflushing everything is benign, quick and avoids having to worry about + // states like EOS processing so don't try to optimize out (having got it + // wrong once) + + ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF); + + // Clear any buffered input packet + av_packet_unref(&s->buf_pkt); + + // Clear a pending EOS + if (ff_v4l2_ctx_eos(capture)) { + // Arguably we could delay this but this is easy and doesn't require + // thought or extra vars + ff_v4l2_context_set_status(capture, VIDIOC_STREAMOFF); + ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); + } + + // V4L2 makes no guarantees about whether decoded frames are flushed or not + // so mark all frames we are tracking to be discarded if they appear + xlat_flush(&s->xlat); + + // resend extradata + s->extdata_sent = 0; + // clear status vars + s->running = 0; + s->draining = 0; + output->done = 0; + capture->done = 0; + + // Stream on will occur when we actually submit a new frame + av_log(avctx, AV_LOG_TRACE, ">>> %s\n", __func__); } #define OFFSET(x) offsetof(V4L2m2mPriv, x) @@ -227,9 +1262,16 @@ static const AVOption options[] = { V4L_M2M_DEFAULT_OPTS, { "num_capture_buffers", "Number of buffers in the capture context", OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 2, INT_MAX, FLAGS }, + { "pixel_format", "Pixel format to be used by the decoder", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, AV_PIX_FMT_NB, FLAGS }, + { "dmabuf_alloc", "Dmabuf alloc method", OFFSET(dmabuf_alloc), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS }, { NULL}, }; +static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = { + HW_CONFIG_INTERNAL(DRM_PRIME), + NULL +}; + #define M2MDEC_CLASS(NAME) \ static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \ .class_name = #NAME "_v4l2m2m_decoder", \ @@ -250,11 +1292,17 @@ static const AVOption options[] = { .init = v4l2_decode_init, \ FF_CODEC_RECEIVE_FRAME_CB(v4l2_receive_frame), \ .close = v4l2_decode_close, \ + .flush = v4l2_decode_flush, \ .bsfs = bsf_name, \ .p.capabilities = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \ .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE | \ FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_CLEANUP, \ .p.wrapper_name = "v4l2m2m", \ + .p.pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_DRM_PRIME, \ + AV_PIX_FMT_NV12, \ + AV_PIX_FMT_YUV420P, \ + AV_PIX_FMT_NONE}, \ + .hw_configs = v4l2_m2m_hw_configs, \ } M2MDEC(h264, "H.264", AV_CODEC_ID_H264, "h264_mp4toannexb"); diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c index 9a0837ecf3..524e9424a5 100644 --- a/libavcodec/v4l2_m2m_enc.c +++ b/libavcodec/v4l2_m2m_enc.c @@ -24,6 +24,8 @@ #include #include #include +#include + #include "encode.h" #include "libavcodec/avcodec.h" #include "libavutil/pixdesc.h" @@ -38,6 +40,34 @@ #define MPEG_CID(x) V4L2_CID_MPEG_VIDEO_##x #define MPEG_VIDEO(x) V4L2_MPEG_VIDEO_##x +// P030 should be defined in drm_fourcc.h and hopefully will be sometime +// in the future but until then... +#ifndef DRM_FORMAT_P030 +#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') +#endif + +#ifndef DRM_FORMAT_NV15 +#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') +#endif + +#ifndef DRM_FORMAT_NV20 +#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') +#endif + +#ifndef V4L2_CID_CODEC_BASE +#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE +#endif + +// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined +// in videodev2.h hopefully will be sometime in the future but until then... +#ifndef V4L2_PIX_FMT_NV12_10_COL128 +#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') +#endif + +#ifndef V4L2_PIX_FMT_NV12_COL128 +#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ +#endif + static inline void v4l2_set_timeperframe(V4L2m2mContext *s, unsigned int num, unsigned int den) { struct v4l2_streamparm parm = { 0 }; @@ -148,15 +178,14 @@ static inline int v4l2_mpeg4_profile_from_ff(int p) static int v4l2_check_b_frame_support(V4L2m2mContext *s) { if (s->avctx->max_b_frames) - av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support b-frames yet\n"); + av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support %d b-frames yet\n", s->avctx->max_b_frames); - v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), 0, "number of B-frames", 0); + v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), s->avctx->max_b_frames, "number of B-frames", 1); v4l2_get_ext_ctrl(s, MPEG_CID(B_FRAMES), &s->avctx->max_b_frames, "number of B-frames", 0); if (s->avctx->max_b_frames == 0) return 0; avpriv_report_missing_feature(s->avctx, "DTS/PTS calculation for V4L2 encoding"); - return AVERROR_PATCHWELCOME; } @@ -271,17 +300,208 @@ static int v4l2_prepare_encoder(V4L2m2mContext *s) return 0; } +static int avdrm_to_v4l2(struct v4l2_format * const format, const AVFrame * const frame) +{ + const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; + + const uint32_t drm_fmt = src->layers[0].format; + // Treat INVALID as LINEAR + const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ? + DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier; + uint32_t pix_fmt = 0; + uint32_t w = 0; + uint32_t h = 0; + uint32_t bpl = src->layers[0].planes[0].pitch; + + // We really don't expect multiple layers + // All formats that we currently cope with are single object + + if (src->nb_layers != 1 || src->nb_objects != 1) + return AVERROR(EINVAL); + + switch (drm_fmt) { + case DRM_FORMAT_YUV420: + if (mod == DRM_FORMAT_MOD_LINEAR) { + if (src->layers[0].nb_planes != 3) + break; + pix_fmt = V4L2_PIX_FMT_YUV420; + h = src->layers[0].planes[1].offset / bpl; + w = bpl; + } + break; + + case DRM_FORMAT_NV12: + if (mod == DRM_FORMAT_MOD_LINEAR) { + if (src->layers[0].nb_planes != 2) + break; + pix_fmt = V4L2_PIX_FMT_NV12; + h = src->layers[0].planes[1].offset / bpl; + w = bpl; + } + else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { + if (src->layers[0].nb_planes != 2) + break; + pix_fmt = V4L2_PIX_FMT_NV12_COL128; + w = bpl; + h = src->layers[0].planes[1].offset / 128; + bpl = fourcc_mod_broadcom_param(mod); + } + break; + + case DRM_FORMAT_P030: + if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { + if (src->layers[0].nb_planes != 2) + break; + pix_fmt = V4L2_PIX_FMT_NV12_10_COL128; + w = bpl / 2; // Matching lie to how we construct this + h = src->layers[0].planes[1].offset / 128; + bpl = fourcc_mod_broadcom_param(mod); + } + break; + + default: + break; + } + + if (!pix_fmt) + return AVERROR(EINVAL); + + if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { + struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp; + + pix->width = w; + pix->height = h; + pix->pixelformat = pix_fmt; + pix->plane_fmt[0].bytesperline = bpl; + pix->num_planes = 1; + } + else { + struct v4l2_pix_format *const pix = &format->fmt.pix; + + pix->width = w; + pix->height = h; + pix->pixelformat = pix_fmt; + pix->bytesperline = bpl; + } + + return 0; +} + +// Do we have similar enough formats to be usable? +static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format * const b) +{ + if (a->type != b->type) + return 0; + + if (V4L2_TYPE_IS_MULTIPLANAR(a->type)) { + const struct v4l2_pix_format_mplane *const pa = &a->fmt.pix_mp; + const struct v4l2_pix_format_mplane *const pb = &b->fmt.pix_mp; + unsigned int i; + if (pa->pixelformat != pb->pixelformat || + pa->num_planes != pb->num_planes) + return 0; + for (i = 0; i != pa->num_planes; ++i) { + if (pa->plane_fmt[i].bytesperline != pb->plane_fmt[i].bytesperline) + return 0; + } + } + else { + const struct v4l2_pix_format *const pa = &a->fmt.pix; + const struct v4l2_pix_format *const pb = &b->fmt.pix; + if (pa->pixelformat != pb->pixelformat || + pa->bytesperline != pb->bytesperline) + return 0; + } + return 1; +} + +static inline int q_full(const V4L2Context *const output) +{ + return ff_v4l2_context_q_count(output) == output->num_buffers; +} + static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame) { V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; V4L2Context *const output = &s->output; + int rv; + const int needs_slot = q_full(output); + + av_log(avctx, AV_LOG_TRACE, "<<< %s; needs_slot=%d\n", __func__, needs_slot); + + // Signal EOF if needed (doesn't need q slot) + if (!frame) { + av_log(avctx, AV_LOG_TRACE, "--- %s: EOS\n", __func__); + return ff_v4l2_context_enqueue_frame(output, frame); + } + + if ((rv = ff_v4l2_dq_all(output, needs_slot? 500 : 0)) != 0) { + // We should be able to return AVERROR(EAGAIN) to indicate buffer + // exhaustion, but ffmpeg currently treats that as fatal. + av_log(avctx, AV_LOG_WARNING, "Failed to get buffer for src frame: %s\n", av_err2str(rv)); + return rv; + } + + if (s->input_drm && !output->streamon) { + struct v4l2_format req_format = {.type = output->format.type}; + + // Set format when we first get a buffer + if ((rv = avdrm_to_v4l2(&req_format, frame)) != 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to get V4L2 format from DRM_PRIME frame\n"); + return rv; + } + + ff_v4l2_context_release(output); + + output->format = req_format; + + if ((rv = ff_v4l2_context_set_format(output)) != 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to set V4L2 format\n"); + return rv; + } + + if (!fmt_eq(&req_format, &output->format)) { + av_log(avctx, AV_LOG_ERROR, "Format mismatch after setup\n"); + return AVERROR(EINVAL); + } + + output->selection.top = frame->crop_top; + output->selection.left = frame->crop_left; + output->selection.width = av_frame_cropped_width(frame); + output->selection.height = av_frame_cropped_height(frame); + + if ((rv = ff_v4l2_context_init(output)) != 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to (re)init context\n"); + return rv; + } + + { + struct v4l2_selection selection = { + .type = V4L2_BUF_TYPE_VIDEO_OUTPUT, + .target = V4L2_SEL_TGT_CROP, + .r = output->selection + }; + if (ioctl(s->fd, VIDIOC_S_SELECTION, &selection) != 0) { + av_log(avctx, AV_LOG_WARNING, "S_SELECTION (CROP) %dx%d @ %d,%d failed: %s\n", + selection.r.width, selection.r.height, selection.r.left, selection.r.top, + av_err2str(AVERROR(errno))); + } + av_log(avctx, AV_LOG_TRACE, "S_SELECTION (CROP) %dx%d @ %d,%d OK\n", + selection.r.width, selection.r.height, selection.r.left, selection.r.top); + } + } #ifdef V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME - if (frame && frame->pict_type == AV_PICTURE_TYPE_I) + if (frame->pict_type == AV_PICTURE_TYPE_I) v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1); #endif - return ff_v4l2_context_enqueue_frame(output, frame); + rv = ff_v4l2_context_enqueue_frame(output, frame); + if (rv) { + av_log(avctx, AV_LOG_ERROR, "Enqueue frame failed: %s\n", av_err2str(rv)); + } + + return rv; } static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) @@ -292,6 +512,11 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) AVFrame *frame = s->frame; int ret; + av_log(avctx, AV_LOG_TRACE, "<<< %s: qlen out %d cap %d\n", __func__, + ff_v4l2_context_q_count(output), ff_v4l2_context_q_count(capture)); + + ff_v4l2_dq_all(output, 0); + if (s->draining) goto dequeue; @@ -328,7 +553,115 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) } dequeue: - return ff_v4l2_context_dequeue_packet(capture, avpkt); + // Dequeue a frame + for (;;) { + int t = q_full(output) ? -1 : s->draining ? 300 : 0; + int rv2; + + // If output is full wait for either a packet or output to become not full + ret = ff_v4l2_context_dequeue_packet(capture, avpkt, t); + + // If output was full retry packet dequeue + t = (ret != AVERROR(EAGAIN) || t != -1) ? 0 : 300; + rv2 = ff_v4l2_dq_all(output, t); + if (t == 0 || rv2 != 0) + break; + } + if (ret) + return (s->draining && ret == AVERROR(EAGAIN)) ? AVERROR_EOF : ret; + + if (capture->first_buf == 1) { + uint8_t * data; + const int len = avpkt->size; + + // 1st buffer after streamon should be SPS/PPS + capture->first_buf = 2; + + // Clear both possible stores so there is no chance of confusion + av_freep(&s->extdata_data); + s->extdata_size = 0; + av_freep(&avctx->extradata); + avctx->extradata_size = 0; + + if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) + goto fail_no_mem; + + memcpy(data, avpkt->data, len); + av_packet_unref(avpkt); + + // We need to copy the header, but keep local if not global + if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) { + avctx->extradata = data; + avctx->extradata_size = len; + } + else { + s->extdata_data = data; + s->extdata_size = len; + } + + ret = ff_v4l2_context_dequeue_packet(capture, avpkt, 0); + ff_v4l2_dq_all(output, 0); + if (ret) + return ret; + } + + // First frame must be key so mark as such even if encoder forgot + if (capture->first_buf == 2) { + avpkt->flags |= AV_PKT_FLAG_KEY; + + // Add any extradata to the 1st packet we emit as we cannot create it at init + if (avctx->extradata_size > 0 && avctx->extradata) { + void * const side = av_packet_new_side_data(avpkt, + AV_PKT_DATA_NEW_EXTRADATA, + avctx->extradata_size); + if (!side) + goto fail_no_mem; + + memcpy(side, avctx->extradata, avctx->extradata_size); + } + } + + // Add SPS/PPS to the start of every key frame if non-global headers + if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) { + const size_t newlen = s->extdata_size + avpkt->size; + AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE); + + if (buf == NULL) + goto fail_no_mem; + + memcpy(buf->data, s->extdata_data, s->extdata_size); + memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size); + + av_buffer_unref(&avpkt->buf); + avpkt->buf = buf; + avpkt->data = buf->data; + avpkt->size = newlen; + } + else if (ff_v4l2_context_q_count(capture) < 2) { + // Avoid running out of capture buffers + // In most cases the buffers will be returned quickly in which case + // we don't copy and can use the v4l2 buffers directly but sometimes + // ffmpeg seems to hold onto all of them for a long time (.mkv + // creation?) so avoid deadlock in those cases. + AVBufferRef * const buf = av_buffer_alloc(avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE); + if (buf == NULL) + goto fail_no_mem; + + memcpy(buf->data, avpkt->data, avpkt->size); + av_buffer_unref(&avpkt->buf); // Will recycle the V4L2 buffer + + avpkt->buf = buf; + avpkt->data = buf->data; + } + + capture->first_buf = 0; + return 0; + +fail_no_mem: + av_log(avctx, AV_LOG_ERROR, "Rx pkt failed: No memory\n"); + ret = AVERROR(ENOMEM); + av_packet_unref(avpkt); + return ret; } static av_cold int v4l2_encode_init(AVCodecContext *avctx) @@ -340,6 +673,8 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) uint32_t v4l2_fmt_output; int ret; + av_log(avctx, AV_LOG_INFO, " <<< %s: fmt=%d/%d\n", __func__, avctx->pix_fmt, avctx->sw_pix_fmt); + ret = ff_v4l2_m2m_create_context(priv, &s); if (ret < 0) return ret; @@ -347,13 +682,17 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) capture = &s->capture; output = &s->output; + s->input_drm = (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME); + /* common settings output/capture */ output->height = capture->height = avctx->height; output->width = capture->width = avctx->width; /* output context */ output->av_codec_id = AV_CODEC_ID_RAWVIDEO; - output->av_pix_fmt = avctx->pix_fmt; + output->av_pix_fmt = !s->input_drm ? avctx->pix_fmt : + avctx->sw_pix_fmt != AV_PIX_FMT_NONE ? avctx->sw_pix_fmt : + AV_PIX_FMT_YUV420P; /* capture context */ capture->av_codec_id = avctx->codec_id; @@ -372,7 +711,7 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) v4l2_fmt_output = output->format.fmt.pix.pixelformat; pix_fmt_output = ff_v4l2_format_v4l2_to_avfmt(v4l2_fmt_output, AV_CODEC_ID_RAWVIDEO); - if (pix_fmt_output != avctx->pix_fmt) { + if (!s->input_drm && pix_fmt_output != avctx->pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt_output); av_log(avctx, AV_LOG_ERROR, "Encoder requires %s pixel format.\n", desc->name); return AVERROR(EINVAL); @@ -390,9 +729,10 @@ static av_cold int v4l2_encode_close(AVCodecContext *avctx) #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM #define V4L_M2M_CAPTURE_OPTS \ - V4L_M2M_DEFAULT_OPTS,\ + { "num_output_buffers", "Number of buffers in the output context",\ + OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },\ { "num_capture_buffers", "Number of buffers in the capture context", \ - OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS } + OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 8 }, 8, INT_MAX, FLAGS } static const AVOption mpeg4_options[] = { V4L_M2M_CAPTURE_OPTS, diff --git a/libavcodec/v4l2_req_decode_q.c b/libavcodec/v4l2_req_decode_q.c new file mode 100644 index 0000000000..5b3fb958fa --- /dev/null +++ b/libavcodec/v4l2_req_decode_q.c @@ -0,0 +1,84 @@ +#include +#include +#include + +#include "v4l2_req_decode_q.h" + +int decode_q_in_q(const req_decode_ent * const d) +{ + return d->in_q; +} + +void decode_q_add(req_decode_q * const q, req_decode_ent * const d) +{ + pthread_mutex_lock(&q->q_lock); + if (!q->head) { + q->head = d; + q->tail = d; + d->prev = NULL; + } + else { + q->tail->next = d; + d->prev = q->tail; + q->tail = d; + } + d->next = NULL; + d->in_q = 1; + pthread_mutex_unlock(&q->q_lock); +} + +// Remove entry from Q - if head wake-up anything that was waiting +void decode_q_remove(req_decode_q * const q, req_decode_ent * const d) +{ + int try_signal = 0; + + if (!d->in_q) + return; + + pthread_mutex_lock(&q->q_lock); + if (d->prev) + d->prev->next = d->next; + else { + try_signal = 1; // Only need to signal if we were head + q->head = d->next; + } + + if (d->next) + d->next->prev = d->prev; + else + q->tail = d->prev; + + // Not strictly needed but makes debug easier + d->next = NULL; + d->prev = NULL; + d->in_q = 0; + pthread_mutex_unlock(&q->q_lock); + + if (try_signal) + pthread_cond_broadcast(&q->q_cond); +} + +void decode_q_wait(req_decode_q * const q, req_decode_ent * const d) +{ + pthread_mutex_lock(&q->q_lock); + + while (q->head != d) + pthread_cond_wait(&q->q_cond, &q->q_lock); + + pthread_mutex_unlock(&q->q_lock); +} + +void decode_q_uninit(req_decode_q * const q) +{ + pthread_mutex_destroy(&q->q_lock); + pthread_cond_destroy(&q->q_cond); +} + +void decode_q_init(req_decode_q * const q) +{ + memset(q, 0, sizeof(*q)); + pthread_mutex_init(&q->q_lock, NULL); + pthread_cond_init(&q->q_cond, NULL); +} + + diff --git a/libavcodec/v4l2_req_decode_q.h b/libavcodec/v4l2_req_decode_q.h new file mode 100644 index 0000000000..af7bbe1de4 --- /dev/null +++ b/libavcodec/v4l2_req_decode_q.h @@ -0,0 +1,25 @@ +#ifndef AVCODEC_V4L2_REQ_DECODE_Q_H +#define AVCODEC_V4L2_REQ_DECODE_Q_H + +typedef struct req_decode_ent { + struct req_decode_ent * next; + struct req_decode_ent * prev; + int in_q; +} req_decode_ent; + +typedef struct req_decode_q { + pthread_mutex_t q_lock; + pthread_cond_t q_cond; + req_decode_ent * head; + req_decode_ent * tail; +} req_decode_q; + +int decode_q_in_q(const req_decode_ent * const d); +void decode_q_add(req_decode_q * const q, req_decode_ent * const d); +void decode_q_remove(req_decode_q * const q, req_decode_ent * const d); +void decode_q_wait(req_decode_q * const q, req_decode_ent * const d); +void decode_q_uninit(req_decode_q * const q); +void decode_q_init(req_decode_q * const q); + +#endif + diff --git a/libavcodec/v4l2_req_devscan.c b/libavcodec/v4l2_req_devscan.c new file mode 100644 index 0000000000..cfa94d55c4 --- /dev/null +++ b/libavcodec/v4l2_req_devscan.c @@ -0,0 +1,449 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include "v4l2_req_devscan.h" +#include "v4l2_req_utils.h" + +struct decdev { + enum v4l2_buf_type src_type; + uint32_t src_fmt_v4l2; + const char * vname; + const char * mname; +}; + +struct devscan { + struct decdev env; + unsigned int dev_size; + unsigned int dev_count; + struct decdev *devs; +}; + +static int video_src_pixfmt_supported(uint32_t fmt) +{ + return 1; +} + +static void v4l2_setup_format(struct v4l2_format *format, unsigned int type, + unsigned int width, unsigned int height, + unsigned int pixelformat) +{ + unsigned int sizeimage; + + memset(format, 0, sizeof(*format)); + format->type = type; + + sizeimage = V4L2_TYPE_IS_OUTPUT(type) ? 4 * 1024 * 1024 : 0; + + if (V4L2_TYPE_IS_MULTIPLANAR(type)) { + format->fmt.pix_mp.width = width; + format->fmt.pix_mp.height = height; + format->fmt.pix_mp.plane_fmt[0].sizeimage = sizeimage; + format->fmt.pix_mp.pixelformat = pixelformat; + } else { + format->fmt.pix.width = width; + format->fmt.pix.height = height; + format->fmt.pix.sizeimage = sizeimage; + format->fmt.pix.pixelformat = pixelformat; + } +} + +static int v4l2_set_format(int video_fd, unsigned int type, unsigned int pixelformat, + unsigned int width, unsigned int height) +{ + struct v4l2_format format; + + v4l2_setup_format(&format, type, width, height, pixelformat); + + return ioctl(video_fd, VIDIOC_S_FMT, &format) ? -errno : 0; +} + +static int v4l2_query_capabilities(int video_fd, unsigned int *capabilities) +{ + struct v4l2_capability capability = { 0 }; + int rc; + + rc = ioctl(video_fd, VIDIOC_QUERYCAP, &capability); + if (rc < 0) + return -errno; + + if (capabilities != NULL) { + if ((capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0) + *capabilities = capability.device_caps; + else + *capabilities = capability.capabilities; + } + + return 0; +} + +static int devscan_add(struct devscan *const scan, + enum v4l2_buf_type src_type, + uint32_t src_fmt_v4l2, + const char * vname, + const char * mname) +{ + struct decdev *d; + + if (scan->dev_size <= scan->dev_count) { + unsigned int n = !scan->dev_size ? 4 : scan->dev_size * 2; + d = realloc(scan->devs, n * sizeof(*d)); + if (!d) + return -ENOMEM; + scan->devs = d; + scan->dev_size = n; + } + + d = scan->devs + scan->dev_count; + d->src_type = src_type; + d->src_fmt_v4l2 = src_fmt_v4l2; + d->vname = strdup(vname); + if (!d->vname) + return -ENOMEM; + d->mname = strdup(mname); + if (!d->mname) { + free((char *)d->vname); + return -ENOMEM; + } + ++scan->dev_count; + return 0; +} + +void devscan_delete(struct devscan **const pScan) +{ + unsigned int i; + struct devscan * const scan = *pScan; + + if (!scan) + return; + *pScan = NULL; + + for (i = 0; i < scan->dev_count; ++i) { + free((char*)scan->devs[i].mname); + free((char*)scan->devs[i].vname); + } + free(scan->devs); + free(scan); +} + +#define REQ_BUF_CAPS (\ + V4L2_BUF_CAP_SUPPORTS_DMABUF |\ + V4L2_BUF_CAP_SUPPORTS_REQUESTS |\ + V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF) + +static void probe_formats(void * const dc, + struct devscan *const scan, + const int fd, + const unsigned int type_v4l2, + const char *const mpath, + const char *const vpath) +{ + unsigned int i; + for (i = 0;; ++i) { + struct v4l2_fmtdesc fmtdesc = { + .index = i, + .type = type_v4l2 + }; + struct v4l2_requestbuffers rbufs = { + .count = 0, + .type = type_v4l2, + .memory = V4L2_MEMORY_MMAP + }; + while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) { + if (errno == EINTR) + continue; + if (errno != EINVAL) + request_err(dc, "Enum[%d] failed for type=%d\n", i, type_v4l2); + return; + } + if (!video_src_pixfmt_supported(fmtdesc.pixelformat)) + continue; + + if (v4l2_set_format(fd, type_v4l2, fmtdesc.pixelformat, 720, 480)) { + request_debug(dc, "Set failed for type=%d, pf=%.4s\n", type_v4l2, (char*)&fmtdesc.pixelformat); + continue; + } + + while (ioctl(fd, VIDIOC_REQBUFS, &rbufs)) { + if (errno != EINTR) { + request_debug(dc, "%s: Reqbufs failed\n", vpath); + continue; + } + } + + if ((rbufs.capabilities & REQ_BUF_CAPS) != REQ_BUF_CAPS) { + request_debug(dc, "%s: Buf caps %#x insufficient\n", vpath, rbufs.capabilities); + continue; + } + + request_debug(dc, "Adding: %s,%s pix=%#x, type=%d\n", + mpath, vpath, fmtdesc.pixelformat, type_v4l2); + devscan_add(scan, type_v4l2, fmtdesc.pixelformat, vpath, mpath); + } +} + + +static int probe_video_device(void * const dc, + struct udev_device *const device, + struct devscan *const scan, + const char *const mpath) +{ + int ret; + unsigned int capabilities = 0; + int video_fd = -1; + + const char *path = udev_device_get_devnode(device); + if (!path) { + request_err(dc, "%s: get video device devnode failed\n", __func__); + ret = -EINVAL; + goto fail; + } + + video_fd = open(path, O_RDWR, 0); + if (video_fd == -1) { + ret = -errno; + request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(errno), errno); + goto fail; + } + + ret = v4l2_query_capabilities(video_fd, &capabilities); + if (ret < 0) { + request_err(dc, "%s: get video capability failed, %s (%d)\n", __func__, strerror(-ret), -ret); + goto fail; + } + + request_debug(dc, "%s: path=%s capabilities=%#x\n", __func__, path, capabilities); + + if (!(capabilities & V4L2_CAP_STREAMING)) { + request_debug(dc, "%s: missing required streaming capability\n", __func__); + ret = -EINVAL; + goto fail; + } + + if (!(capabilities & (V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M))) { + request_debug(dc, "%s: missing required mem2mem capability\n", __func__); + ret = -EINVAL; + goto fail; + } + + /* Should check capture formats too... */ + if ((capabilities & V4L2_CAP_VIDEO_M2M) != 0) + probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT, mpath, path); + if ((capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) + probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, mpath, path); + + close(video_fd); + return 0; + +fail: + if (video_fd >= 0) + close(video_fd); + return ret; +} + +static int probe_media_device(void * const dc, + struct udev_device *const device, + struct devscan *const scan) +{ + int ret; + int rv; + struct media_device_info device_info = { 0 }; + struct media_v2_topology topology = { 0 }; + struct media_v2_interface *interfaces = NULL; + struct udev *udev = udev_device_get_udev(device); + struct udev_device *video_device; + dev_t devnum; + int media_fd = -1; + + const char *path = udev_device_get_devnode(device); + if (!path) { + request_err(dc, "%s: get media device devnode failed\n", __func__); + ret = -EINVAL; + goto fail; + } + + media_fd = open(path, O_RDWR, 0); + if (media_fd < 0) { + ret = -errno; + request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(-ret), -ret); + goto fail; + } + + rv = ioctl(media_fd, MEDIA_IOC_DEVICE_INFO, &device_info); + if (rv < 0) { + ret = -errno; + request_err(dc, "%s: get media device info failed, %s (%d)\n", __func__, strerror(-ret), -ret); + goto fail; + } + + rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology); + if (rv < 0) { + ret = -errno; + request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret); + goto fail; + } + + if (topology.num_interfaces <= 0) { + request_err(dc, "%s: media device has no interfaces\n", __func__); + ret = -EINVAL; + goto fail; + } + + interfaces = calloc(topology.num_interfaces, sizeof(*interfaces)); + if (!interfaces) { + request_err(dc, "%s: allocating media interface struct failed\n", __func__); + ret = -ENOMEM; + goto fail; + } + + topology.ptr_interfaces = (__u64)(uintptr_t)interfaces; + rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology); + if (rv < 0) { + ret = -errno; + request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret); + goto fail; + } + + for (int i = 0; i < topology.num_interfaces; i++) { + if (interfaces[i].intf_type != MEDIA_INTF_T_V4L_VIDEO) + continue; + + devnum = makedev(interfaces[i].devnode.major, interfaces[i].devnode.minor); + video_device = udev_device_new_from_devnum(udev, 'c', devnum); + if (!video_device) { + ret = -errno; + request_err(dc, "%s: video_device[%d]=%p\n", __func__, i, video_device); + continue; + } + + ret = probe_video_device(dc, video_device, scan, path); + udev_device_unref(video_device); + + if (ret != 0) + goto fail; + } + +fail: + free(interfaces); + if (media_fd != -1) + close(media_fd); + return ret; +} + +const char *decdev_media_path(const struct decdev *const dev) +{ + return !dev ? NULL : dev->mname; +} + +const char *decdev_video_path(const struct decdev *const dev) +{ + return !dev ? NULL : dev->vname; +} + +enum v4l2_buf_type decdev_src_type(const struct decdev *const dev) +{ + return !dev ? 0 : dev->src_type; +} + +uint32_t decdev_src_pixelformat(const struct decdev *const dev) +{ + return !dev ? 0 : dev->src_fmt_v4l2; +} + + +const struct decdev *devscan_find(struct devscan *const scan, + const uint32_t src_fmt_v4l2) +{ + unsigned int i; + + if (scan->env.mname && scan->env.vname) + return &scan->env; + + if (!src_fmt_v4l2) + return scan->dev_count ? scan->devs + 0 : NULL; + + for (i = 0; i != scan->dev_count; ++i) { + if (scan->devs[i].src_fmt_v4l2 == src_fmt_v4l2) + return scan->devs + i; + } + return NULL; +} + +int devscan_build(void * const dc, struct devscan **pscan) +{ + int ret; + struct udev *udev; + struct udev_enumerate *enumerate; + struct udev_list_entry *devices; + struct udev_list_entry *entry; + struct udev_device *device; + struct devscan * scan; + + *pscan = NULL; + + scan = calloc(1, sizeof(*scan)); + if (!scan) { + ret = -ENOMEM; + goto fail; + } + + scan->env.mname = getenv("LIBVA_V4L2_REQUEST_MEDIA_PATH"); + scan->env.vname = getenv("LIBVA_V4L2_REQUEST_VIDEO_PATH"); + if (scan->env.mname && scan->env.vname) { + request_info(dc, "Media/video device env overrides found: %s,%s\n", + scan->env.mname, scan->env.vname); + *pscan = scan; + return 0; + } + + udev = udev_new(); + if (!udev) { + request_err(dc, "%s: allocating udev context failed\n", __func__); + ret = -ENOMEM; + goto fail; + } + + enumerate = udev_enumerate_new(udev); + if (!enumerate) { + request_err(dc, "%s: allocating udev enumerator failed\n", __func__); + ret = -ENOMEM; + goto fail; + } + + udev_enumerate_add_match_subsystem(enumerate, "media"); + udev_enumerate_scan_devices(enumerate); + + devices = udev_enumerate_get_list_entry(enumerate); + udev_list_entry_foreach(entry, devices) { + const char *path = udev_list_entry_get_name(entry); + if (!path) + continue; + + device = udev_device_new_from_syspath(udev, path); + if (!device) + continue; + + probe_media_device(dc, device, scan); + udev_device_unref(device); + } + + udev_enumerate_unref(enumerate); + + *pscan = scan; + return 0; + +fail: + udev_unref(udev); + devscan_delete(&scan); + return ret; +} + diff --git a/libavcodec/v4l2_req_devscan.h b/libavcodec/v4l2_req_devscan.h new file mode 100644 index 0000000000..956d9234f1 --- /dev/null +++ b/libavcodec/v4l2_req_devscan.h @@ -0,0 +1,23 @@ +#ifndef _DEVSCAN_H_ +#define _DEVSCAN_H_ + +#include + +struct devscan; +struct decdev; +enum v4l2_buf_type; + +/* These return pointers to data in the devscan structure and so are vaild + * for the lifetime of that + */ +const char *decdev_media_path(const struct decdev *const dev); +const char *decdev_video_path(const struct decdev *const dev); +enum v4l2_buf_type decdev_src_type(const struct decdev *const dev); +uint32_t decdev_src_pixelformat(const struct decdev *const dev); + +const struct decdev *devscan_find(struct devscan *const scan, const uint32_t src_fmt_v4l2); + +int devscan_build(void * const dc, struct devscan **pscan); +void devscan_delete(struct devscan **const pScan); + +#endif diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c new file mode 100644 index 0000000000..acc0366e76 --- /dev/null +++ b/libavcodec/v4l2_req_dmabufs.c @@ -0,0 +1,369 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "v4l2_req_dmabufs.h" +#include "v4l2_req_utils.h" + +#define DMABUF_NAME1 "/dev/dma_heap/linux,cma" +#define DMABUF_NAME2 "/dev/dma_heap/reserved" + +#define TRACE_ALLOC 0 + +struct dmabufs_ctl; +struct dmabuf_h; + +struct dmabuf_fns { + int (*buf_alloc)(struct dmabufs_ctl * dbsc, struct dmabuf_h * dh, size_t size); + void (*buf_free)(struct dmabuf_h * dh); + int (*ctl_new)(struct dmabufs_ctl * dbsc); + void (*ctl_free)(struct dmabufs_ctl * dbsc); +}; + +struct dmabufs_ctl { + atomic_int ref_count; + int fd; + size_t page_size; + void * v; + const struct dmabuf_fns * fns; +}; + +struct dmabuf_h { + int fd; + size_t size; + size_t len; + void * mapptr; + void * v; + const struct dmabuf_fns * fns; +}; + +#if TRACE_ALLOC +static unsigned int total_bufs = 0; +static size_t total_size = 0; +#endif + +struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size) +{ + struct dmabuf_h *dh; + + if (mapptr == MAP_FAILED) + return NULL; + + dh = malloc(sizeof(*dh)); + if (!dh) + return NULL; + + *dh = (struct dmabuf_h) { + .fd = -1, + .size = size, + .mapptr = mapptr + }; + + return dh; +} + +struct dmabuf_h * dmabuf_import(int fd, size_t size) +{ + struct dmabuf_h *dh; + + fd = dup(fd); + if (fd < 0 || size == 0) + return NULL; + + dh = malloc(sizeof(*dh)); + if (!dh) { + close(fd); + return NULL; + } + + *dh = (struct dmabuf_h) { + .fd = fd, + .size = size, + .mapptr = MAP_FAILED + }; + +#if TRACE_ALLOC + ++total_bufs; + total_size += dh->size; + request_log("%s: Import: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); +#endif + + return dh; +} + +struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * old, size_t size) +{ + struct dmabuf_h * dh; + if (old != NULL) { + if (old->size >= size) { + return old; + } + dmabuf_free(old); + } + + if (size == 0 || + (dh = malloc(sizeof(*dh))) == NULL) + return NULL; + + *dh = (struct dmabuf_h){ + .fd = -1, + .mapptr = MAP_FAILED, + .fns = dbsc->fns + }; + + if (dh->fns->buf_alloc(dbsc, dh, size) != 0) + goto fail; + + +#if TRACE_ALLOC + ++total_bufs; + total_size += dh->size; + request_log("%s: Alloc: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); +#endif + + return dh; + +fail: + free(dh); + return NULL; +} + +int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags) +{ + struct dma_buf_sync sync = { + .flags = flags + }; + if (dh->fd == -1) + return 0; + while (ioctl(dh->fd, DMA_BUF_IOCTL_SYNC, &sync) == -1) { + const int err = errno; + if (errno == EINTR) + continue; + request_log("%s: ioctl failed: flags=%#x\n", __func__, flags); + return -err; + } + return 0; +} + +int dmabuf_write_start(struct dmabuf_h * const dh) +{ + return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE); +} + +int dmabuf_write_end(struct dmabuf_h * const dh) +{ + return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE); +} + +int dmabuf_read_start(struct dmabuf_h * const dh) +{ + if (!dmabuf_map(dh)) + return -1; + return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ); +} + +int dmabuf_read_end(struct dmabuf_h * const dh) +{ + return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ); +} + + +void * dmabuf_map(struct dmabuf_h * const dh) +{ + if (!dh) + return NULL; + if (dh->mapptr != MAP_FAILED) + return dh->mapptr; + dh->mapptr = mmap(NULL, dh->size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + dh->fd, 0); + if (dh->mapptr == MAP_FAILED) { + request_log("%s: Map failed\n", __func__); + return NULL; + } + return dh->mapptr; +} + +int dmabuf_fd(const struct dmabuf_h * const dh) +{ + if (!dh) + return -1; + return dh->fd; +} + +size_t dmabuf_size(const struct dmabuf_h * const dh) +{ + if (!dh) + return 0; + return dh->size; +} + +size_t dmabuf_len(const struct dmabuf_h * const dh) +{ + if (!dh) + return 0; + return dh->len; +} + +void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len) +{ + dh->len = len; +} + +void dmabuf_free(struct dmabuf_h * dh) +{ + if (!dh) + return; + +#if TRACE_ALLOC + --total_bufs; + total_size -= dh->size; + request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); +#endif + + dh->fns->buf_free(dh); + + if (dh->mapptr != MAP_FAILED && dh->mapptr != NULL) + munmap(dh->mapptr, dh->size); + if (dh->fd != -1) + while (close(dh->fd) == -1 && errno == EINTR) + /* loop */; + free(dh); +} + +static struct dmabufs_ctl * dmabufs_ctl_new2(const struct dmabuf_fns * const fns) +{ + struct dmabufs_ctl * dbsc = calloc(1, sizeof(*dbsc)); + + if (!dbsc) + return NULL; + + dbsc->fd = -1; + dbsc->fns = fns; + dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE); + + if (fns->ctl_new(dbsc) != 0) + goto fail; + + return dbsc; + +fail: + free(dbsc); + return NULL; +} + +static void dmabufs_ctl_free(struct dmabufs_ctl * const dbsc) +{ + request_debug(NULL, "Free dmabuf ctl\n"); + + dbsc->fns->ctl_free(dbsc); + + free(dbsc); +} + +void dmabufs_ctl_unref(struct dmabufs_ctl ** const pDbsc) +{ + struct dmabufs_ctl * const dbsc = *pDbsc; + + if (!dbsc) + return; + *pDbsc = NULL; + + if (atomic_fetch_sub(&dbsc->ref_count, 1) != 0) + return; + + dmabufs_ctl_free(dbsc); +} + +struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc) +{ + atomic_fetch_add(&dbsc->ref_count, 1); + return dbsc; +} + +//----------------------------------------------------------------------------- +// +// Alloc dmabuf via CMA + +static int ctl_cma_new(struct dmabufs_ctl * dbsc) +{ + while ((dbsc->fd = open(DMABUF_NAME1, O_RDWR)) == -1 && + errno == EINTR) + /* Loop */; + + if (dbsc->fd == -1) { + while ((dbsc->fd = open(DMABUF_NAME2, O_RDWR)) == -1 && + errno == EINTR) + /* Loop */; + if (dbsc->fd == -1) { + request_log("Unable to open either %s or %s\n", + DMABUF_NAME1, DMABUF_NAME2); + return -1; + } + } + return 0; +} + +static void ctl_cma_free(struct dmabufs_ctl * dbsc) +{ + if (dbsc->fd != -1) + while (close(dbsc->fd) == -1 && errno == EINTR) + /* loop */; + +} + +static int buf_cma_alloc(struct dmabufs_ctl * const dbsc, struct dmabuf_h * dh, size_t size) +{ + struct dma_heap_allocation_data data = { + .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1), + .fd = 0, + .fd_flags = O_RDWR, + .heap_flags = 0 + }; + + while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) { + int err = errno; + request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n", + (uint64_t)data.len, + dbsc->fd, + err, + strerror(err)); + if (err == EINTR) + continue; + return -err; + } + + dh->fd = data.fd; + dh->size = (size_t)data.len; + return 0; +} + +static void buf_cma_free(struct dmabuf_h * dh) +{ + // Nothing needed +} + +static const struct dmabuf_fns dmabuf_cma_fns = { + .buf_alloc = buf_cma_alloc, + .buf_free = buf_cma_free, + .ctl_new = ctl_cma_new, + .ctl_free = ctl_cma_free, +}; + +struct dmabufs_ctl * dmabufs_ctl_new(void) +{ + request_debug(NULL, "Dmabufs using CMA\n");; + return dmabufs_ctl_new2(&dmabuf_cma_fns); +} + diff --git a/libavcodec/v4l2_req_dmabufs.h b/libavcodec/v4l2_req_dmabufs.h new file mode 100644 index 0000000000..381ba2708d --- /dev/null +++ b/libavcodec/v4l2_req_dmabufs.h @@ -0,0 +1,44 @@ +#ifndef DMABUFS_H +#define DMABUFS_H + +#include + +struct dmabufs_ctl; +struct dmabuf_h; + +struct dmabufs_ctl * dmabufs_ctl_new(void); +void dmabufs_ctl_unref(struct dmabufs_ctl ** const pdbsc); +struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc); + +// Need not preserve old contents +// On NULL return old buffer is freed +struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h *, size_t size); + +static inline struct dmabuf_h * dmabuf_alloc(struct dmabufs_ctl * dbsc, size_t size) { + return dmabuf_realloc(dbsc, NULL, size); +} +/* Create from existing fd - dups(fd) */ +struct dmabuf_h * dmabuf_import(int fd, size_t size); +/* Import an MMAP - return NULL if mapptr = MAP_FAIL */ +struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size); + +void * dmabuf_map(struct dmabuf_h * const dh); + +/* flags from linux/dmabuf.h DMA_BUF_SYNC_xxx */ +int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags); + +int dmabuf_write_start(struct dmabuf_h * const dh); +int dmabuf_write_end(struct dmabuf_h * const dh); +int dmabuf_read_start(struct dmabuf_h * const dh); +int dmabuf_read_end(struct dmabuf_h * const dh); + +int dmabuf_fd(const struct dmabuf_h * const dh); +/* Allocated size */ +size_t dmabuf_size(const struct dmabuf_h * const dh); +/* Bytes in use */ +size_t dmabuf_len(const struct dmabuf_h * const dh); +/* Set bytes in use */ +void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len); +void dmabuf_free(struct dmabuf_h * dh); + +#endif diff --git a/libavcodec/v4l2_req_hevc_v1.c b/libavcodec/v4l2_req_hevc_v1.c new file mode 100644 index 0000000000..169b532832 --- /dev/null +++ b/libavcodec/v4l2_req_hevc_v1.c @@ -0,0 +1,3 @@ +#define HEVC_CTRLS_VERSION 1 +#include "v4l2_req_hevc_vx.c" + diff --git a/libavcodec/v4l2_req_hevc_v2.c b/libavcodec/v4l2_req_hevc_v2.c new file mode 100644 index 0000000000..42af98e156 --- /dev/null +++ b/libavcodec/v4l2_req_hevc_v2.c @@ -0,0 +1,3 @@ +#define HEVC_CTRLS_VERSION 2 +#include "v4l2_req_hevc_vx.c" + diff --git a/libavcodec/v4l2_req_hevc_v3.c b/libavcodec/v4l2_req_hevc_v3.c new file mode 100644 index 0000000000..dcc8d95632 --- /dev/null +++ b/libavcodec/v4l2_req_hevc_v3.c @@ -0,0 +1,3 @@ +#define HEVC_CTRLS_VERSION 3 +#include "v4l2_req_hevc_vx.c" + diff --git a/libavcodec/v4l2_req_hevc_v4.c b/libavcodec/v4l2_req_hevc_v4.c new file mode 100644 index 0000000000..c35579d8e0 --- /dev/null +++ b/libavcodec/v4l2_req_hevc_v4.c @@ -0,0 +1,3 @@ +#define HEVC_CTRLS_VERSION 4 +#include "v4l2_req_hevc_vx.c" + diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c new file mode 100644 index 0000000000..e1bd5c6a1f --- /dev/null +++ b/libavcodec/v4l2_req_hevc_vx.c @@ -0,0 +1,1362 @@ +// File included by v4l2_req_hevc_v* - not compiled on its own + +#include "decode.h" +#include "hevcdec.h" +#include "hwconfig.h" +#include "internal.h" +#include "thread.h" + +#if HEVC_CTRLS_VERSION == 1 +#include "hevc-ctrls-v1.h" + +// Fixup renamed entries +#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT + +#elif HEVC_CTRLS_VERSION == 2 +#include "hevc-ctrls-v2.h" +#elif HEVC_CTRLS_VERSION == 3 +#include "hevc-ctrls-v3.h" +#elif HEVC_CTRLS_VERSION == 4 +#include +#if !defined(V4L2_CID_STATELESS_HEVC_SPS) +#include "hevc-ctrls-v4.h" +#endif +#else +#error Unknown HEVC_CTRLS_VERSION +#endif + +#ifndef V4L2_CID_STATELESS_HEVC_SPS +#define V4L2_CID_STATELESS_HEVC_SPS V4L2_CID_MPEG_VIDEO_HEVC_SPS +#define V4L2_CID_STATELESS_HEVC_PPS V4L2_CID_MPEG_VIDEO_HEVC_PPS +#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS +#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX +#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS +#define V4L2_CID_STATELESS_HEVC_DECODE_MODE V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE +#define V4L2_CID_STATELESS_HEVC_START_CODE V4L2_CID_MPEG_VIDEO_HEVC_START_CODE + +#define V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED +#define V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED +#define V4L2_STATELESS_HEVC_START_CODE_NONE V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE +#define V4L2_STATELESS_HEVC_START_CODE_ANNEX_B V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B +#endif + +#include "v4l2_request_hevc.h" + +#include "libavutil/hwcontext_drm.h" + +#include +#include + +#include "v4l2_req_devscan.h" +#include "v4l2_req_dmabufs.h" +#include "v4l2_req_pollqueue.h" +#include "v4l2_req_media.h" +#include "v4l2_req_utils.h" + +// Attached to buf[0] in frame +// Pooled in hwcontext so generally create once - 1/frame +typedef struct V4L2MediaReqDescriptor { + AVDRMFrameDescriptor drm; + + // Media + uint64_t timestamp; + struct qent_dst * qe_dst; + + // Decode only - should be NULL by the time we emit the frame + struct req_decode_ent decode_ent; + + struct media_request *req; + struct qent_src *qe_src; + +#if HEVC_CTRLS_VERSION >= 2 + struct v4l2_ctrl_hevc_decode_params dec; +#endif + + size_t num_slices; + size_t alloced_slices; + struct v4l2_ctrl_hevc_slice_params * slice_params; + struct slice_info * slices; + + size_t num_offsets; + size_t alloced_offsets; + uint32_t *offsets; + +} V4L2MediaReqDescriptor; + +struct slice_info { + const uint8_t * ptr; + size_t len; // bytes + size_t n_offsets; +}; + +// Handy container for accumulating controls before setting +struct req_controls { + int has_scaling; + struct timeval tv; + struct v4l2_ctrl_hevc_sps sps; + struct v4l2_ctrl_hevc_pps pps; + struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix; +}; + +//static uint8_t nalu_slice_start_code[] = { 0x00, 0x00, 0x01 }; + + +// Get an FFmpeg format from the v4l2 format +static enum AVPixelFormat pixel_format_from_format(const struct v4l2_format *const format) +{ + switch (V4L2_TYPE_IS_MULTIPLANAR(format->type) ? + format->fmt.pix_mp.pixelformat : format->fmt.pix.pixelformat) { + case V4L2_PIX_FMT_YUV420: + return AV_PIX_FMT_YUV420P; + case V4L2_PIX_FMT_NV12: + return AV_PIX_FMT_NV12; +#if CONFIG_SAND + case V4L2_PIX_FMT_NV12_COL128: + return AV_PIX_FMT_RPI4_8; + case V4L2_PIX_FMT_NV12_10_COL128: + return AV_PIX_FMT_RPI4_10; +#endif + default: + break; + } + return AV_PIX_FMT_NONE; +} + +static inline uint64_t frame_capture_dpb(const AVFrame * const frame) +{ + const V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0]; + return rd->timestamp; +} + +static inline void frame_set_capture_dpb(AVFrame * const frame, const uint64_t dpb_stamp) +{ + V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0]; + rd->timestamp = dpb_stamp; +} + +static void fill_pred_table(const HEVCContext *h, struct v4l2_hevc_pred_weight_table *table) +{ + int32_t luma_weight_denom, chroma_weight_denom; + const SliceHeader *sh = &h->sh; + + if (sh->slice_type == HEVC_SLICE_I || + (sh->slice_type == HEVC_SLICE_P && !h->ps.pps->weighted_pred_flag) || + (sh->slice_type == HEVC_SLICE_B && !h->ps.pps->weighted_bipred_flag)) + return; + + table->luma_log2_weight_denom = sh->luma_log2_weight_denom; + + if (h->ps.sps->chroma_format_idc) + table->delta_chroma_log2_weight_denom = sh->chroma_log2_weight_denom - sh->luma_log2_weight_denom; + + luma_weight_denom = (1 << sh->luma_log2_weight_denom); + chroma_weight_denom = (1 << sh->chroma_log2_weight_denom); + + for (int i = 0; i < 15 && i < sh->nb_refs[L0]; i++) { + table->delta_luma_weight_l0[i] = sh->luma_weight_l0[i] - luma_weight_denom; + table->luma_offset_l0[i] = sh->luma_offset_l0[i]; + table->delta_chroma_weight_l0[i][0] = sh->chroma_weight_l0[i][0] - chroma_weight_denom; + table->delta_chroma_weight_l0[i][1] = sh->chroma_weight_l0[i][1] - chroma_weight_denom; + table->chroma_offset_l0[i][0] = sh->chroma_offset_l0[i][0]; + table->chroma_offset_l0[i][1] = sh->chroma_offset_l0[i][1]; + } + + if (sh->slice_type != HEVC_SLICE_B) + return; + + for (int i = 0; i < 15 && i < sh->nb_refs[L1]; i++) { + table->delta_luma_weight_l1[i] = sh->luma_weight_l1[i] - luma_weight_denom; + table->luma_offset_l1[i] = sh->luma_offset_l1[i]; + table->delta_chroma_weight_l1[i][0] = sh->chroma_weight_l1[i][0] - chroma_weight_denom; + table->delta_chroma_weight_l1[i][1] = sh->chroma_weight_l1[i][1] - chroma_weight_denom; + table->chroma_offset_l1[i][0] = sh->chroma_offset_l1[i][0]; + table->chroma_offset_l1[i][1] = sh->chroma_offset_l1[i][1]; + } +} + +#if HEVC_CTRLS_VERSION <= 2 +static int find_frame_rps_type(const HEVCContext *h, uint64_t timestamp) +{ + const HEVCFrame *frame; + int i; + + for (i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) { + frame = h->rps[ST_CURR_BEF].ref[i]; + if (frame && timestamp == frame_capture_dpb(frame->frame)) + return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE; + } + + for (i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) { + frame = h->rps[ST_CURR_AFT].ref[i]; + if (frame && timestamp == frame_capture_dpb(frame->frame)) + return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER; + } + + for (i = 0; i < h->rps[LT_CURR].nb_refs; i++) { + frame = h->rps[LT_CURR].ref[i]; + if (frame && timestamp == frame_capture_dpb(frame->frame)) + return V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR; + } + + return 0; +} +#endif + +static unsigned int +get_ref_pic_index(const HEVCContext *h, const HEVCFrame *frame, + const struct v4l2_hevc_dpb_entry * const entries, + const unsigned int num_entries) +{ + uint64_t timestamp; + + if (!frame) + return 0; + + timestamp = frame_capture_dpb(frame->frame); + + for (unsigned int i = 0; i < num_entries; i++) { + if (entries[i].timestamp == timestamp) + return i; + } + + return 0; +} + +static const uint8_t * ptr_from_index(const uint8_t * b, unsigned int idx) +{ + unsigned int z = 0; + while (idx--) { + if (*b++ == 0) { + ++z; + if (z >= 2 && *b == 3) { + ++b; + z = 0; + } + } + else { + z = 0; + } + } + return b; +} + +static int slice_add(V4L2MediaReqDescriptor * const rd) +{ + if (rd->num_slices >= rd->alloced_slices) { + struct v4l2_ctrl_hevc_slice_params * p2; + struct slice_info * s2; + size_t n2 = rd->alloced_slices == 0 ? 8 : rd->alloced_slices * 2; + + p2 = av_realloc_array(rd->slice_params, n2, sizeof(*p2)); + if (p2 == NULL) + return AVERROR(ENOMEM); + rd->slice_params = p2; + + s2 = av_realloc_array(rd->slices, n2, sizeof(*s2)); + if (s2 == NULL) + return AVERROR(ENOMEM); + rd->slices = s2; + + rd->alloced_slices = n2; + } + ++rd->num_slices; + return 0; +} + +static int offsets_add(V4L2MediaReqDescriptor *const rd, const size_t n, const unsigned * const offsets) +{ + if (rd->num_offsets + n > rd->alloced_offsets) { + size_t n2 = rd->alloced_slices == 0 ? 128 : rd->alloced_slices * 2; + void * p2; + while (rd->num_offsets + n > n2) + n2 *= 2; + if ((p2 = av_realloc_array(rd->offsets, n2, sizeof(*rd->offsets))) == NULL) + return AVERROR(ENOMEM); + rd->offsets = p2; + rd->alloced_offsets = n2; + } + for (size_t i = 0; i != n; ++i) + rd->offsets[rd->num_offsets++] = offsets[i] - 1; + return 0; +} + +static unsigned int +fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries) +{ + unsigned int i; + unsigned int n = 0; + const HEVCFrame * const pic = h->ref; + + for (i = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) { + const HEVCFrame * const frame = &h->DPB[i]; + if (frame != pic && (frame->flags & (HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF))) { + struct v4l2_hevc_dpb_entry * const entry = entries + n++; + + entry->timestamp = frame_capture_dpb(frame->frame); +#if HEVC_CTRLS_VERSION <= 2 + entry->rps = find_frame_rps_type(h, entry->timestamp); +#else + entry->flags = (frame->flags & HEVC_FRAME_FLAG_LONG_REF) == 0 ? 0 : + V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE; +#endif + entry->field_pic = frame->frame->interlaced_frame; + +#if HEVC_CTRLS_VERSION <= 3 + /* TODO: Interleaved: Get the POC for each field. */ + entry->pic_order_cnt[0] = frame->poc; + entry->pic_order_cnt[1] = frame->poc; +#else + entry->pic_order_cnt_val = frame->poc; +#endif + } + } + return n; +} + +static void fill_slice_params(const HEVCContext * const h, +#if HEVC_CTRLS_VERSION >= 2 + const struct v4l2_ctrl_hevc_decode_params * const dec, +#endif + struct v4l2_ctrl_hevc_slice_params *slice_params, + uint32_t bit_size, uint32_t bit_offset) +{ + const SliceHeader * const sh = &h->sh; +#if HEVC_CTRLS_VERSION >= 2 + const struct v4l2_hevc_dpb_entry *const dpb = dec->dpb; + const unsigned int dpb_n = dec->num_active_dpb_entries; +#else + struct v4l2_hevc_dpb_entry *const dpb = slice_params->dpb; + unsigned int dpb_n; +#endif + unsigned int i; + RefPicList *rpl; + + *slice_params = (struct v4l2_ctrl_hevc_slice_params) { + .bit_size = bit_size, +#if HEVC_CTRLS_VERSION <= 3 + .data_bit_offset = bit_offset, +#else + .data_byte_offset = bit_offset / 8 + 1, +#endif + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + .slice_segment_addr = sh->slice_segment_addr, + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ + .nal_unit_type = h->nal_unit_type, + .nuh_temporal_id_plus1 = h->temporal_id + 1, + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + .slice_type = sh->slice_type, + .colour_plane_id = sh->colour_plane_id, + .slice_pic_order_cnt = h->ref->poc, + .num_ref_idx_l0_active_minus1 = sh->nb_refs[L0] ? sh->nb_refs[L0] - 1 : 0, + .num_ref_idx_l1_active_minus1 = sh->nb_refs[L1] ? sh->nb_refs[L1] - 1 : 0, + .collocated_ref_idx = sh->slice_temporal_mvp_enabled_flag ? sh->collocated_ref_idx : 0, + .five_minus_max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? 0 : 5 - sh->max_num_merge_cand, + .slice_qp_delta = sh->slice_qp_delta, + .slice_cb_qp_offset = sh->slice_cb_qp_offset, + .slice_cr_qp_offset = sh->slice_cr_qp_offset, + .slice_act_y_qp_offset = 0, + .slice_act_cb_qp_offset = 0, + .slice_act_cr_qp_offset = 0, + .slice_beta_offset_div2 = sh->beta_offset / 2, + .slice_tc_offset_div2 = sh->tc_offset / 2, + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ + .pic_struct = h->sei.picture_timing.picture_struct, + +#if HEVC_CTRLS_VERSION < 2 + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + .num_rps_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs, + .num_rps_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs, + .num_rps_poc_lt_curr = h->rps[LT_CURR].nb_refs, +#endif + }; + + if (sh->slice_sample_adaptive_offset_flag[0]) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA; + + if (sh->slice_sample_adaptive_offset_flag[1]) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA; + + if (sh->slice_temporal_mvp_enabled_flag) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED; + + if (sh->mvd_l1_zero_flag) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO; + + if (sh->cabac_init_flag) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT; + + if (sh->collocated_list == L0) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0; + + if (sh->disable_deblocking_filter_flag) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED; + + if (sh->slice_loop_filter_across_slices_enabled_flag) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED; + + if (sh->dependent_slice_segment_flag) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT; + +#if HEVC_CTRLS_VERSION < 2 + dpb_n = fill_dpb_entries(h, dpb); + slice_params->num_active_dpb_entries = dpb_n; +#endif + + if (sh->slice_type != HEVC_SLICE_I) { + rpl = &h->ref->refPicList[0]; + for (i = 0; i < rpl->nb_refs; i++) + slice_params->ref_idx_l0[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n); + } + + if (sh->slice_type == HEVC_SLICE_B) { + rpl = &h->ref->refPicList[1]; + for (i = 0; i < rpl->nb_refs; i++) + slice_params->ref_idx_l1[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n); + } + + fill_pred_table(h, &slice_params->pred_weight_table); + + slice_params->num_entry_point_offsets = sh->num_entry_point_offsets; +#if HEVC_CTRLS_VERSION <= 3 + if (slice_params->num_entry_point_offsets > 256) { + slice_params->num_entry_point_offsets = 256; + av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets); + } + + for (i = 0; i < slice_params->num_entry_point_offsets; i++) + slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1; +#endif +} + +#if HEVC_CTRLS_VERSION >= 2 +static void +fill_decode_params(const HEVCContext * const h, + struct v4l2_ctrl_hevc_decode_params * const dec) +{ + unsigned int i; + + *dec = (struct v4l2_ctrl_hevc_decode_params){ + .pic_order_cnt_val = h->poc, + .num_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs, + .num_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs, + .num_poc_lt_curr = h->rps[LT_CURR].nb_refs, + }; + + dec->num_active_dpb_entries = fill_dpb_entries(h, dec->dpb); + + // The docn does seem to ask that we fit our 32 bit signed POC into + // a U8 so... (To be fair 16 bits would be enough) + // Luckily we (Pi) don't use these fields + for (i = 0; i != h->rps[ST_CURR_BEF].nb_refs; ++i) + dec->poc_st_curr_before[i] = h->rps[ST_CURR_BEF].ref[i]->poc; + for (i = 0; i != h->rps[ST_CURR_AFT].nb_refs; ++i) + dec->poc_st_curr_after[i] = h->rps[ST_CURR_AFT].ref[i]->poc; + for (i = 0; i != h->rps[LT_CURR].nb_refs; ++i) + dec->poc_lt_curr[i] = h->rps[LT_CURR].ref[i]->poc; + + if (IS_IRAP(h)) + dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC; + if (IS_IDR(h)) + dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC; + if (h->sh.no_output_of_prior_pics_flag) + dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR; + +} +#endif + +static void fill_sps(struct v4l2_ctrl_hevc_sps *ctrl, const HEVCSPS *sps) +{ + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ + *ctrl = (struct v4l2_ctrl_hevc_sps) { + .chroma_format_idc = sps->chroma_format_idc, + .pic_width_in_luma_samples = sps->width, + .pic_height_in_luma_samples = sps->height, + .bit_depth_luma_minus8 = sps->bit_depth - 8, + .bit_depth_chroma_minus8 = sps->bit_depth - 8, + .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4, + .sps_max_dec_pic_buffering_minus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1, + .sps_max_num_reorder_pics = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics, + .sps_max_latency_increase_plus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_latency_increase + 1, + .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3, + .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size, + .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2, + .log2_diff_max_min_luma_transform_block_size = sps->log2_max_trafo_size - sps->log2_min_tb_size, + .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter, + .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra, + .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1, + .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1, + .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3, + .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size, + .num_short_term_ref_pic_sets = sps->nb_st_rps, + .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps, + .chroma_format_idc = sps->chroma_format_idc, + .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1, + }; + + if (sps->separate_colour_plane_flag) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE; + + if (sps->scaling_list_enable_flag) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED; + + if (sps->amp_enabled_flag) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_AMP_ENABLED; + + if (sps->sao_enabled) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET; + + if (sps->pcm_enabled_flag) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_ENABLED; + + if (sps->pcm.loop_filter_disable_flag) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED; + + if (sps->long_term_ref_pics_present_flag) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT; + + if (sps->sps_temporal_mvp_enabled_flag) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED; + + if (sps->sps_strong_intra_smoothing_enable_flag) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED; +} + +static void fill_scaling_matrix(const ScalingList * const sl, + struct v4l2_ctrl_hevc_scaling_matrix * const sm) +{ + unsigned int i; + + for (i = 0; i < 6; i++) { + unsigned int j; + + for (j = 0; j < 16; j++) + sm->scaling_list_4x4[i][j] = sl->sl[0][i][j]; + for (j = 0; j < 64; j++) { + sm->scaling_list_8x8[i][j] = sl->sl[1][i][j]; + sm->scaling_list_16x16[i][j] = sl->sl[2][i][j]; + if (i < 2) + sm->scaling_list_32x32[i][j] = sl->sl[3][i * 3][j]; + } + sm->scaling_list_dc_coef_16x16[i] = sl->sl_dc[0][i]; + if (i < 2) + sm->scaling_list_dc_coef_32x32[i] = sl->sl_dc[1][i * 3]; + } +} + +static void fill_pps(struct v4l2_ctrl_hevc_pps * const ctrl, const HEVCPPS * const pps) +{ + uint64_t flags = 0; + + if (pps->dependent_slice_segments_enabled_flag) + flags |= V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED; + + if (pps->output_flag_present_flag) + flags |= V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT; + + if (pps->sign_data_hiding_flag) + flags |= V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED; + + if (pps->cabac_init_present_flag) + flags |= V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT; + + if (pps->constrained_intra_pred_flag) + flags |= V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED; + + if (pps->transform_skip_enabled_flag) + flags |= V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED; + + if (pps->cu_qp_delta_enabled_flag) + flags |= V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED; + + if (pps->pic_slice_level_chroma_qp_offsets_present_flag) + flags |= V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT; + + if (pps->weighted_pred_flag) + flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED; + + if (pps->weighted_bipred_flag) + flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED; + + if (pps->transquant_bypass_enable_flag) + flags |= V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED; + + if (pps->tiles_enabled_flag) + flags |= V4L2_HEVC_PPS_FLAG_TILES_ENABLED; + + if (pps->entropy_coding_sync_enabled_flag) + flags |= V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED; + + if (pps->loop_filter_across_tiles_enabled_flag) + flags |= V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED; + + if (pps->seq_loop_filter_across_slices_enabled_flag) + flags |= V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED; + + if (pps->deblocking_filter_override_enabled_flag) + flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED; + + if (pps->disable_dbf) + flags |= V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER; + + if (pps->lists_modification_present_flag) + flags |= V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT; + + if (pps->slice_header_extension_present_flag) + flags |= V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */ + *ctrl = (struct v4l2_ctrl_hevc_pps) { + .num_extra_slice_header_bits = pps->num_extra_slice_header_bits, + .init_qp_minus26 = pps->pic_init_qp_minus26, + .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth, + .pps_cb_qp_offset = pps->cb_qp_offset, + .pps_cr_qp_offset = pps->cr_qp_offset, + .pps_beta_offset_div2 = pps->beta_offset / 2, + .pps_tc_offset_div2 = pps->tc_offset / 2, + .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2, + .flags = flags + }; + + + if (pps->tiles_enabled_flag) { + ctrl->num_tile_columns_minus1 = pps->num_tile_columns - 1; + ctrl->num_tile_rows_minus1 = pps->num_tile_rows - 1; + + for (int i = 0; i < pps->num_tile_columns; i++) + ctrl->column_width_minus1[i] = pps->column_width[i] - 1; + + for (int i = 0; i < pps->num_tile_rows; i++) + ctrl->row_height_minus1[i] = pps->row_height[i] - 1; + } +} + +// Called before finally returning the frame to the user +// Set corrupt flag here as this is actually the frame structure that +// is going to the user (in MT land each thread has its own pool) +static int frame_post_process(void *logctx, AVFrame *frame) +{ + V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)frame->data[0]; + +// av_log(NULL, AV_LOG_INFO, "%s\n", __func__); + frame->flags &= ~AV_FRAME_FLAG_CORRUPT; + if (rd->qe_dst) { + MediaBufsStatus stat = qent_dst_wait(rd->qe_dst); + if (stat != MEDIABUFS_STATUS_SUCCESS) { + av_log(logctx, AV_LOG_ERROR, "%s: Decode fail\n", __func__); + frame->flags |= AV_FRAME_FLAG_CORRUPT; + } + } + + return 0; +} + +static inline struct timeval cvt_dpb_to_tv(uint64_t t) +{ + t /= 1000; + return (struct timeval){ + .tv_usec = t % 1000000, + .tv_sec = t / 1000000 + }; +} + +static inline uint64_t cvt_timestamp_to_dpb(const unsigned int t) +{ + return (uint64_t)t * 1000; +} + +static int v4l2_request_hevc_start_frame(AVCodecContext *avctx, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + const HEVCContext *h = avctx->priv_data; + V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0]; + V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; + +// av_log(NULL, AV_LOG_INFO, "%s\n", __func__); + decode_q_add(&ctx->decode_q, &rd->decode_ent); + + rd->num_slices = 0; + ctx->timestamp++; + rd->timestamp = cvt_timestamp_to_dpb(ctx->timestamp); + + { + FrameDecodeData * const fdd = (FrameDecodeData*)h->ref->frame->private_ref->data; + fdd->post_process = frame_post_process; + } + + // qe_dst needs to be bound to the data buffer and only returned when that is + if (!rd->qe_dst) + { + if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) { + av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__); + return AVERROR(ENOMEM); + } + } + + ff_thread_finish_setup(avctx); // Allow next thread to enter rpi_hevc_start_frame + + return 0; +} + +// Object fd & size will be zapped by this & need setting later +static int drm_from_format(AVDRMFrameDescriptor * const desc, const struct v4l2_format * const format) +{ + AVDRMLayerDescriptor *layer = &desc->layers[0]; + unsigned int width; + unsigned int height; + unsigned int bpl; + uint32_t pixelformat; + + if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { + width = format->fmt.pix_mp.width; + height = format->fmt.pix_mp.height; + pixelformat = format->fmt.pix_mp.pixelformat; + bpl = format->fmt.pix_mp.plane_fmt[0].bytesperline; + } + else { + width = format->fmt.pix.width; + height = format->fmt.pix.height; + pixelformat = format->fmt.pix.pixelformat; + bpl = format->fmt.pix.bytesperline; + } + + switch (pixelformat) { + case V4L2_PIX_FMT_NV12: + layer->format = DRM_FORMAT_NV12; + desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; + break; +#if CONFIG_SAND + case V4L2_PIX_FMT_NV12_COL128: + layer->format = DRM_FORMAT_NV12; + desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl); + break; + case V4L2_PIX_FMT_NV12_10_COL128: + layer->format = DRM_FORMAT_P030; + desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl); + break; +#endif +#ifdef DRM_FORMAT_MOD_ALLWINNER_TILED + case V4L2_PIX_FMT_SUNXI_TILED_NV12: + layer->format = DRM_FORMAT_NV12; + desc->objects[0].format_modifier = DRM_FORMAT_MOD_ALLWINNER_TILED; + break; +#endif +#if defined(V4L2_PIX_FMT_NV15) && defined(DRM_FORMAT_NV15) + case V4L2_PIX_FMT_NV15: + layer->format = DRM_FORMAT_NV15; + desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; + break; +#endif + case V4L2_PIX_FMT_NV16: + layer->format = DRM_FORMAT_NV16; + desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; + break; +#if defined(V4L2_PIX_FMT_NV20) && defined(DRM_FORMAT_NV20) + case V4L2_PIX_FMT_NV20: + layer->format = DRM_FORMAT_NV20; + desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; + break; +#endif + default: + return -1; + } + + desc->nb_objects = 1; + desc->objects[0].fd = -1; + desc->objects[0].size = 0; + + desc->nb_layers = 1; + layer->nb_planes = 2; + + layer->planes[0].object_index = 0; + layer->planes[0].offset = 0; + layer->planes[0].pitch = bpl; +#if CONFIG_SAND + if (pixelformat == V4L2_PIX_FMT_NV12_COL128) { + layer->planes[1].object_index = 0; + layer->planes[1].offset = height * 128; + layer->planes[0].pitch = width; + layer->planes[1].pitch = width; + } + else if (pixelformat == V4L2_PIX_FMT_NV12_10_COL128) { + layer->planes[1].object_index = 0; + layer->planes[1].offset = height * 128; + layer->planes[0].pitch = width * 2; // Lies but it keeps DRM import happy + layer->planes[1].pitch = width * 2; + } + else +#endif + { + layer->planes[1].object_index = 0; + layer->planes[1].offset = layer->planes[0].pitch * height; + layer->planes[1].pitch = layer->planes[0].pitch; + } + + return 0; +} + +static int +set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq, + struct req_controls *const controls, +#if HEVC_CTRLS_VERSION >= 2 + struct v4l2_ctrl_hevc_decode_params * const dec, +#endif + struct v4l2_ctrl_hevc_slice_params * const slices, const unsigned int slice_count, + void * const offsets, const size_t offset_count) +{ + int rv; +#if HEVC_CTRLS_VERSION >= 2 + unsigned int n = 3; +#else + unsigned int n = 2; +#endif + + struct v4l2_ext_control control[6] = { + { + .id = V4L2_CID_STATELESS_HEVC_SPS, + .ptr = &controls->sps, + .size = sizeof(controls->sps), + }, + { + .id = V4L2_CID_STATELESS_HEVC_PPS, + .ptr = &controls->pps, + .size = sizeof(controls->pps), + }, +#if HEVC_CTRLS_VERSION >= 2 + { + .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS, + .ptr = dec, + .size = sizeof(*dec), + }, +#endif + }; + + if (slices) + control[n++] = (struct v4l2_ext_control) { + .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, + .ptr = slices, + .size = sizeof(*slices) * slice_count, + }; + + if (controls->has_scaling) + control[n++] = (struct v4l2_ext_control) { + .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, + .ptr = &controls->scaling_matrix, + .size = sizeof(controls->scaling_matrix), + }; + +#if HEVC_CTRLS_VERSION >= 4 + if (offsets) + control[n++] = (struct v4l2_ext_control) { + .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, + .ptr = offsets, + .size = sizeof(((struct V4L2MediaReqDescriptor *)0)->offsets[0]) * offset_count, + }; +#endif + + rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, n); + + return rv; +} + +// This only works because we started out from a single coded frame buffer +// that will remain intact until after end_frame +static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) +{ + const HEVCContext * const h = avctx->priv_data; + V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; + V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0]; + int bcount = get_bits_count(&h->HEVClc->gb); + uint32_t boff = (ptr_from_index(buffer, bcount/8 + 1) - (buffer + bcount/8 + 1)) * 8 + bcount; + + const unsigned int n = rd->num_slices; + const unsigned int block_start = (n / ctx->max_slices) * ctx->max_slices; + + int rv; + struct slice_info * si; + + // This looks dodgy but we know that FFmpeg has parsed this from a buffer + // that contains the entire frame including the start code + if (ctx->start_code == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) { + buffer -= 3; + size += 3; + boff += 24; + if (buffer[0] != 0 || buffer[1] != 0 || buffer[2] != 1) { + av_log(avctx, AV_LOG_ERROR, "Start code requested but missing %02x:%02x:%02x\n", + buffer[0], buffer[1], buffer[2]); + } + } + + if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) { + if (rd->slices == NULL) { + if ((rd->slices = av_mallocz(sizeof(*rd->slices))) == NULL) + return AVERROR(ENOMEM); + rd->slices->ptr = buffer; + rd->num_slices = 1; + } + rd->slices->len = buffer - rd->slices->ptr + size; + return 0; + } + + if ((rv = slice_add(rd)) != 0) + return rv; + + si = rd->slices + n; + si->ptr = buffer; + si->len = size; + si->n_offsets = rd->num_offsets; + + if (n != block_start) { + struct slice_info *const si0 = rd->slices + block_start; + const size_t offset = (buffer - si0->ptr); + boff += offset * 8; + size += offset; + si0->len = si->len + offset; + } + +#if HEVC_CTRLS_VERSION >= 2 + if (n == 0) + fill_decode_params(h, &rd->dec); + fill_slice_params(h, &rd->dec, rd->slice_params + n, size * 8, boff); +#else + fill_slice_params(h, rd->slice_params + n, size * 8, boff); +#endif + if (ctx->max_offsets != 0 && + (rv = offsets_add(rd, h->sh.num_entry_point_offsets, h->sh.entry_point_offset)) != 0) + return rv; + + return 0; +} + +static void v4l2_request_hevc_abort_frame(AVCodecContext * const avctx) +{ + const HEVCContext * const h = avctx->priv_data; + if (h->ref != NULL) { + V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0]; + V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; + + media_request_abort(&rd->req); + mediabufs_src_qent_abort(ctx->mbufs, &rd->qe_src); + + decode_q_remove(&ctx->decode_q, &rd->decode_ent); + } +} + +static int send_slice(AVCodecContext * const avctx, + V4L2MediaReqDescriptor * const rd, + struct req_controls *const controls, + const unsigned int i, const unsigned int j) +{ + V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; + + const int is_last = (j == rd->num_slices); + struct slice_info *const si = rd->slices + i; + struct media_request * req = NULL; + struct qent_src * src = NULL; + MediaBufsStatus stat; + void * offsets = rd->offsets + rd->slices[i].n_offsets; + size_t n_offsets = (is_last ? rd->num_offsets : rd->slices[j].n_offsets) - rd->slices[i].n_offsets; + + if ((req = media_request_get(ctx->mpool)) == NULL) { + av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__); + return AVERROR(ENOMEM); + } + + if (set_req_ctls(ctx, req, + controls, +#if HEVC_CTRLS_VERSION >= 2 + &rd->dec, +#endif + rd->slice_params + i, j - i, + offsets, n_offsets)) { + av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__); + goto fail1; + } + + if ((src = mediabufs_src_qent_get(ctx->mbufs)) == NULL) { + av_log(avctx, AV_LOG_ERROR, "%s: Failed to get src buffer\n", __func__); + goto fail1; + } + + if (qent_src_data_copy(src, 0, si->ptr, si->len, ctx->dbufs) != 0) { + av_log(avctx, AV_LOG_ERROR, "%s: Failed data copy\n", __func__); + goto fail2; + } + + if (qent_src_params_set(src, &controls->tv)) { + av_log(avctx, AV_LOG_ERROR, "%s: Failed src param set\n", __func__); + goto fail2; + } + + stat = mediabufs_start_request(ctx->mbufs, &req, &src, + i == 0 ? rd->qe_dst : NULL, + is_last); + + if (stat != MEDIABUFS_STATUS_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__); + return AVERROR_UNKNOWN; + } + return 0; + +fail2: + mediabufs_src_qent_abort(ctx->mbufs, &src); +fail1: + media_request_abort(&req); + return AVERROR_UNKNOWN; +} + +static int v4l2_request_hevc_end_frame(AVCodecContext *avctx) +{ + const HEVCContext * const h = avctx->priv_data; + V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0]; + V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; + struct req_controls rc; + unsigned int i; + int rv; + + // It is possible, though maybe a bug, to get an end_frame without + // a previous start_frame. If we do then give up. + if (!decode_q_in_q(&rd->decode_ent)) { + av_log(avctx, AV_LOG_DEBUG, "%s: Frame not in decode Q\n", __func__); + return AVERROR_INVALIDDATA; + } + + { + const ScalingList *sl = h->ps.pps->scaling_list_data_present_flag ? + &h->ps.pps->scaling_list : + h->ps.sps->scaling_list_enable_flag ? + &h->ps.sps->scaling_list : NULL; + + + memset(&rc, 0, sizeof(rc)); + rc.tv = cvt_dpb_to_tv(rd->timestamp); + fill_sps(&rc.sps, h->ps.sps); + fill_pps(&rc.pps, h->ps.pps); + if (sl) { + rc.has_scaling = 1; + fill_scaling_matrix(sl, &rc.scaling_matrix); + } + } + + decode_q_wait(&ctx->decode_q, &rd->decode_ent); + + // qe_dst needs to be bound to the data buffer and only returned when that is + // Alloc almost certainly wants to be serialised if there is any chance of blocking + // so we get the next frame to be free in the thread that needs it for decode first. + // + // In our current world this probably isn't a concern but put it here anyway + if (!rd->qe_dst) + { + if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) { + av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__); + rv = AVERROR(ENOMEM); + goto fail; + } + } + + // Send as slices + for (i = 0; i < rd->num_slices; i += ctx->max_slices) { + const unsigned int e = FFMIN(rd->num_slices, i + ctx->max_slices); + if ((rv = send_slice(avctx, rd, &rc, i, e)) != 0) + goto fail; + } + + // Set the drm_prime desriptor + drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs)); + rd->drm.objects[0].fd = dmabuf_fd(qent_dst_dmabuf(rd->qe_dst, 0)); + rd->drm.objects[0].size = dmabuf_size(qent_dst_dmabuf(rd->qe_dst, 0)); + + decode_q_remove(&ctx->decode_q, &rd->decode_ent); + return 0; + +fail: + decode_q_remove(&ctx->decode_q, &rd->decode_ent); + return rv; +} + +static inline int +ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v) +{ + return v >= c->minimum && v <= c->maximum; +} + +// Initial check & init +static int +probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) +{ + const HEVCContext *h = avctx->priv_data; + const HEVCSPS * const sps = h->ps.sps; + struct v4l2_ctrl_hevc_sps ctrl_sps; + unsigned int i; + + // Check for var slice array + struct v4l2_query_ext_ctrl qc[] = { + { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS }, + { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, + { .id = V4L2_CID_STATELESS_HEVC_SPS }, + { .id = V4L2_CID_STATELESS_HEVC_PPS }, + { .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX }, +#if HEVC_CTRLS_VERSION >= 2 + { .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS }, +#endif + }; + // Order & size must match! + static const size_t ctrl_sizes[] = { + sizeof(struct v4l2_ctrl_hevc_slice_params), + sizeof(int32_t), + sizeof(struct v4l2_ctrl_hevc_sps), + sizeof(struct v4l2_ctrl_hevc_pps), + sizeof(struct v4l2_ctrl_hevc_scaling_matrix), +#if HEVC_CTRLS_VERSION >= 2 + sizeof(struct v4l2_ctrl_hevc_decode_params), +#endif + }; + const unsigned int noof_ctrls = FF_ARRAY_ELEMS(qc); + +#if HEVC_CTRLS_VERSION == 2 + if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(5, 18, 0)) + return AVERROR(EINVAL); +#elif HEVC_CTRLS_VERSION == 3 + if (mediabufs_ctl_driver_version(ctx->mbufs) < MEDIABUFS_DRIVER_VERSION(5, 18, 0)) + return AVERROR(EINVAL); +#endif + + mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls); + i = 0; +#if HEVC_CTRLS_VERSION >= 4 + // Skip slice check if no slice mode + if (qc[1].type != 0 && !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) + i = 1; +#else + // Fail frame mode silently for anything prior to V4 + if (qc[1].type == 0 || !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) + return AVERROR(EINVAL); +#endif + for (; i != noof_ctrls; ++i) { + if (qc[i].type == 0) { + av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %#x missing\n", HEVC_CTRLS_VERSION, qc[i].id); + return AVERROR(EINVAL); + } + if (ctrl_sizes[i] != (size_t)qc[i].elem_size) { + av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %d size mismatch %zu != %zu\n", + HEVC_CTRLS_VERSION, i, ctrl_sizes[i], (size_t)qc[i].elem_size); + return AVERROR(EINVAL); + } + } + + fill_sps(&ctrl_sps, sps); + + if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_STATELESS_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) { + av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n"); + return AVERROR(EINVAL); + } + + return 0; +} + +// Final init +static int +set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) +{ + int ret; + + struct v4l2_query_ext_ctrl querys[] = { + { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, + { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, + { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, }, +#if HEVC_CTRLS_VERSION >= 4 + { .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, }, +#endif + }; + + struct v4l2_ext_control ctrls[] = { + { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, + { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, + }; + + mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys)); + + ctx->max_slices = (!(querys[2].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) || + querys[2].nr_of_dims != 1 || querys[2].dims[0] == 0) ? + 1 : querys[2].dims[0]; + av_log(avctx, AV_LOG_DEBUG, "%s: Max slices %d\n", __func__, ctx->max_slices); + +#if HEVC_CTRLS_VERSION >= 4 + ctx->max_offsets = (querys[3].type == 0 || querys[3].nr_of_dims != 1) ? + 0 : querys[3].dims[0]; + av_log(avctx, AV_LOG_DEBUG, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets); +#else + ctx->max_offsets = 0; +#endif + + if (querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED || + querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) + ctx->decode_mode = querys[0].default_value; + else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED)) + ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED; + else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) + ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED; + else { + av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode\n", __func__); + return AVERROR(EINVAL); + } + + if (querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_NONE || + querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) + ctx->start_code = querys[1].default_value; + else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B)) + ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B; + else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) + ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; + else { + av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__); + return AVERROR(EINVAL); + } + + // If we are in slice mode & START_CODE_NONE supported then pick that + // as it doesn't require the slightly dodgy look backwards in our raw buffer + if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED && + ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) + ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; + + ctrls[0].value = ctx->decode_mode; + ctrls[1].value = ctx->start_code; + + ret = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, NULL, ctrls, FF_ARRAY_ELEMS(ctrls)); + return !ret ? 0 : AVERROR(-ret); +} + +static void v4l2_req_frame_free(void *opaque, uint8_t *data) +{ + AVCodecContext *avctx = opaque; + V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)data; + + av_log(NULL, AV_LOG_DEBUG, "%s: avctx=%p data=%p\n", __func__, avctx, data); + + qent_dst_unref(&rd->qe_dst); + + // We don't expect req or qe_src to be set + if (rd->req || rd->qe_src) + av_log(NULL, AV_LOG_ERROR, "%s: qe_src %p or req %p not NULL\n", __func__, rd->req, rd->qe_src); + + av_freep(&rd->slices); + av_freep(&rd->slice_params); + av_freep(&rd->offsets); + + av_free(rd); +} + +static AVBufferRef *v4l2_req_frame_alloc(void *opaque, int size) +{ + AVCodecContext *avctx = opaque; +// V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; +// V4L2MediaReqDescriptor *req; + AVBufferRef *ref; + uint8_t *data; +// int ret; + + data = av_mallocz(size); + if (!data) + return NULL; + + av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p size=%d data=%p\n", __func__, avctx, size, data); + ref = av_buffer_create(data, size, v4l2_req_frame_free, avctx, 0); + if (!ref) { + av_freep(&data); + return NULL; + } + return ref; +} + +#if 0 +static void v4l2_req_pool_free(void *opaque) +{ + av_log(NULL, AV_LOG_DEBUG, "%s: opaque=%p\n", __func__, opaque); +} + +static void v4l2_req_hwframe_ctx_free(AVHWFramesContext *hwfc) +{ + av_log(NULL, AV_LOG_DEBUG, "%s: hwfc=%p pool=%p\n", __func__, hwfc, hwfc->pool); + + av_buffer_pool_uninit(&hwfc->pool); +} +#endif + +static int frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) +{ + V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; + AVHWFramesContext *hwfc = (AVHWFramesContext*)hw_frames_ctx->data; + const struct v4l2_format *vfmt = mediabufs_dst_fmt(ctx->mbufs); + + hwfc->format = AV_PIX_FMT_DRM_PRIME; + hwfc->sw_format = pixel_format_from_format(vfmt); + if (V4L2_TYPE_IS_MULTIPLANAR(vfmt->type)) { + hwfc->width = vfmt->fmt.pix_mp.width; + hwfc->height = vfmt->fmt.pix_mp.height; + } else { + hwfc->width = vfmt->fmt.pix.width; + hwfc->height = vfmt->fmt.pix.height; + } +#if 0 + hwfc->pool = av_buffer_pool_init2(sizeof(V4L2MediaReqDescriptor), avctx, v4l2_req_frame_alloc, v4l2_req_pool_free); + if (!hwfc->pool) + return AVERROR(ENOMEM); + + hwfc->free = v4l2_req_hwframe_ctx_free; + + hwfc->initial_pool_size = 1; + + switch (avctx->codec_id) { + case AV_CODEC_ID_VP9: + hwfc->initial_pool_size += 8; + break; + case AV_CODEC_ID_VP8: + hwfc->initial_pool_size += 3; + break; + default: + hwfc->initial_pool_size += 2; + } +#endif + av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p ctx=%p hw_frames_ctx=%p hwfc=%p pool=%p width=%d height=%d initial_pool_size=%d\n", __func__, avctx, ctx, hw_frames_ctx, hwfc, hwfc->pool, hwfc->width, hwfc->height, hwfc->initial_pool_size); + + return 0; +} + +static int alloc_frame(AVCodecContext * avctx, AVFrame *frame) +{ + int rv; + + frame->buf[0] = v4l2_req_frame_alloc(avctx, sizeof(V4L2MediaReqDescriptor)); + if (!frame->buf[0]) + return AVERROR(ENOMEM); + + frame->data[0] = frame->buf[0]->data; + + frame->hw_frames_ctx = av_buffer_ref(avctx->hw_frames_ctx); + + if ((rv = ff_attach_decode_data(frame)) != 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to attach decode data to frame\n"); + av_frame_unref(frame); + return rv; + } + + return 0; +} + +const v4l2_req_decode_fns V(ff_v4l2_req_hevc) = { + .src_pix_fmt_v4l2 = V4L2_PIX_FMT_HEVC_SLICE, + .name = "V4L2 HEVC stateless V" STR(HEVC_CTRLS_VERSION), + .probe = probe, + .set_controls = set_controls, + + .start_frame = v4l2_request_hevc_start_frame, + .decode_slice = v4l2_request_hevc_decode_slice, + .end_frame = v4l2_request_hevc_end_frame, + .abort_frame = v4l2_request_hevc_abort_frame, + .frame_params = frame_params, + .alloc_frame = alloc_frame, +}; + diff --git a/libavcodec/v4l2_req_media.c b/libavcodec/v4l2_req_media.c new file mode 100644 index 0000000000..1a9944774a --- /dev/null +++ b/libavcodec/v4l2_req_media.c @@ -0,0 +1,1802 @@ +/* + * Copyright (C) 2018 Paul Kocialkowski + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "v4l2_req_dmabufs.h" +#include "v4l2_req_media.h" +#include "v4l2_req_pollqueue.h" +#include "v4l2_req_utils.h" +#include "weak_link.h" + + +/* floor(log2(x)) */ +static unsigned int log2_size(size_t x) +{ + unsigned int n = 0; + + if (x & ~0xffff) { + n += 16; + x >>= 16; + } + if (x & ~0xff) { + n += 8; + x >>= 8; + } + if (x & ~0xf) { + n += 4; + x >>= 4; + } + if (x & ~3) { + n += 2; + x >>= 2; + } + return (x & ~1) ? n + 1 : n; +} + +static size_t round_up_size(const size_t x) +{ + /* Admit no size < 256 */ + const unsigned int n = x < 256 ? 8 : log2_size(x) - 1; + + return x >= (3 << n) ? 4 << n : (3 << n); +} + +struct media_request; + +struct media_pool { + int fd; + sem_t sem; + pthread_mutex_t lock; + struct media_request * free_reqs; + struct pollqueue * pq; +}; + +struct media_request { + struct media_request * next; + struct media_pool * mp; + int fd; + struct polltask * pt; +}; + +static inline enum v4l2_memory +mediabufs_memory_to_v4l2(const enum mediabufs_memory m) +{ + return (enum v4l2_memory)m; +} + +const char * +mediabufs_memory_name(const enum mediabufs_memory m) +{ + switch (m) { + case MEDIABUFS_MEMORY_UNSET: + return "Unset"; + case MEDIABUFS_MEMORY_MMAP: + return "MMap"; + case MEDIABUFS_MEMORY_USERPTR: + return "UserPtr"; + case MEDIABUFS_MEMORY_OVERLAY: + return "Overlay"; + case MEDIABUFS_MEMORY_DMABUF: + return "DMABuf"; + default: + break; + } + return "Unknown"; +} + + +static inline int do_trywait(sem_t *const sem) +{ + while (sem_trywait(sem)) { + if (errno != EINTR) + return -errno; + } + return 0; +} + +static inline int do_wait(sem_t *const sem) +{ + while (sem_wait(sem)) { + if (errno != EINTR) + return -errno; + } + return 0; +} + +static int request_buffers(int video_fd, unsigned int type, + enum mediabufs_memory memory, unsigned int buffers_count) +{ + struct v4l2_requestbuffers buffers; + int rc; + + memset(&buffers, 0, sizeof(buffers)); + buffers.type = type; + buffers.memory = mediabufs_memory_to_v4l2(memory); + buffers.count = buffers_count; + + rc = ioctl(video_fd, VIDIOC_REQBUFS, &buffers); + if (rc < 0) { + rc = -errno; + request_log("Unable to request %d type %d buffers: %s\n", buffers_count, type, strerror(-rc)); + return rc; + } + + return 0; +} + + +static int set_stream(int video_fd, unsigned int type, bool enable) +{ + enum v4l2_buf_type buf_type = type; + int rc; + + rc = ioctl(video_fd, enable ? VIDIOC_STREAMON : VIDIOC_STREAMOFF, + &buf_type); + if (rc < 0) { + rc = -errno; + request_log("Unable to %sable stream: %s\n", + enable ? "en" : "dis", strerror(-rc)); + return rc; + } + + return 0; +} + + + +struct media_request * media_request_get(struct media_pool * const mp) +{ + struct media_request *req = NULL; + + /* Timeout handled by poll code */ + if (do_wait(&mp->sem)) + return NULL; + + pthread_mutex_lock(&mp->lock); + req = mp->free_reqs; + if (req) { + mp->free_reqs = req->next; + req->next = NULL; + } + pthread_mutex_unlock(&mp->lock); + return req; +} + +int media_request_fd(const struct media_request * const req) +{ + return req->fd; +} + +int media_request_start(struct media_request * const req) +{ + while (ioctl(req->fd, MEDIA_REQUEST_IOC_QUEUE, NULL) == -1) + { + const int err = errno; + if (err == EINTR) + continue; + request_log("%s: Failed to Q media: (%d) %s\n", __func__, err, strerror(err)); + return -err; + } + + pollqueue_add_task(req->pt, 2000); + return 0; +} + +static void media_request_done(void *v, short revents) +{ + struct media_request *const req = v; + struct media_pool *const mp = req->mp; + + /* ** Not sure what to do about timeout */ + + if (ioctl(req->fd, MEDIA_REQUEST_IOC_REINIT, NULL) < 0) + request_log("Unable to reinit media request: %s\n", + strerror(errno)); + + pthread_mutex_lock(&mp->lock); + req->next = mp->free_reqs; + mp->free_reqs = req; + pthread_mutex_unlock(&mp->lock); + sem_post(&mp->sem); +} + +int media_request_abort(struct media_request ** const preq) +{ + struct media_request * const req = *preq; + + if (req == NULL) + return 0; + *preq = NULL; + + media_request_done(req, 0); + return 0; +} + +static void delete_req_chain(struct media_request * const chain) +{ + struct media_request * next = chain; + while (next) { + struct media_request * const req = next; + next = req->next; + if (req->pt) + polltask_delete(&req->pt); + if (req->fd != -1) + close(req->fd); + free(req); + } +} + +struct media_pool * media_pool_new(const char * const media_path, + struct pollqueue * const pq, + const unsigned int n) +{ + struct media_pool * const mp = calloc(1, sizeof(*mp)); + unsigned int i; + + if (!mp) + goto fail0; + + mp->pq = pq; + pthread_mutex_init(&mp->lock, NULL); + mp->fd = open(media_path, O_RDWR | O_NONBLOCK); + if (mp->fd == -1) { + request_log("Failed to open '%s': %s\n", media_path, strerror(errno)); + goto fail1; + } + + for (i = 0; i != n; ++i) { + struct media_request * req = malloc(sizeof(*req)); + if (!req) + goto fail4; + + *req = (struct media_request){ + .next = mp->free_reqs, + .mp = mp, + .fd = -1 + }; + mp->free_reqs = req; + + if (ioctl(mp->fd, MEDIA_IOC_REQUEST_ALLOC, &req->fd) == -1) { + request_log("Failed to alloc request %d: %s\n", i, strerror(errno)); + goto fail4; + } + + req->pt = polltask_new(pq, req->fd, POLLPRI, media_request_done, req); + if (!req->pt) + goto fail4; + } + + sem_init(&mp->sem, 0, n); + + return mp; + +fail4: + delete_req_chain(mp->free_reqs); + close(mp->fd); + pthread_mutex_destroy(&mp->lock); +fail1: + free(mp); +fail0: + return NULL; +} + +void media_pool_delete(struct media_pool ** pMp) +{ + struct media_pool * const mp = *pMp; + + if (!mp) + return; + *pMp = NULL; + + delete_req_chain(mp->free_reqs); + close(mp->fd); + sem_destroy(&mp->sem); + pthread_mutex_destroy(&mp->lock); + free(mp); +} + + +#define INDEX_UNSET (~(uint32_t)0) + +enum qent_status { + QENT_NEW = 0, // Initial state - shouldn't last + QENT_FREE, // On free chain + QENT_PENDING, // User has ent + QENT_WAITING, // On inuse + QENT_DONE, // Frame rx + QENT_ERROR, // Error + QENT_IMPORT +}; + +struct qent_base { + atomic_int ref_count; + struct qent_base *next; + struct qent_base *prev; + enum qent_status status; + enum mediabufs_memory memtype; + uint32_t index; + struct dmabuf_h *dh[VIDEO_MAX_PLANES]; + struct timeval timestamp; +}; + +struct qent_src { + struct qent_base base; + int fixed_size; +}; + +struct qent_dst { + struct qent_base base; + bool waiting; + pthread_mutex_t lock; + pthread_cond_t cond; + struct ff_weak_link_client * mbc_wl; +}; + +struct qe_list_head { + struct qent_base *head; + struct qent_base *tail; +}; + +struct buf_pool { + enum mediabufs_memory memtype; + pthread_mutex_t lock; + sem_t free_sem; + struct qe_list_head free; + struct qe_list_head inuse; +}; + + +static inline struct qent_dst *base_to_dst(struct qent_base *be) +{ + return (struct qent_dst *)be; +} + +static inline struct qent_src *base_to_src(struct qent_base *be) +{ + return (struct qent_src *)be; +} + + +#define QENT_BASE_INITIALIZER(mtype) {\ + .ref_count = ATOMIC_VAR_INIT(0),\ + .status = QENT_NEW,\ + .memtype = (mtype),\ + .index = INDEX_UNSET\ +} + +static void qe_base_uninit(struct qent_base *const be) +{ + unsigned int i; + for (i = 0; i != VIDEO_MAX_PLANES; ++i) { + dmabuf_free(be->dh[i]); + be->dh[i] = NULL; + } +} + +static void qe_src_free(struct qent_src *const be_src) +{ + if (!be_src) + return; + qe_base_uninit(&be_src->base); + free(be_src); +} + +static struct qent_src * qe_src_new(enum mediabufs_memory mtype) +{ + struct qent_src *const be_src = malloc(sizeof(*be_src)); + if (!be_src) + return NULL; + *be_src = (struct qent_src){ + .base = QENT_BASE_INITIALIZER(mtype) + }; + return be_src; +} + +static void qe_dst_free(struct qent_dst *const be_dst) +{ + if (!be_dst) + return; + + ff_weak_link_unref(&be_dst->mbc_wl); + pthread_cond_destroy(&be_dst->cond); + pthread_mutex_destroy(&be_dst->lock); + qe_base_uninit(&be_dst->base); + free(be_dst); +} + +static struct qent_dst* qe_dst_new(struct ff_weak_link_master * const wl, const enum mediabufs_memory memtype) +{ + struct qent_dst *const be_dst = malloc(sizeof(*be_dst)); + if (!be_dst) + return NULL; + *be_dst = (struct qent_dst){ + .base = QENT_BASE_INITIALIZER(memtype), + .lock = PTHREAD_MUTEX_INITIALIZER, + .cond = PTHREAD_COND_INITIALIZER, + .mbc_wl = ff_weak_link_ref(wl) + }; + return be_dst; +} + +static void ql_add_tail(struct qe_list_head * const ql, struct qent_base * be) +{ + if (ql->tail) + ql->tail->next = be; + else + ql->head = be; + be->prev = ql->tail; + be->next = NULL; + ql->tail = be; +} + +static struct qent_base * ql_extract(struct qe_list_head * const ql, struct qent_base * be) +{ + if (!be) + return NULL; + + if (be->next) + be->next->prev = be->prev; + else + ql->tail = be->prev; + if (be->prev) + be->prev->next = be->next; + else + ql->head = be->next; + be->next = NULL; + be->prev = NULL; + return be; +} + + +static void bq_put_free(struct buf_pool *const bp, struct qent_base * be) +{ + ql_add_tail(&bp->free, be); +} + +static struct qent_base * bq_get_free(struct buf_pool *const bp) +{ + return ql_extract(&bp->free, bp->free.head); +} + +static struct qent_base * bq_extract_inuse(struct buf_pool *const bp, struct qent_base *const be) +{ + return ql_extract(&bp->inuse, be); +} + +static struct qent_base * bq_get_inuse(struct buf_pool *const bp) +{ + return ql_extract(&bp->inuse, bp->inuse.head); +} + +static void bq_free_all_free_src(struct buf_pool *const bp) +{ + struct qent_base *be; + while ((be = bq_get_free(bp)) != NULL) + qe_src_free(base_to_src(be)); +} + +static void bq_free_all_inuse_src(struct buf_pool *const bp) +{ + struct qent_base *be; + while ((be = bq_get_inuse(bp)) != NULL) + qe_src_free(base_to_src(be)); +} + +static void bq_free_all_free_dst(struct buf_pool *const bp) +{ + struct qent_base *be; + while ((be = bq_get_free(bp)) != NULL) + qe_dst_free(base_to_dst(be)); +} + +static void queue_put_free(struct buf_pool *const bp, struct qent_base *be) +{ + unsigned int i; + + pthread_mutex_lock(&bp->lock); + /* Clear out state vars */ + be->timestamp.tv_sec = 0; + be->timestamp.tv_usec = 0; + be->status = QENT_FREE; + for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) + dmabuf_len_set(be->dh[i], 0); + bq_put_free(bp, be); + pthread_mutex_unlock(&bp->lock); + sem_post(&bp->free_sem); +} + +static bool queue_is_inuse(const struct buf_pool *const bp) +{ + return bp->inuse.tail != NULL; +} + +static void queue_put_inuse(struct buf_pool *const bp, struct qent_base *be) +{ + if (!be) + return; + pthread_mutex_lock(&bp->lock); + ql_add_tail(&bp->inuse, be); + be->status = QENT_WAITING; + pthread_mutex_unlock(&bp->lock); +} + +static struct qent_base *queue_get_free(struct buf_pool *const bp) +{ + struct qent_base *buf; + + if (do_wait(&bp->free_sem)) + return NULL; + pthread_mutex_lock(&bp->lock); + buf = bq_get_free(bp); + pthread_mutex_unlock(&bp->lock); + return buf; +} + +static struct qent_base *queue_tryget_free(struct buf_pool *const bp) +{ + struct qent_base *buf; + + if (do_trywait(&bp->free_sem)) + return NULL; + pthread_mutex_lock(&bp->lock); + buf = bq_get_free(bp); + pthread_mutex_unlock(&bp->lock); + return buf; +} + +static struct qent_base * queue_find_extract_index(struct buf_pool *const bp, const unsigned int index) +{ + struct qent_base *be; + + pthread_mutex_lock(&bp->lock); + /* Expect 1st in Q, but allow anywhere */ + for (be = bp->inuse.head; be; be = be->next) { + if (be->index == index) { + bq_extract_inuse(bp, be); + break; + } + } + pthread_mutex_unlock(&bp->lock); + + return be; +} + +static void queue_delete(struct buf_pool *const bp) +{ + sem_destroy(&bp->free_sem); + pthread_mutex_destroy(&bp->lock); + free(bp); +} + +static struct buf_pool* queue_new(const int vfd) +{ + struct buf_pool *bp = calloc(1, sizeof(*bp)); + if (!bp) + return NULL; + pthread_mutex_init(&bp->lock, NULL); + sem_init(&bp->free_sem, 0, 0); + return bp; +} + + +struct mediabufs_ctl { + atomic_int ref_count; /* 0 is single ref for easier atomics */ + void * dc; + int vfd; + bool stream_on; + bool polling; + bool dst_fixed; // Dst Q is fixed size + pthread_mutex_t lock; + struct buf_pool * src; + struct buf_pool * dst; + struct polltask * pt; + struct pollqueue * pq; + struct ff_weak_link_master * this_wlm; + + enum mediabufs_memory src_memtype; + enum mediabufs_memory dst_memtype; + struct v4l2_format src_fmt; + struct v4l2_format dst_fmt; + struct v4l2_capability capability; +}; + +static int qe_v4l2_queue(struct qent_base *const be, + const int vfd, struct media_request *const mreq, + const struct v4l2_format *const fmt, + const bool is_dst, const bool hold_flag) +{ + struct v4l2_buffer buffer = { + .type = fmt->type, + .memory = mediabufs_memory_to_v4l2(be->memtype), + .index = be->index + }; + struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { + unsigned int i; + for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) { + if (is_dst) + dmabuf_len_set(be->dh[i], 0); + + /* *** Really need a pixdesc rather than a format so we can fill in data_offset */ + planes[i].length = dmabuf_size(be->dh[i]); + planes[i].bytesused = dmabuf_len(be->dh[i]); + if (be->memtype == MEDIABUFS_MEMORY_DMABUF) + planes[i].m.fd = dmabuf_fd(be->dh[i]); + else + planes[i].m.mem_offset = 0; + } + buffer.m.planes = planes; + buffer.length = i; + } + else { + if (is_dst) + dmabuf_len_set(be->dh[0], 0); + + buffer.bytesused = dmabuf_len(be->dh[0]); + buffer.length = dmabuf_size(be->dh[0]); + if (be->memtype == MEDIABUFS_MEMORY_DMABUF) + buffer.m.fd = dmabuf_fd(be->dh[0]); + else + buffer.m.offset = 0; + } + + if (!is_dst && mreq) { + buffer.flags |= V4L2_BUF_FLAG_REQUEST_FD; + buffer.request_fd = media_request_fd(mreq); + if (hold_flag) + buffer.flags |= V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF; + } + + if (is_dst) + be->timestamp = (struct timeval){0,0}; + + buffer.timestamp = be->timestamp; + + while (ioctl(vfd, VIDIOC_QBUF, &buffer)) { + const int err = errno; + if (err != EINTR) { + request_log("%s: Failed to Q buffer: err=%d (%s)\n", __func__, err, strerror(err)); + return -err; + } + } + return 0; +} + +static struct qent_base * qe_dequeue(struct buf_pool *const bp, + const int vfd, + const struct v4l2_format * const f) +{ + struct qent_base *be; + int rc; + const bool mp = V4L2_TYPE_IS_MULTIPLANAR(f->type); + struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; + struct v4l2_buffer buffer = { + .type = f->type, + .memory = mediabufs_memory_to_v4l2(bp->memtype) + }; + if (mp) { + buffer.length = f->fmt.pix_mp.num_planes; + buffer.m.planes = planes; + } + + while ((rc = ioctl(vfd, VIDIOC_DQBUF, &buffer)) != 0 && + errno == EINTR) + /* Loop */; + if (rc) { + request_log("Error DQing buffer type %d: %s\n", f->type, strerror(errno)); + return NULL; + } + + be = queue_find_extract_index(bp, buffer.index); + if (!be) { + request_log("Failed to find index %d in Q\n", buffer.index); + return NULL; + } + + if (mp) { + unsigned int i; + for (i = 0; i != buffer.length; ++i) + dmabuf_len_set(be->dh[i], V4L2_TYPE_IS_CAPTURE(f->type) ? planes[i].bytesused : 0); + } + else + dmabuf_len_set(be->dh[0], V4L2_TYPE_IS_CAPTURE(f->type) ? buffer.length : 0); + + be->timestamp = buffer.timestamp; + be->status = (buffer.flags & V4L2_BUF_FLAG_ERROR) ? QENT_ERROR : QENT_DONE; + return be; +} + +static void qe_dst_done(struct qent_dst * dst_be) +{ + pthread_mutex_lock(&dst_be->lock); + dst_be->waiting = false; + pthread_cond_broadcast(&dst_be->cond); + pthread_mutex_unlock(&dst_be->lock); + + qent_dst_unref(&dst_be); +} + +static bool qe_dst_waiting(struct qent_dst *const dst_be) +{ + bool waiting; + pthread_mutex_lock(&dst_be->lock); + waiting = dst_be->waiting; + dst_be->waiting = true; + pthread_mutex_unlock(&dst_be->lock); + return waiting; +} + + +static bool mediabufs_wants_poll(const struct mediabufs_ctl *const mbc) +{ + return queue_is_inuse(mbc->src) || queue_is_inuse(mbc->dst); +} + +static void mediabufs_poll_cb(void * v, short revents) +{ + struct mediabufs_ctl *mbc = v; + struct qent_src *src_be = NULL; + struct qent_dst *dst_be = NULL; + + if (!revents) + request_err(mbc->dc, "%s: Timeout\n", __func__); + + pthread_mutex_lock(&mbc->lock); + mbc->polling = false; + + if ((revents & POLLOUT) != 0) + src_be = base_to_src(qe_dequeue(mbc->src, mbc->vfd, &mbc->src_fmt)); + if ((revents & POLLIN) != 0) + dst_be = base_to_dst(qe_dequeue(mbc->dst, mbc->vfd, &mbc->dst_fmt)); + + /* Reschedule */ + if (mediabufs_wants_poll(mbc)) { + mbc->polling = true; + pollqueue_add_task(mbc->pt, 2000); + } + pthread_mutex_unlock(&mbc->lock); + + if (src_be) + queue_put_free(mbc->src, &src_be->base); + if (dst_be) + qe_dst_done(dst_be); +} + +int qent_src_params_set(struct qent_src *const be_src, const struct timeval * timestamp) +{ + struct qent_base *const be = &be_src->base; + + be->timestamp = *timestamp; + return 0; +} + +struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst) +{ + return be_dst->base.timestamp; +} + +static int qent_base_realloc(struct qent_base *const be, const size_t len, struct dmabufs_ctl * dbsc) +{ + if (!be->dh[0] || len > dmabuf_size(be->dh[0])) { + size_t newsize = round_up_size(len); + request_log("%s: Overrun %zd > %zd; trying %zd\n", __func__, len, dmabuf_size(be->dh[0]), newsize); + if (!dbsc) { + request_log("%s: No dmbabuf_ctrl for realloc\n", __func__); + return -ENOMEM; + } + if ((be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], newsize)) == NULL) { + request_log("%s: Realloc %zd failed\n", __func__, newsize); + return -ENOMEM; + } + } + return 0; +} + +int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc) +{ + struct qent_base *const be = &be_src->base; + return qent_base_realloc(be, len, dbsc); +} + + +int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc) +{ + void * dst; + struct qent_base *const be = &be_src->base; + int rv; + + // Realloc doesn't copy so don't alloc if offset != 0 + if ((rv = qent_base_realloc(be, offset + len, + be_src->fixed_size || offset ? NULL : dbsc)) != 0) + return rv; + + dmabuf_write_start(be->dh[0]); + dst = dmabuf_map(be->dh[0]); + if (!dst) + return -1; + memcpy((char*)dst + offset, src, len); + dmabuf_len_set(be->dh[0], len); + dmabuf_write_end(be->dh[0]); + return 0; +} + +const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be_dst, unsigned int plane) +{ + const struct qent_base *const be = &be_dst->base; + + return (plane >= sizeof(be->dh)/sizeof(be->dh[0])) ? NULL : be->dh[plane]; +} + +int qent_dst_dup_fd(const struct qent_dst *const be_dst, unsigned int plane) +{ + return dup(dmabuf_fd(qent_dst_dmabuf(be_dst, plane))); +} + +MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc, + struct media_request **const pmreq, + struct qent_src **const psrc_be, + struct qent_dst *const dst_be, + const bool is_final) +{ + struct media_request * mreq = *pmreq; + struct qent_src *const src_be = *psrc_be; + + // Req & src are always both "consumed" + *pmreq = NULL; + *psrc_be = NULL; + + pthread_mutex_lock(&mbc->lock); + + if (!src_be) + goto fail1; + + if (dst_be) { + if (qe_dst_waiting(dst_be)) { + request_info(mbc->dc, "Request buffer already waiting on start\n"); + goto fail1; + } + dst_be->base.timestamp = (struct timeval){0,0}; + if (qe_v4l2_queue(&dst_be->base, mbc->vfd, NULL, &mbc->dst_fmt, true, false)) + goto fail1; + + qent_dst_ref(dst_be); + queue_put_inuse(mbc->dst, &dst_be->base); + } + + if (qe_v4l2_queue(&src_be->base, mbc->vfd, mreq, &mbc->src_fmt, false, !is_final)) + goto fail1; + queue_put_inuse(mbc->src, &src_be->base); + + if (!mbc->polling && mediabufs_wants_poll(mbc)) { + mbc->polling = true; + pollqueue_add_task(mbc->pt, 2000); + } + pthread_mutex_unlock(&mbc->lock); + + if (media_request_start(mreq)) + return MEDIABUFS_ERROR_OPERATION_FAILED; + + return MEDIABUFS_STATUS_SUCCESS; + +fail1: + media_request_abort(&mreq); + if (src_be) + queue_put_free(mbc->src, &src_be->base); + +// *** TODO: If src Q fails this doesnt unwind properly - separate dst Q from src Q + if (dst_be) { + dst_be->base.status = QENT_ERROR; + qe_dst_done(dst_be); + } + pthread_mutex_unlock(&mbc->lock); + return MEDIABUFS_ERROR_OPERATION_FAILED; +} + + +static int qe_alloc_from_fmt(struct qent_base *const be, + struct dmabufs_ctl *const dbsc, + const struct v4l2_format *const fmt) +{ + if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { + unsigned int i; + for (i = 0; i != fmt->fmt.pix_mp.num_planes; ++i) { + be->dh[i] = dmabuf_realloc(dbsc, be->dh[i], + fmt->fmt.pix_mp.plane_fmt[i].sizeimage); + /* On failure tidy up and die */ + if (!be->dh[i]) { + while (i--) { + dmabuf_free(be->dh[i]); + be->dh[i] = NULL; + } + return -1; + } + } + } + else { +// be->dh[0] = dmabuf_alloc(dbsc, fmt->fmt.pix.sizeimage); + size_t size = fmt->fmt.pix.sizeimage; + be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], size); + if (!be->dh[0]) + return -1; + } + return 0; +} + +static MediaBufsStatus fmt_set(struct v4l2_format *const fmt, const int fd, + const enum v4l2_buf_type buftype, + uint32_t pixfmt, + const unsigned int width, const unsigned int height, + const size_t bufsize) +{ + *fmt = (struct v4l2_format){.type = buftype}; + + if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) { + fmt->fmt.pix_mp.width = width; + fmt->fmt.pix_mp.height = height; + fmt->fmt.pix_mp.pixelformat = pixfmt; + if (bufsize) { + fmt->fmt.pix_mp.num_planes = 1; + fmt->fmt.pix_mp.plane_fmt[0].sizeimage = bufsize; + } + } + else { + fmt->fmt.pix.width = width; + fmt->fmt.pix.height = height; + fmt->fmt.pix.pixelformat = pixfmt; + fmt->fmt.pix.sizeimage = bufsize; + } + + while (ioctl(fd, VIDIOC_S_FMT, fmt)) + if (errno != EINTR) + return MEDIABUFS_ERROR_OPERATION_FAILED; + + // Treat anything where we don't get at least what we asked for as a fail + if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) { + if (fmt->fmt.pix_mp.width < width || + fmt->fmt.pix_mp.height < height || + fmt->fmt.pix_mp.pixelformat != pixfmt) { + return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; + } + } + else { + if (fmt->fmt.pix.width < width || + fmt->fmt.pix.height < height || + fmt->fmt.pix.pixelformat != pixfmt) { + return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; + } + } + + return MEDIABUFS_STATUS_SUCCESS; +} + +static MediaBufsStatus find_fmt_flags(struct v4l2_format *const fmt, + const int fd, + const unsigned int type_v4l2, + const uint32_t flags_must, + const uint32_t flags_not, + const unsigned int width, + const unsigned int height, + mediabufs_dst_fmt_accept_fn *const accept_fn, + void *const accept_v) +{ + unsigned int i; + + for (i = 0;; ++i) { + struct v4l2_fmtdesc fmtdesc = { + .index = i, + .type = type_v4l2 + }; + while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) { + if (errno != EINTR) + return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; + } + if ((fmtdesc.flags & flags_must) != flags_must || + (fmtdesc.flags & flags_not)) + continue; + if (!accept_fn(accept_v, &fmtdesc)) + continue; + + if (fmt_set(fmt, fd, fmtdesc.type, fmtdesc.pixelformat, + width, height, 0) == MEDIABUFS_STATUS_SUCCESS) + return MEDIABUFS_STATUS_SUCCESS; + } + return 0; +} + + +/* Wait for qent done */ + +MediaBufsStatus qent_dst_wait(struct qent_dst *const be_dst) +{ + struct qent_base *const be = &be_dst->base; + enum qent_status estat; + + pthread_mutex_lock(&be_dst->lock); + while (be_dst->waiting && + !pthread_cond_wait(&be_dst->cond, &be_dst->lock)) + /* Loop */; + estat = be->status; + pthread_mutex_unlock(&be_dst->lock); + + return estat == QENT_DONE ? MEDIABUFS_STATUS_SUCCESS : + estat == QENT_ERROR ? MEDIABUFS_ERROR_DECODING_ERROR : + MEDIABUFS_ERROR_OPERATION_FAILED; +} + +const uint8_t * qent_dst_data(struct qent_dst *const be_dst, unsigned int buf_no) +{ + struct qent_base *const be = &be_dst->base; + return dmabuf_map(be->dh[buf_no]); +} + +MediaBufsStatus qent_dst_read_start(struct qent_dst *const be_dst) +{ + struct qent_base *const be = &be_dst->base; + unsigned int i; + for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) { + if (dmabuf_read_start(be->dh[i])) { + while (i--) + dmabuf_read_end(be->dh[i]); + return MEDIABUFS_ERROR_ALLOCATION_FAILED; + } + } + return MEDIABUFS_STATUS_SUCCESS; +} + +MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be_dst) +{ + struct qent_base *const be = &be_dst->base; + unsigned int i; + MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS; + + for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) { + if (dmabuf_read_end(be->dh[i])) + status = MEDIABUFS_ERROR_OPERATION_FAILED; + } + return status; +} + +struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst) +{ + if (be_dst) + atomic_fetch_add(&be_dst->base.ref_count, 1); + return be_dst; +} + +void qent_dst_unref(struct qent_dst ** const pbe_dst) +{ + struct qent_dst * const be_dst = *pbe_dst; + struct mediabufs_ctl * mbc; + if (!be_dst) + return; + *pbe_dst = NULL; + + if (atomic_fetch_sub(&be_dst->base.ref_count, 1) != 0) + return; + + if ((mbc = ff_weak_link_lock(&be_dst->mbc_wl)) != NULL) { + queue_put_free(mbc->dst, &be_dst->base); + ff_weak_link_unlock(be_dst->mbc_wl); + } + else { + qe_dst_free(be_dst); + } +} + +MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst, + unsigned int plane, + int fd, size_t size) +{ + struct qent_base *const be = &be_dst->base; + struct dmabuf_h * dh; + + if (be->status != QENT_IMPORT || be->dh[plane]) + return MEDIABUFS_ERROR_OPERATION_FAILED; + + dh = dmabuf_import(fd, size); + if (!dh) + return MEDIABUFS_ERROR_ALLOCATION_FAILED; + + be->dh[plane] = dh; + return MEDIABUFS_STATUS_SUCCESS; +} + +// Returns noof buffers created, -ve for error +static int create_dst_bufs(struct mediabufs_ctl *const mbc, unsigned int n, struct qent_dst * const qes[]) +{ + unsigned int i; + + struct v4l2_create_buffers cbuf = { + .count = n, + .memory = mediabufs_memory_to_v4l2(mbc->dst->memtype), + .format = mbc->dst_fmt, + }; + + while (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &cbuf)) { + const int err = -errno; + if (err != EINTR) { + request_err(mbc->dc, "%s: Failed to create V4L2 buffer\n", __func__); + return -err; + } + } + + if (cbuf.count != n) + request_warn(mbc->dc, "%s: Created %d of %d V4L2 buffers requested\n", __func__, cbuf.count, n); + + for (i = 0; i != cbuf.count; ++i) + qes[i]->base.index = cbuf.index + i; + + return cbuf.count; +} + +static MediaBufsStatus +qe_import_from_buf(struct mediabufs_ctl *const mbc, struct qent_base * const be, const struct v4l2_format *const fmt, + const unsigned int n, const bool x_dmabuf) +{ + struct v4l2_buffer buf = { + .index = n, + .type = fmt->type, + }; + struct v4l2_plane planes[VIDEO_MAX_PLANES]; + int ret; + + if (be->dh[0]) + return 0; + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { + memset(planes, 0, sizeof(planes)); + buf.m.planes = planes; + buf.length = VIDEO_MAX_PLANES; + } + + if ((ret = ioctl(mbc->vfd, VIDIOC_QUERYBUF, &buf)) != 0) { + request_err(mbc->dc, "VIDIOC_QUERYBUF failed"); + return MEDIABUFS_ERROR_OPERATION_FAILED; + } + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) + { + unsigned int i; + for (i = 0; i != buf.length; ++i) { + if (x_dmabuf) { + struct v4l2_exportbuffer xbuf = { + .type = buf.type, + .index = buf.index, + .plane = i, + .flags = O_RDWR, // *** Arguably O_RDONLY would be fine + }; + if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) + be->dh[i] = dmabuf_import(xbuf.fd, planes[i].length); + } + else { + be->dh[i] = dmabuf_import_mmap( + mmap(NULL, planes[i].length, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + mbc->vfd, planes[i].m.mem_offset), + planes[i].length); + } + /* On failure tidy up and die */ + if (!be->dh[i]) { + while (i--) { + dmabuf_free(be->dh[i]); + be->dh[i] = NULL; + } + return MEDIABUFS_ERROR_OPERATION_FAILED; + } + } + } + else + { + if (x_dmabuf) { + struct v4l2_exportbuffer xbuf = { + .type = buf.type, + .index = buf.index, + .flags = O_RDWR, // *** Arguably O_RDONLY would be fine + }; + if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) + be->dh[0] = dmabuf_import(xbuf.fd, buf.length); + } + else { + be->dh[0] = dmabuf_import_mmap( + mmap(NULL, buf.length, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + mbc->vfd, buf.m.offset), + buf.length); + } + /* On failure tidy up and die */ + if (!be->dh[0]) { + return MEDIABUFS_ERROR_OPERATION_FAILED; + } + } + + return 0; +} + +struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struct dmabufs_ctl *const dbsc) +{ + struct qent_dst * be_dst; + + if (mbc == NULL) { + be_dst = qe_dst_new(NULL, MEDIABUFS_MEMORY_DMABUF); + if (be_dst) + be_dst->base.status = QENT_IMPORT; + return be_dst; + } + + if (mbc->dst_fixed) { + be_dst = base_to_dst(queue_get_free(mbc->dst)); + if (!be_dst) + return NULL; + } + else { + be_dst = base_to_dst(queue_tryget_free(mbc->dst)); + if (!be_dst) { + be_dst = qe_dst_new(mbc->this_wlm, mbc->dst->memtype); + if (!be_dst) + return NULL; + + if (create_dst_bufs(mbc, 1, &be_dst) != 1) { + qe_dst_free(be_dst); + return NULL; + } + } + } + + if (mbc->dst->memtype == MEDIABUFS_MEMORY_MMAP) { + if (qe_import_from_buf(mbc, &be_dst->base, &mbc->dst_fmt, be_dst->base.index, true)) { + request_err(mbc->dc, "Failed to export as dmabuf\n"); + queue_put_free(mbc->dst, &be_dst->base); + return NULL; + } + } + else { + if (qe_alloc_from_fmt(&be_dst->base, dbsc, &mbc->dst_fmt)) { + /* Given how create buf works we can't uncreate it on alloc failure + * all we can do is put it on the free Q + */ + queue_put_free(mbc->dst, &be_dst->base); + return NULL; + } + } + + be_dst->base.status = QENT_PENDING; + atomic_store(&be_dst->base.ref_count, 0); + return be_dst; +} + +const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc) +{ + return &mbc->dst_fmt; +} + +MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc, + const unsigned int width, + const unsigned int height, + mediabufs_dst_fmt_accept_fn *const accept_fn, + void *const accept_v) +{ + MediaBufsStatus status; + unsigned int i; + const enum v4l2_buf_type buf_type = mbc->dst_fmt.type; + static const struct { + unsigned int flags_must; + unsigned int flags_not; + } trys[] = { + {0, V4L2_FMT_FLAG_EMULATED}, + {V4L2_FMT_FLAG_EMULATED, 0}, + }; + for (i = 0; i != sizeof(trys)/sizeof(trys[0]); ++i) { + status = find_fmt_flags(&mbc->dst_fmt, mbc->vfd, + buf_type, + trys[i].flags_must, + trys[i].flags_not, + width, height, accept_fn, accept_v); + if (status != MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE) + return status; + } + + if (status != MEDIABUFS_STATUS_SUCCESS) + return status; + + /* Try to create a buffer - don't alloc */ + return status; +} + +// ** This is a mess if we get partial alloc but without any way to remove +// individual V4L2 Q members we are somewhat stuffed +MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype) +{ + unsigned int i; + int a = 0; + unsigned int qc; + struct qent_dst * qes[32]; + + if (n > 32) + return MEDIABUFS_ERROR_ALLOCATION_FAILED; + + mbc->dst->memtype = memtype; + + // Create qents first as it is hard to get rid of the V4L2 buffers on error + for (qc = 0; qc != n; ++qc) + { + if ((qes[qc] = qe_dst_new(mbc->this_wlm, mbc->dst->memtype)) == NULL) + goto fail; + } + + if ((a = create_dst_bufs(mbc, n, qes)) < 0) + goto fail; + + for (i = 0; i != a; ++i) + queue_put_free(mbc->dst, &qes[i]->base); + + if (a != n) + goto fail; + + mbc->dst_fixed = fixed; + return MEDIABUFS_STATUS_SUCCESS; + +fail: + for (i = (a < 0 ? 0 : a); i != qc; ++i) + qe_dst_free(qes[i]); + + return MEDIABUFS_ERROR_ALLOCATION_FAILED; +} + +struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc) +{ + struct qent_base * buf = queue_get_free(mbc->src); + buf->status = QENT_PENDING; + return base_to_src(buf); +} + +void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src) +{ + struct qent_src *const qe_src = *pqe_src; + if (!qe_src) + return; + *pqe_src = NULL; + queue_put_free(mbc->src, &qe_src->base); +} + +static MediaBufsStatus +chk_memory_type(struct mediabufs_ctl *const mbc, + const struct v4l2_format * const f, + const enum mediabufs_memory m) +{ + struct v4l2_create_buffers cbuf = { + .count = 0, + .memory = V4L2_MEMORY_MMAP, + .format = *f + }; + + if (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &cbuf) != 0) + return MEDIABUFS_ERROR_OPERATION_FAILED; + + switch (m) { + case MEDIABUFS_MEMORY_DMABUF: + // 0 = Unknown but assume not in that case + if ((cbuf.capabilities & V4L2_BUF_CAP_SUPPORTS_DMABUF) == 0) + return MEDIABUFS_ERROR_UNSUPPORTED_MEMORY; + break; + case MEDIABUFS_MEMORY_MMAP: + break; + default: + return MEDIABUFS_ERROR_UNSUPPORTED_MEMORY; + } + + return MEDIABUFS_STATUS_SUCCESS; +} + +MediaBufsStatus +mediabufs_src_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype) +{ + return chk_memory_type(mbc, &mbc->src_fmt, memtype); +} + +MediaBufsStatus +mediabufs_dst_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype) +{ + return chk_memory_type(mbc, &mbc->dst_fmt, memtype); +} + +/* src format must have been set up before this */ +MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const mbc, + struct dmabufs_ctl * const dbsc, + unsigned int n, const enum mediabufs_memory memtype) +{ + unsigned int i; + struct v4l2_requestbuffers req = { + .count = n, + .type = mbc->src_fmt.type, + .memory = mediabufs_memory_to_v4l2(memtype) + }; + + bq_free_all_free_src(mbc->src); + + while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1) { + if (errno != EINTR) { + request_err(mbc->dc, "%s: Failed to request src bufs\n", __func__); + return MEDIABUFS_ERROR_OPERATION_FAILED; + } + } + + if (n > req.count) { + request_info(mbc->dc, "Only allocated %d of %d src buffers requested\n", req.count, n); + n = req.count; + } + + for (i = 0; i != n; ++i) { + struct qent_src *const be_src = qe_src_new(memtype); + if (!be_src) { + request_err(mbc->dc, "Failed to create src be %d\n", i); + goto fail; + } + switch (memtype) { + case MEDIABUFS_MEMORY_MMAP: + if (qe_import_from_buf(mbc, &be_src->base, &mbc->src_fmt, i, false)) { + qe_src_free(be_src); + goto fail; + } + be_src->fixed_size = 1; + break; + case MEDIABUFS_MEMORY_DMABUF: + if (qe_alloc_from_fmt(&be_src->base, dbsc, &mbc->src_fmt)) { + qe_src_free(be_src); + goto fail; + } + be_src->fixed_size = !mediabufs_src_resizable(mbc); + break; + default: + request_err(mbc->dc, "Unexpected memorty type\n"); + goto fail; + } + be_src->base.index = i; + + queue_put_free(mbc->src, &be_src->base); + } + + mbc->src->memtype = memtype; + return MEDIABUFS_STATUS_SUCCESS; + +fail: + bq_free_all_free_src(mbc->src); + req.count = 0; + while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1 && + errno == EINTR) + /* Loop */; + + return MEDIABUFS_ERROR_OPERATION_FAILED; +} + + + +/* + * Set stuff order: + * Set src fmt + * Set parameters (sps) on vfd + * Negotiate dst format (dst_fmt_set) + * Create src buffers + * Alloc a dst buffer or Create dst slots +*/ +MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc) +{ + if (mbc->stream_on) + return MEDIABUFS_STATUS_SUCCESS; + + if (set_stream(mbc->vfd, mbc->src_fmt.type, true) < 0) { + request_log("Failed to set stream on src type %d\n", mbc->src_fmt.type); + return MEDIABUFS_ERROR_OPERATION_FAILED; + } + + if (set_stream(mbc->vfd, mbc->dst_fmt.type, true) < 0) { + request_log("Failed to set stream on dst type %d\n", mbc->dst_fmt.type); + set_stream(mbc->vfd, mbc->src_fmt.type, false); + return MEDIABUFS_ERROR_OPERATION_FAILED; + } + + mbc->stream_on = true; + return MEDIABUFS_STATUS_SUCCESS; +} + +MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc) +{ + MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS; + + if (!mbc->stream_on) + return MEDIABUFS_STATUS_SUCCESS; + + if (set_stream(mbc->vfd, mbc->dst_fmt.type, false) < 0) { + request_log("Failed to set stream off dst type %d\n", mbc->dst_fmt.type); + status = MEDIABUFS_ERROR_OPERATION_FAILED; + } + + if (set_stream(mbc->vfd, mbc->src_fmt.type, false) < 0) { + request_log("Failed to set stream off src type %d\n", mbc->src_fmt.type); + status = MEDIABUFS_ERROR_OPERATION_FAILED; + } + + mbc->stream_on = false; + return status; +} + +int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, struct v4l2_ext_control control_array[], unsigned int n) +{ + struct v4l2_ext_controls controls = { + .controls = control_array, + .count = n + }; + + if (mreq) { + controls.which = V4L2_CTRL_WHICH_REQUEST_VAL; + controls.request_fd = media_request_fd(mreq); + } + + while (ioctl(mbc->vfd, VIDIOC_S_EXT_CTRLS, &controls)) + { + const int err = errno; + if (err != EINTR) { + request_err(mbc->dc, "Unable to set controls: %s\n", strerror(err)); + return -err; + } + } + + return 0; +} + +MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc, + struct media_request * const mreq, + unsigned int id, void *data, + unsigned int size) +{ + struct v4l2_ext_control control = { + .id = id, + .ptr = data, + .size = size + }; + + int rv = mediabufs_ctl_set_ext_ctrls(mbc, mreq, &control, 1); + return !rv ? MEDIABUFS_STATUS_SUCCESS : MEDIABUFS_ERROR_OPERATION_FAILED; +} + +MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc, + enum v4l2_buf_type buf_type, + const uint32_t pixfmt, + const uint32_t width, const uint32_t height, + const size_t bufsize) +{ + MediaBufsStatus rv = fmt_set(&mbc->src_fmt, mbc->vfd, buf_type, pixfmt, width, height, bufsize); + if (rv != MEDIABUFS_STATUS_SUCCESS) + request_err(mbc->dc, "Failed to set src buftype %d, format %#x %dx%d\n", buf_type, pixfmt, width, height); + + return rv; +} + +int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n) +{ + int rv = 0; + while (n--) { + while (ioctl(mbc->vfd, VIDIOC_QUERY_EXT_CTRL, ctrls)) { + const int err = errno; + if (err != EINTR) { + // Often used for probing - errors are to be expected + request_debug(mbc->dc, "Failed to query ext id=%#x, err=%d\n", ctrls->id, err); + ctrls->type = 0; // 0 is invalid + rv = -err; + break; + } + } + ++ctrls; + } + return rv; +} + +int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc) +{ +#if 1 + return 0; +#else + // Single planar OUTPUT can only take exact size buffers + // Multiplanar will take larger than negotiated + return V4L2_TYPE_IS_MULTIPLANAR(mbc->src_fmt.type); +#endif +} + +static void mediabufs_ctl_delete(struct mediabufs_ctl *const mbc) +{ + if (!mbc) + return; + + // Break the weak link first + ff_weak_link_break(&mbc->this_wlm); + + polltask_delete(&mbc->pt); + + mediabufs_stream_off(mbc); + + // Empty v4l2 buffer stash + request_buffers(mbc->vfd, mbc->src_fmt.type, V4L2_MEMORY_MMAP, 0); + request_buffers(mbc->vfd, mbc->dst_fmt.type, V4L2_MEMORY_MMAP, 0); + + bq_free_all_free_src(mbc->src); + bq_free_all_inuse_src(mbc->src); + bq_free_all_free_dst(mbc->dst); + + { + struct qent_dst *dst_be; + while ((dst_be = base_to_dst(bq_get_inuse(mbc->dst))) != NULL) { + dst_be->base.timestamp = (struct timeval){0}; + dst_be->base.status = QENT_ERROR; + qe_dst_done(dst_be); + } + } + + queue_delete(mbc->dst); + queue_delete(mbc->src); + close(mbc->vfd); + pthread_mutex_destroy(&mbc->lock); + + free(mbc); +} + +struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc) +{ + atomic_fetch_add(&mbc->ref_count, 1); + return mbc; +} + +void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc) +{ + struct mediabufs_ctl *const mbc = *pmbc; + int n; + + if (!mbc) + return; + *pmbc = NULL; + n = atomic_fetch_sub(&mbc->ref_count, 1); + if (n) + return; + mediabufs_ctl_delete(mbc); +} + +unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc) +{ + return mbc->capability.version; +} + +static int set_capabilities(struct mediabufs_ctl *const mbc) +{ + uint32_t caps; + + if (ioctl(mbc->vfd, VIDIOC_QUERYCAP, &mbc->capability)) { + int err = errno; + request_err(mbc->dc, "Failed to get capabilities: %s\n", strerror(err)); + return -err; + } + + caps = (mbc->capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0 ? + mbc->capability.device_caps : + mbc->capability.capabilities; + + if ((caps & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) { + mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; + mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; + } + else if ((caps & V4L2_CAP_VIDEO_M2M) != 0) { + mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; + mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + } + else { + request_err(mbc->dc, "No M2M capabilities (%#x)\n", caps); + return -EINVAL; + } + + return 0; +} + +/* One of these per context */ +struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, const char * vpath, struct pollqueue *const pq) +{ + struct mediabufs_ctl *const mbc = calloc(1, sizeof(*mbc)); + + if (!mbc) + return NULL; + + mbc->dc = dc; + // Default mono planar + mbc->pq = pq; + pthread_mutex_init(&mbc->lock, NULL); + + /* Pick a default - could we scan for this? */ + if (vpath == NULL) + vpath = "/dev/media0"; + + while ((mbc->vfd = open(vpath, O_RDWR)) == -1) + { + const int err = errno; + if (err != EINTR) { + request_err(dc, "Failed to open video dev '%s': %s\n", vpath, strerror(err)); + goto fail0; + } + } + + if (set_capabilities(mbc)) { + request_err(dc, "Bad capabilities for video dev '%s'\n", vpath); + goto fail1; + } + + mbc->src = queue_new(mbc->vfd); + if (!mbc->src) + goto fail1; + mbc->dst = queue_new(mbc->vfd); + if (!mbc->dst) + goto fail2; + mbc->pt = polltask_new(pq, mbc->vfd, POLLIN | POLLOUT, mediabufs_poll_cb, mbc); + if (!mbc->pt) + goto fail3; + mbc->this_wlm = ff_weak_link_new(mbc); + if (!mbc->this_wlm) + goto fail4; + + /* Cannot add polltask now - polling with nothing pending + * generates infinite error polls + */ + return mbc; + +fail4: + polltask_delete(&mbc->pt); +fail3: + queue_delete(mbc->dst); +fail2: + queue_delete(mbc->src); +fail1: + close(mbc->vfd); +fail0: + free(mbc); + request_info(dc, "%s: FAILED\n", __func__); + return NULL; +} + + + diff --git a/libavcodec/v4l2_req_media.h b/libavcodec/v4l2_req_media.h new file mode 100644 index 0000000000..890947b2e2 --- /dev/null +++ b/libavcodec/v4l2_req_media.h @@ -0,0 +1,171 @@ +/* +e.h +* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _MEDIA_H_ +#define _MEDIA_H_ + +#include +#include + +struct v4l2_format; +struct v4l2_fmtdesc; +struct v4l2_query_ext_ctrl; + +struct pollqueue; +struct media_request; +struct media_pool; + +typedef enum media_buf_status { + MEDIABUFS_STATUS_SUCCESS = 0, + MEDIABUFS_ERROR_OPERATION_FAILED, + MEDIABUFS_ERROR_DECODING_ERROR, + MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE, + MEDIABUFS_ERROR_UNSUPPORTED_RT_FORMAT, + MEDIABUFS_ERROR_ALLOCATION_FAILED, + MEDIABUFS_ERROR_UNSUPPORTED_MEMORY, +} MediaBufsStatus; + +struct media_pool * media_pool_new(const char * const media_path, + struct pollqueue * const pq, + const unsigned int n); +void media_pool_delete(struct media_pool ** pmp); + +// Obtain a media request +// Will block if none availible - has a 2sec timeout +struct media_request * media_request_get(struct media_pool * const mp); +int media_request_fd(const struct media_request * const req); + +// Start this request +// Request structure is returned to pool once done +int media_request_start(struct media_request * const req); + +// Return an *unstarted* media_request to the pool +// May later be upgraded to allow for aborting a started req +int media_request_abort(struct media_request ** const preq); + + +struct mediabufs_ctl; +struct qent_src; +struct qent_dst; +struct dmabuf_h; +struct dmabufs_ctl; + +// 1-1 mammping to V4L2 type - just defined separetely to avoid some include versioning difficulties +enum mediabufs_memory { + MEDIABUFS_MEMORY_UNSET = 0, + MEDIABUFS_MEMORY_MMAP = 1, + MEDIABUFS_MEMORY_USERPTR = 2, + MEDIABUFS_MEMORY_OVERLAY = 3, + MEDIABUFS_MEMORY_DMABUF = 4, +}; + +int qent_src_params_set(struct qent_src *const be, const struct timeval * timestamp); +struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst); + +// prealloc +int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc); +// dbsc may be NULL if realloc not required +int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc); +const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be, unsigned int plane); +int qent_dst_dup_fd(const struct qent_dst *const be, unsigned int plane); +MediaBufsStatus qent_dst_wait(struct qent_dst *const be); +void qent_dst_delete(struct qent_dst *const be); +// Returns a qent_dst to its mbc free Q or deletes it if the mbc is dead +void qent_dst_unref(struct qent_dst ** const pbe_dst); +struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst); + +const uint8_t * qent_dst_data(struct qent_dst *const be, unsigned int buf_no); +MediaBufsStatus qent_dst_read_start(struct qent_dst *const be); +MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be); +/* Import an fd unattached to any mediabuf */ +MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst, + unsigned int plane, + int fd, size_t size); + +const char * mediabufs_memory_name(const enum mediabufs_memory m); + +MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc, + struct media_request **const pmreq, + struct qent_src **const psrc_be, + struct qent_dst *const dst_be, + const bool is_final); +// Get / alloc a dst buffer & associate with a slot +// If the dst pool is empty then behaviour depends on the fixed flag passed to +// dst_slots_create. Default is !fixed = unlimited alloc +struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, + struct dmabufs_ctl *const dbsc); +// Create dst slots without alloc +// If fixed true then qent_alloc will only get slots from this pool and will +// block until a qent has been unrefed +MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype); + +MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc); +MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc); +const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc); + +typedef int mediabufs_dst_fmt_accept_fn(void * v, const struct v4l2_fmtdesc *fmtdesc); + +MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc, + const unsigned int width, + const unsigned int height, + mediabufs_dst_fmt_accept_fn *const accept_fn, + void *const accept_v); +struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc); +void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src); + +int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, + struct v4l2_ext_control control_array[], unsigned int n); +MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc, + struct media_request * const mreq, + unsigned int id, void *data, + unsigned int size); +int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n); + +int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc); + +MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc, + enum v4l2_buf_type buf_type, + const uint32_t pixfmt, + const uint32_t width, const uint32_t height, + const size_t bufsize); + +MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const rw, + struct dmabufs_ctl * const dbsc, + unsigned int n, + const enum mediabufs_memory memtype); + +// Want to have appropriate formats set first +MediaBufsStatus mediabufs_src_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype); +MediaBufsStatus mediabufs_dst_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype); + +#define MEDIABUFS_DRIVER_VERSION(a, b, c) (((a) << 16) | ((b) << 8) | (c)) +unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc); + +struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, + const char *vpath, struct pollqueue *const pq); +void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc); +struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc); + + +#endif diff --git a/libavcodec/v4l2_req_pollqueue.c b/libavcodec/v4l2_req_pollqueue.c new file mode 100644 index 0000000000..cc8a5d4001 --- /dev/null +++ b/libavcodec/v4l2_req_pollqueue.c @@ -0,0 +1,361 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "v4l2_req_pollqueue.h" +#include "v4l2_req_utils.h" + + +struct pollqueue; + +enum polltask_state { + POLLTASK_UNQUEUED = 0, + POLLTASK_QUEUED, + POLLTASK_RUNNING, + POLLTASK_Q_KILL, + POLLTASK_RUN_KILL, +}; + +struct polltask { + struct polltask *next; + struct polltask *prev; + struct pollqueue *q; + enum polltask_state state; + + int fd; + short events; + + void (*fn)(void *v, short revents); + void * v; + + uint64_t timeout; /* CLOCK_MONOTONIC time, 0 => never */ + sem_t kill_sem; +}; + +struct pollqueue { + atomic_int ref_count; + pthread_mutex_t lock; + + struct polltask *head; + struct polltask *tail; + + bool kill; + bool no_prod; + int prod_fd; + struct polltask *prod_pt; + pthread_t worker; +}; + +struct polltask *polltask_new(struct pollqueue *const pq, + const int fd, const short events, + void (*const fn)(void *v, short revents), + void *const v) +{ + struct polltask *pt; + + if (!events) + return NULL; + + pt = malloc(sizeof(*pt)); + if (!pt) + return NULL; + + *pt = (struct polltask){ + .next = NULL, + .prev = NULL, + .q = pollqueue_ref(pq), + .fd = fd, + .events = events, + .fn = fn, + .v = v + }; + + sem_init(&pt->kill_sem, 0, 0); + + return pt; +} + +static void pollqueue_rem_task(struct pollqueue *const pq, struct polltask *const pt) +{ + if (pt->prev) + pt->prev->next = pt->next; + else + pq->head = pt->next; + if (pt->next) + pt->next->prev = pt->prev; + else + pq->tail = pt->prev; + pt->next = NULL; + pt->prev = NULL; +} + +static void polltask_free(struct polltask * const pt) +{ + sem_destroy(&pt->kill_sem); + free(pt); +} + +static int pollqueue_prod(const struct pollqueue *const pq) +{ + static const uint64_t one = 1; + return write(pq->prod_fd, &one, sizeof(one)); +} + +void polltask_delete(struct polltask **const ppt) +{ + struct polltask *const pt = *ppt; + struct pollqueue * pq; + enum polltask_state state; + bool prodme; + + if (!pt) + return; + + pq = pt->q; + pthread_mutex_lock(&pq->lock); + state = pt->state; + pt->state = (state == POLLTASK_RUNNING) ? POLLTASK_RUN_KILL : POLLTASK_Q_KILL; + prodme = !pq->no_prod; + pthread_mutex_unlock(&pq->lock); + + if (state != POLLTASK_UNQUEUED) { + if (prodme) + pollqueue_prod(pq); + while (sem_wait(&pt->kill_sem) && errno == EINTR) + /* loop */; + } + + // Leave zapping the ref until we have DQed the PT as might well be + // legitimately used in it + *ppt = NULL; + polltask_free(pt); + pollqueue_unref(&pq); +} + +static uint64_t pollqueue_now(int timeout) +{ + struct timespec now; + uint64_t now_ms; + + if (clock_gettime(CLOCK_MONOTONIC, &now)) + return 0; + now_ms = (now.tv_nsec / 1000000) + (uint64_t)now.tv_sec * 1000 + timeout; + return now_ms ? now_ms : (uint64_t)1; +} + +void pollqueue_add_task(struct polltask *const pt, const int timeout) +{ + bool prodme = false; + struct pollqueue * const pq = pt->q; + + pthread_mutex_lock(&pq->lock); + if (pt->state != POLLTASK_Q_KILL && pt->state != POLLTASK_RUN_KILL) { + if (pq->tail) + pq->tail->next = pt; + else + pq->head = pt; + pt->prev = pq->tail; + pt->next = NULL; + pt->state = POLLTASK_QUEUED; + pt->timeout = timeout < 0 ? 0 : pollqueue_now(timeout); + pq->tail = pt; + prodme = !pq->no_prod; + } + pthread_mutex_unlock(&pq->lock); + if (prodme) + pollqueue_prod(pq); +} + +static void *poll_thread(void *v) +{ + struct pollqueue *const pq = v; + struct pollfd *a = NULL; + size_t asize = 0; + + pthread_mutex_lock(&pq->lock); + do { + unsigned int i; + unsigned int n = 0; + struct polltask *pt; + struct polltask *pt_next; + uint64_t now = pollqueue_now(0); + int timeout = -1; + int rv; + + for (pt = pq->head; pt; pt = pt_next) { + int64_t t; + + pt_next = pt->next; + + if (pt->state == POLLTASK_Q_KILL) { + pollqueue_rem_task(pq, pt); + sem_post(&pt->kill_sem); + continue; + } + + if (n >= asize) { + asize = asize ? asize * 2 : 4; + a = realloc(a, asize * sizeof(*a)); + if (!a) { + request_log("Failed to realloc poll array to %zd\n", asize); + goto fail_locked; + } + } + + a[n++] = (struct pollfd){ + .fd = pt->fd, + .events = pt->events + }; + + t = (int64_t)(pt->timeout - now); + if (pt->timeout && t < INT_MAX && + (timeout < 0 || (int)t < timeout)) + timeout = (t < 0) ? 0 : (int)t; + } + pthread_mutex_unlock(&pq->lock); + + if ((rv = poll(a, n, timeout)) == -1) { + if (errno != EINTR) { + request_log("Poll error: %s\n", strerror(errno)); + goto fail_unlocked; + } + } + + pthread_mutex_lock(&pq->lock); + now = pollqueue_now(0); + + /* Prodding in this loop is pointless and might lead to + * infinite looping + */ + pq->no_prod = true; + for (i = 0, pt = pq->head; i < n; ++i, pt = pt_next) { + pt_next = pt->next; + + /* Pending? */ + if (a[i].revents || + (pt->timeout && (int64_t)(now - pt->timeout) >= 0)) { + pollqueue_rem_task(pq, pt); + if (pt->state == POLLTASK_QUEUED) + pt->state = POLLTASK_RUNNING; + if (pt->state == POLLTASK_Q_KILL) + pt->state = POLLTASK_RUN_KILL; + pthread_mutex_unlock(&pq->lock); + + /* This can add new entries to the Q but as + * those are added to the tail our existing + * chain remains intact + */ + pt->fn(pt->v, a[i].revents); + + pthread_mutex_lock(&pq->lock); + if (pt->state == POLLTASK_RUNNING) + pt->state = POLLTASK_UNQUEUED; + if (pt->state == POLLTASK_RUN_KILL) + sem_post(&pt->kill_sem); + } + } + pq->no_prod = false; + + } while (!pq->kill); + +fail_locked: + pthread_mutex_unlock(&pq->lock); +fail_unlocked: + free(a); + return NULL; +} + +static void prod_fn(void *v, short revents) +{ + struct pollqueue *const pq = v; + char buf[8]; + if (revents) + read(pq->prod_fd, buf, 8); + if (!pq->kill) + pollqueue_add_task(pq->prod_pt, -1); +} + +struct pollqueue * pollqueue_new(void) +{ + struct pollqueue *pq = malloc(sizeof(*pq)); + if (!pq) + return NULL; + *pq = (struct pollqueue){ + .ref_count = ATOMIC_VAR_INIT(0), + .lock = PTHREAD_MUTEX_INITIALIZER, + .head = NULL, + .tail = NULL, + .kill = false, + .prod_fd = -1 + }; + + pq->prod_fd = eventfd(0, EFD_NONBLOCK); + if (pq->prod_fd == 1) + goto fail1; + pq->prod_pt = polltask_new(pq, pq->prod_fd, POLLIN, prod_fn, pq); + if (!pq->prod_pt) + goto fail2; + pollqueue_add_task(pq->prod_pt, -1); + if (pthread_create(&pq->worker, NULL, poll_thread, pq)) + goto fail3; + // Reset ref count which will have been inced by the add_task + atomic_store(&pq->ref_count, 0); + return pq; + +fail3: + polltask_free(pq->prod_pt); +fail2: + close(pq->prod_fd); +fail1: + free(pq); + return NULL; +} + +static void pollqueue_free(struct pollqueue *const pq) +{ + void *rv; + + pthread_mutex_lock(&pq->lock); + pq->kill = true; + pollqueue_prod(pq); + pthread_mutex_unlock(&pq->lock); + + pthread_join(pq->worker, &rv); + polltask_free(pq->prod_pt); + pthread_mutex_destroy(&pq->lock); + close(pq->prod_fd); + free(pq); +} + +struct pollqueue * pollqueue_ref(struct pollqueue *const pq) +{ + atomic_fetch_add(&pq->ref_count, 1); + return pq; +} + +void pollqueue_unref(struct pollqueue **const ppq) +{ + struct pollqueue * const pq = *ppq; + + if (!pq) + return; + *ppq = NULL; + + if (atomic_fetch_sub(&pq->ref_count, 1) != 0) + return; + + pollqueue_free(pq); +} + + + diff --git a/libavcodec/v4l2_req_pollqueue.h b/libavcodec/v4l2_req_pollqueue.h new file mode 100644 index 0000000000..e1182cb2fc --- /dev/null +++ b/libavcodec/v4l2_req_pollqueue.h @@ -0,0 +1,18 @@ +#ifndef POLLQUEUE_H_ +#define POLLQUEUE_H_ + +struct polltask; +struct pollqueue; + +struct polltask *polltask_new(struct pollqueue *const pq, + const int fd, const short events, + void (*const fn)(void *v, short revents), + void *const v); +void polltask_delete(struct polltask **const ppt); + +void pollqueue_add_task(struct polltask *const pt, const int timeout); +struct pollqueue * pollqueue_new(void); +void pollqueue_unref(struct pollqueue **const ppq); +struct pollqueue * pollqueue_ref(struct pollqueue *const pq); + +#endif /* POLLQUEUE_H_ */ diff --git a/libavcodec/v4l2_req_utils.h b/libavcodec/v4l2_req_utils.h new file mode 100644 index 0000000000..a31cc1f4ec --- /dev/null +++ b/libavcodec/v4l2_req_utils.h @@ -0,0 +1,27 @@ +#ifndef AVCODEC_V4L2_REQ_UTILS_H +#define AVCODEC_V4L2_REQ_UTILS_H + +#include +#include "libavutil/log.h" + +#define request_log(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__) + +#define request_err(_ctx, ...) av_log(_ctx, AV_LOG_ERROR, __VA_ARGS__) +#define request_warn(_ctx, ...) av_log(_ctx, AV_LOG_WARNING, __VA_ARGS__) +#define request_info(_ctx, ...) av_log(_ctx, AV_LOG_INFO, __VA_ARGS__) +#define request_debug(_ctx, ...) av_log(_ctx, AV_LOG_DEBUG, __VA_ARGS__) + +static inline char safechar(char c) { + return c > 0x20 && c < 0x7f ? c : '.'; +} + +static inline const char * strfourcc(char tbuf[5], uint32_t fcc) { + tbuf[0] = safechar((fcc >> 0) & 0xff); + tbuf[1] = safechar((fcc >> 8) & 0xff); + tbuf[2] = safechar((fcc >> 16) & 0xff); + tbuf[3] = safechar((fcc >> 24) & 0xff); + tbuf[4] = '\0'; + return tbuf; +} + +#endif diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c new file mode 100644 index 0000000000..db7ed13b6d --- /dev/null +++ b/libavcodec/v4l2_request_hevc.c @@ -0,0 +1,348 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +#include "config.h" +#include "decode.h" +#include "hevcdec.h" +#include "hwconfig.h" +#include "internal.h" + +#include "v4l2_request_hevc.h" + +#include "libavutil/hwcontext_drm.h" +#include "libavutil/pixdesc.h" + +#include "v4l2_req_devscan.h" +#include "v4l2_req_dmabufs.h" +#include "v4l2_req_pollqueue.h" +#include "v4l2_req_media.h" +#include "v4l2_req_utils.h" + +static size_t bit_buf_size(unsigned int w, unsigned int h, unsigned int bits_minus8) +{ + const size_t wxh = w * h; + size_t bits_alloc; + + /* Annex A gives a min compression of 2 @ lvl 3.1 + * (wxh <= 983040) and min 4 thereafter but avoid + * the odity of 983041 having a lower limit than + * 983040. + * Multiply by 3/2 for 4:2:0 + */ + bits_alloc = wxh < 983040 ? wxh * 3 / 4 : + wxh < 983040 * 2 ? 983040 * 3 / 4 : + wxh * 3 / 8; + /* Allow for bit depth */ + bits_alloc += (bits_alloc * bits_minus8) / 8; + /* Add a few bytes (16k) for overhead */ + bits_alloc += 0x4000; + return bits_alloc; +} + +static int v4l2_req_hevc_start_frame(AVCodecContext *avctx, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + const V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; + return ctx->fns->start_frame(avctx, buffer, size); +} + +static int v4l2_req_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) +{ + V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; + return ctx->fns->decode_slice(avctx, buffer, size); +} + +static int v4l2_req_hevc_end_frame(AVCodecContext *avctx) +{ + V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; + return ctx->fns->end_frame(avctx); +} + +static void v4l2_req_hevc_abort_frame(AVCodecContext * const avctx) +{ + V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; + ctx->fns->abort_frame(avctx); +} + +static int v4l2_req_hevc_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) +{ + V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; + return ctx->fns->frame_params(avctx, hw_frames_ctx); +} + +static int v4l2_req_hevc_alloc_frame(AVCodecContext * avctx, AVFrame *frame) +{ + V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; + return ctx->fns->alloc_frame(avctx, frame); +} + + +static int v4l2_request_hevc_uninit(AVCodecContext *avctx) +{ + V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; + + av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__); + + decode_q_wait(&ctx->decode_q, NULL); // Wait for all other threads to be out of decode + + mediabufs_ctl_unref(&ctx->mbufs); + media_pool_delete(&ctx->mpool); + pollqueue_unref(&ctx->pq); + dmabufs_ctl_unref(&ctx->dbufs); + devscan_delete(&ctx->devscan); + + decode_q_uninit(&ctx->decode_q); + +// if (avctx->hw_frames_ctx) { +// AVHWFramesContext *hwfc = (AVHWFramesContext*)avctx->hw_frames_ctx->data; +// av_buffer_pool_flush(hwfc->pool); +// } + return 0; +} + +static int dst_fmt_accept_cb(void * v, const struct v4l2_fmtdesc *fmtdesc) +{ + AVCodecContext *const avctx = v; + const HEVCContext *const h = avctx->priv_data; + + if (h->ps.sps->bit_depth == 8) { + if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_COL128 || + fmtdesc->pixelformat == V4L2_PIX_FMT_NV12) { + return 1; + } + } + else if (h->ps.sps->bit_depth == 10) { + if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_10_COL128) { + return 1; + } + } + return 0; +} + +static int v4l2_request_hevc_init(AVCodecContext *avctx) +{ + const HEVCContext *h = avctx->priv_data; + V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; + const HEVCSPS * const sps = h->ps.sps; + int ret; + const struct decdev * decdev; + const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 4).src_pix_fmt_v4l2; // Assuming constant for all APIs but avoiding V4L2 includes + size_t src_size; + enum mediabufs_memory src_memtype; + enum mediabufs_memory dst_memtype; + + av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__); + + // Give up immediately if this is something that we have no code to deal with + if (h->ps.sps->chroma_format_idc != 1) { + av_log(avctx, AV_LOG_WARNING, "chroma_format_idc(%d) != 1: Not implemented\n", h->ps.sps->chroma_format_idc); + return AVERROR_PATCHWELCOME; + } + if (!(h->ps.sps->bit_depth == 10 || h->ps.sps->bit_depth == 8) || + h->ps.sps->bit_depth != h->ps.sps->bit_depth_chroma) { + av_log(avctx, AV_LOG_WARNING, "Bit depth Y:%d C:%d: Not implemented\n", h->ps.sps->bit_depth, h->ps.sps->bit_depth_chroma); + return AVERROR_PATCHWELCOME; + } + + if ((ret = devscan_build(avctx, &ctx->devscan)) != 0) { + av_log(avctx, AV_LOG_WARNING, "Failed to find any V4L2 devices\n"); + return (AVERROR(-ret)); + } + ret = AVERROR(ENOMEM); // Assume mem fail by default for these + + if ((decdev = devscan_find(ctx->devscan, src_pix_fmt)) == NULL) + { + av_log(avctx, AV_LOG_WARNING, "Failed to find a V4L2 device for H265\n"); + ret = AVERROR(ENODEV); + goto fail0; + } + av_log(avctx, AV_LOG_DEBUG, "Trying V4L2 devices: %s,%s\n", + decdev_media_path(decdev), decdev_video_path(decdev)); + + if ((ctx->dbufs = dmabufs_ctl_new()) == NULL) { + av_log(avctx, AV_LOG_DEBUG, "Unable to open dmabufs - try mmap buffers\n"); + src_memtype = MEDIABUFS_MEMORY_MMAP; + dst_memtype = MEDIABUFS_MEMORY_MMAP; + } + else { + av_log(avctx, AV_LOG_DEBUG, "Dmabufs opened - try dmabuf buffers\n"); + src_memtype = MEDIABUFS_MEMORY_DMABUF; + dst_memtype = MEDIABUFS_MEMORY_DMABUF; + } + + if ((ctx->pq = pollqueue_new()) == NULL) { + av_log(avctx, AV_LOG_ERROR, "Unable to create pollqueue\n"); + goto fail1; + } + + if ((ctx->mpool = media_pool_new(decdev_media_path(decdev), ctx->pq, 4)) == NULL) { + av_log(avctx, AV_LOG_ERROR, "Unable to create media pool\n"); + goto fail2; + } + + if ((ctx->mbufs = mediabufs_ctl_new(avctx, decdev_video_path(decdev), ctx->pq)) == NULL) { + av_log(avctx, AV_LOG_ERROR, "Unable to create media controls\n"); + goto fail3; + } + + // Ask for an initial bitbuf size of max size / 4 + // We will realloc if we need more + // Must use sps->h/w as avctx contains cropped size +retry_src_memtype: + src_size = bit_buf_size(sps->width, sps->height, sps->bit_depth - 8); + if (src_memtype == MEDIABUFS_MEMORY_DMABUF && mediabufs_src_resizable(ctx->mbufs)) + src_size /= 4; + // Kludge for conformance tests which break Annex A limits + else if (src_size < 0x40000) + src_size = 0x40000; + + if (mediabufs_src_fmt_set(ctx->mbufs, decdev_src_type(decdev), src_pix_fmt, + sps->width, sps->height, src_size)) { + char tbuf1[5]; + av_log(avctx, AV_LOG_ERROR, "Failed to set source format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height); + goto fail4; + } + + if (mediabufs_src_chk_memtype(ctx->mbufs, src_memtype)) { + if (src_memtype == MEDIABUFS_MEMORY_DMABUF) { + src_memtype = MEDIABUFS_MEMORY_MMAP; + goto retry_src_memtype; + } + av_log(avctx, AV_LOG_ERROR, "Failed to get src memory type\n"); + goto fail4; + } + + if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0) { + av_log(avctx, AV_LOG_DEBUG, "HEVC API version 4 probed successfully\n"); + ctx->fns = &V2(ff_v4l2_req_hevc, 4); + } +#if CONFIG_V4L2_REQ_HEVC_VX + else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) { + av_log(avctx, AV_LOG_DEBUG, "HEVC API version 3 probed successfully\n"); + ctx->fns = &V2(ff_v4l2_req_hevc, 3); + } + else if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0) { + av_log(avctx, AV_LOG_DEBUG, "HEVC API version 2 probed successfully\n"); + ctx->fns = &V2(ff_v4l2_req_hevc, 2); + } + else if (V2(ff_v4l2_req_hevc, 1).probe(avctx, ctx) == 0) { + av_log(avctx, AV_LOG_DEBUG, "HEVC API version 1 probed successfully\n"); + ctx->fns = &V2(ff_v4l2_req_hevc, 1); + } +#endif + else { + av_log(avctx, AV_LOG_ERROR, "No HEVC version probed successfully\n"); + ret = AVERROR(EINVAL); + goto fail4; + } + + if (mediabufs_dst_fmt_set(ctx->mbufs, sps->width, sps->height, dst_fmt_accept_cb, avctx)) { + char tbuf1[5]; + av_log(avctx, AV_LOG_ERROR, "Failed to set destination format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height); + goto fail4; + } + + if (mediabufs_src_pool_create(ctx->mbufs, ctx->dbufs, 6, src_memtype)) { + av_log(avctx, AV_LOG_ERROR, "Failed to create source pool\n"); + goto fail4; + } + + { + unsigned int dst_slots = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering + + avctx->thread_count + (avctx->extra_hw_frames > 0 ? avctx->extra_hw_frames : 6); + av_log(avctx, AV_LOG_DEBUG, "Slots=%d: Reordering=%d, threads=%d, hw+=%d\n", dst_slots, + sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering, + avctx->thread_count, avctx->extra_hw_frames); + + if (mediabufs_dst_chk_memtype(ctx->mbufs, dst_memtype)) { + if (dst_memtype != MEDIABUFS_MEMORY_DMABUF) { + av_log(avctx, AV_LOG_ERROR, "Failed to get dst memory type\n"); + goto fail4; + } + av_log(avctx, AV_LOG_DEBUG, "Dst DMABUF not supported - trying mmap\n"); + dst_memtype = MEDIABUFS_MEMORY_MMAP; + } + + // extra_hw_frames is -1 if unset + if (mediabufs_dst_slots_create(ctx->mbufs, dst_slots, (avctx->extra_hw_frames > 0), dst_memtype)) { + av_log(avctx, AV_LOG_ERROR, "Failed to create destination slots\n"); + goto fail4; + } + } + + if (mediabufs_stream_on(ctx->mbufs)) { + av_log(avctx, AV_LOG_ERROR, "Failed stream on\n"); + goto fail4; + } + + if ((ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_DRM)) != 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to create frame ctx\n"); + goto fail4; + } + + if ((ret = ctx->fns->set_controls(avctx, ctx)) != 0) { + av_log(avctx, AV_LOG_ERROR, "Failed set controls\n"); + goto fail5; + } + + decode_q_init(&ctx->decode_q); + + // Set our s/w format + avctx->sw_pix_fmt = ((AVHWFramesContext *)avctx->hw_frames_ctx->data)->sw_format; + + av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s; buffers: src %s, dst %s; swfmt=%s\n", + ctx->fns->name, + decdev_media_path(decdev), decdev_video_path(decdev), + mediabufs_memory_name(src_memtype), mediabufs_memory_name(dst_memtype), + av_get_pix_fmt_name(avctx->sw_pix_fmt)); + + return 0; + +fail5: + av_buffer_unref(&avctx->hw_frames_ctx); +fail4: + mediabufs_ctl_unref(&ctx->mbufs); +fail3: + media_pool_delete(&ctx->mpool); +fail2: + pollqueue_unref(&ctx->pq); +fail1: + dmabufs_ctl_unref(&ctx->dbufs); +fail0: + devscan_delete(&ctx->devscan); + return ret; +} + +const AVHWAccel ff_hevc_v4l2request_hwaccel = { + .name = "hevc_v4l2request", + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_HEVC, + .pix_fmt = AV_PIX_FMT_DRM_PRIME, + .alloc_frame = v4l2_req_hevc_alloc_frame, + .start_frame = v4l2_req_hevc_start_frame, + .decode_slice = v4l2_req_hevc_decode_slice, + .end_frame = v4l2_req_hevc_end_frame, + .abort_frame = v4l2_req_hevc_abort_frame, + .init = v4l2_request_hevc_init, + .uninit = v4l2_request_hevc_uninit, + .priv_data_size = sizeof(V4L2RequestContextHEVC), + .frame_params = v4l2_req_hevc_frame_params, + .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_MT_SAFE, +}; diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h new file mode 100644 index 0000000000..99c90064ea --- /dev/null +++ b/libavcodec/v4l2_request_hevc.h @@ -0,0 +1,102 @@ +#ifndef AVCODEC_V4L2_REQUEST_HEVC_H +#define AVCODEC_V4L2_REQUEST_HEVC_H + +#include +#include +#include "v4l2_req_decode_q.h" + +#ifndef DRM_FORMAT_NV15 +#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') +#endif + +#ifndef DRM_FORMAT_NV20 +#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') +#endif + +// P030 should be defined in drm_fourcc.h and hopefully will be sometime +// in the future but until then... +#ifndef DRM_FORMAT_P030 +#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') +#endif + +#ifndef DRM_FORMAT_NV15 +#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') +#endif + +#ifndef DRM_FORMAT_NV20 +#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') +#endif + +#include +#ifndef V4L2_CID_CODEC_BASE +#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE +#endif + +// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined +// in drm_fourcc.h hopefully will be sometime in the future but until then... +#ifndef V4L2_PIX_FMT_NV12_10_COL128 +#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') +#endif + +#ifndef V4L2_PIX_FMT_NV12_COL128 +#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ +#endif + +#ifndef V4L2_CTRL_FLAG_DYNAMIC_ARRAY +#define V4L2_CTRL_FLAG_DYNAMIC_ARRAY 0x0800 +#endif + +#define VCAT(name, version) name##_v##version +#define V2(n,v) VCAT(n, v) +#define V(n) V2(n, HEVC_CTRLS_VERSION) + +#define S2(x) #x +#define STR(x) S2(x) + +// 1 per decoder +struct v4l2_req_decode_fns; + +typedef struct V4L2RequestContextHEVC { +// V4L2RequestContext base; + const struct v4l2_req_decode_fns * fns; + + unsigned int timestamp; // ?? maybe uint64_t + + int decode_mode; + int start_code; + unsigned int max_slices; // 0 => not wanted (frame mode) + unsigned int max_offsets; // 0 => not wanted + + req_decode_q decode_q; + + struct devscan *devscan; + struct dmabufs_ctl *dbufs; + struct pollqueue *pq; + struct media_pool * mpool; + struct mediabufs_ctl *mbufs; +} V4L2RequestContextHEVC; + +typedef struct v4l2_req_decode_fns { + int src_pix_fmt_v4l2; + const char * name; + + // Init setup + int (*probe)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx); + int (*set_controls)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx); + + // Passthrough of hwaccel fns + int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size); + int (*decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size); + int (*end_frame)(AVCodecContext *avctx); + void (*abort_frame)(AVCodecContext *avctx); + int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); + int (*alloc_frame)(AVCodecContext * avctx, AVFrame *frame); +} v4l2_req_decode_fns; + + +extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1); +extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2); +extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 3); +extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 4); + +#endif diff --git a/libavcodec/weak_link.c b/libavcodec/weak_link.c new file mode 100644 index 0000000000..f234a985b9 --- /dev/null +++ b/libavcodec/weak_link.c @@ -0,0 +1,102 @@ +#include +#include +#include +#include "weak_link.h" + +struct ff_weak_link_master { + atomic_int ref_count; /* 0 is single ref for easier atomics */ + pthread_rwlock_t lock; + void * ptr; +}; + +static inline struct ff_weak_link_master * weak_link_x(struct ff_weak_link_client * c) +{ + return (struct ff_weak_link_master *)c; +} + +struct ff_weak_link_master * ff_weak_link_new(void * p) +{ + struct ff_weak_link_master * w = malloc(sizeof(*w)); + if (!w) + return NULL; + w->ptr = p; + if (pthread_rwlock_init(&w->lock, NULL)) { + free(w); + return NULL; + } + return w; +} + +static void weak_link_do_unref(struct ff_weak_link_master * const w) +{ + int n = atomic_fetch_sub(&w->ref_count, 1); + if (n) + return; + + pthread_rwlock_destroy(&w->lock); + free(w); +} + +// Unref & break link +void ff_weak_link_break(struct ff_weak_link_master ** ppLink) +{ + struct ff_weak_link_master * const w = *ppLink; + if (!w) + return; + + *ppLink = NULL; + pthread_rwlock_wrlock(&w->lock); + w->ptr = NULL; + pthread_rwlock_unlock(&w->lock); + + weak_link_do_unref(w); +} + +struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w) +{ + if (!w) + return NULL; + atomic_fetch_add(&w->ref_count, 1); + return (struct ff_weak_link_client*)w; +} + +void ff_weak_link_unref(struct ff_weak_link_client ** ppLink) +{ + struct ff_weak_link_master * const w = weak_link_x(*ppLink); + if (!w) + return; + + *ppLink = NULL; + weak_link_do_unref(w); +} + +void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink) +{ + struct ff_weak_link_master * const w = weak_link_x(*ppLink); + + if (!w) + return NULL; + + if (pthread_rwlock_rdlock(&w->lock)) + goto broken; + + if (w->ptr) + return w->ptr; + + pthread_rwlock_unlock(&w->lock); + +broken: + *ppLink = NULL; + weak_link_do_unref(w); + return NULL; +} + +// Ignores a NULL c (so can be on the return path of both broken & live links) +void ff_weak_link_unlock(struct ff_weak_link_client * c) +{ + struct ff_weak_link_master * const w = weak_link_x(c); + if (w) + pthread_rwlock_unlock(&w->lock); +} + + diff --git a/libavcodec/weak_link.h b/libavcodec/weak_link.h new file mode 100644 index 0000000000..415b6a27a0 --- /dev/null +++ b/libavcodec/weak_link.h @@ -0,0 +1,23 @@ +struct ff_weak_link_master; +struct ff_weak_link_client; + +struct ff_weak_link_master * ff_weak_link_new(void * p); +void ff_weak_link_break(struct ff_weak_link_master ** ppLink); + +struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w); +void ff_weak_link_unref(struct ff_weak_link_client ** ppLink); + +// Returns NULL if link broken - in this case it will also zap +// *ppLink and unref the weak_link. +// Returns NULL if *ppLink is NULL (so a link once broken stays broken) +// +// The above does mean that there is a race if this is called simultainiously +// by two threads using the same weak_link_client (so don't do that) +void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink); +void ff_weak_link_unlock(struct ff_weak_link_client * c); + + + + + + diff --git a/libavdevice/Makefile b/libavdevice/Makefile index 8a62822b69..0989cb895f 100644 --- a/libavdevice/Makefile +++ b/libavdevice/Makefile @@ -48,6 +48,8 @@ OBJS-$(CONFIG_SNDIO_OUTDEV) += sndio_enc.o sndio.o OBJS-$(CONFIG_V4L2_INDEV) += v4l2.o v4l2-common.o timefilter.o OBJS-$(CONFIG_V4L2_OUTDEV) += v4l2enc.o v4l2-common.o OBJS-$(CONFIG_VFWCAP_INDEV) += vfwcap.o +OBJS-$(CONFIG_VOUT_DRM_OUTDEV) += drm_vout.o +OBJS-$(CONFIG_VOUT_EGL_OUTDEV) += egl_vout.o OBJS-$(CONFIG_XCBGRAB_INDEV) += xcbgrab.o OBJS-$(CONFIG_XV_OUTDEV) += xv.o diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c index 8a90fcb5d7..ffb410b92d 100644 --- a/libavdevice/alldevices.c +++ b/libavdevice/alldevices.c @@ -52,6 +52,8 @@ extern const FFOutputFormat ff_sndio_muxer; extern const AVInputFormat ff_v4l2_demuxer; extern const FFOutputFormat ff_v4l2_muxer; extern const AVInputFormat ff_vfwcap_demuxer; +extern const FFOutputFormat ff_vout_drm_muxer; +extern const FFOutputFormat ff_vout_egl_muxer; extern const AVInputFormat ff_xcbgrab_demuxer; extern const FFOutputFormat ff_xv_muxer; diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c new file mode 100644 index 0000000000..491e1dc608 --- /dev/null +++ b/libavdevice/drm_vout.c @@ -0,0 +1,675 @@ +/* + * Copyright (c) 2020 John Cox for Raspberry Pi Trading + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +// *** This module is a work in progress and its utility is strictly +// limited to testing. + +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" +#include "libavutil/hwcontext_drm.h" +#include "libavformat/mux.h" +#include "avdevice.h" + +#include "pthread.h" +#include +#include + +#include +#include +#include + +#define TRACE_ALL 0 + +#define DRM_MODULE "vc4" + +#define ERRSTR strerror(errno) + +struct drm_setup { + int conId; + uint32_t crtcId; + int crtcIdx; + uint32_t planeId; + unsigned int out_fourcc; + struct { + int x, y, width, height; + } compose; +}; + +typedef struct drm_aux_s { + unsigned int fb_handle; + uint32_t bo_handles[AV_DRM_MAX_PLANES]; + AVFrame * frame; +} drm_aux_t; + +// Aux size should only need to be 2, but on a few streams (Hobbit) under FKMS +// we get initial flicker probably due to dodgy drm timing +#define AUX_SIZE 3 +typedef struct drm_display_env_s +{ + AVClass *class; + + int drm_fd; + uint32_t con_id; + struct drm_setup setup; + enum AVPixelFormat avfmt; + + int show_all; + const char * drm_module; + + unsigned int ano; + drm_aux_t aux[AUX_SIZE]; + + pthread_t q_thread; + sem_t q_sem_in; + sem_t q_sem_out; + int q_terminate; + AVFrame * q_next; + +} drm_display_env_t; + + +static int drm_vout_write_trailer(AVFormatContext *s) +{ +#if TRACE_ALL + av_log(s, AV_LOG_DEBUG, "%s\n", __func__); +#endif + + return 0; +} + +static int drm_vout_write_header(AVFormatContext *s) +{ + const AVCodecParameters * const par = s->streams[0]->codecpar; + +#if TRACE_ALL + av_log(s, AV_LOG_DEBUG, "%s\n", __func__); +#endif + if ( s->nb_streams > 1 + || par->codec_type != AVMEDIA_TYPE_VIDEO + || par->codec_id != AV_CODEC_ID_WRAPPED_AVFRAME) { + av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n"); + return AVERROR(EINVAL); + } + + return 0; +} + +static int find_plane(struct AVFormatContext * const avctx, + const int drmfd, const int crtcidx, const uint32_t format, + uint32_t * const pplane_id) +{ + drmModePlaneResPtr planes; + drmModePlanePtr plane; + drmModeObjectPropertiesPtr props = NULL; + drmModePropertyPtr prop = NULL; + unsigned int i; + unsigned int j; + int ret = -1; + + planes = drmModeGetPlaneResources(drmfd); + if (!planes) + { + av_log(avctx, AV_LOG_WARNING, "drmModeGetPlaneResources failed: %s\n", ERRSTR); + return -1; + } + + for (i = 0; i < planes->count_planes; ++i) { + plane = drmModeGetPlane(drmfd, planes->planes[i]); + if (!planes) + { + av_log(avctx, AV_LOG_WARNING, "drmModeGetPlane failed: %s\n", ERRSTR); + break; + } + + if (!(plane->possible_crtcs & (1 << crtcidx))) { + drmModeFreePlane(plane); + continue; + } + + for (j = 0; j < plane->count_formats; ++j) { + if (plane->formats[j] == format) + break; + } + + if (j == plane->count_formats) { + drmModeFreePlane(plane); + continue; + } + + *pplane_id = plane->plane_id; + drmModeFreePlane(plane); + break; + } + + if (i == planes->count_planes) { + ret = -1; + goto fail; + } + + props = drmModeObjectGetProperties(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE); + if (!props) + goto fail; + for (i = 0; i != props->count_props; ++i) { + if (prop) + drmModeFreeProperty(prop); + prop = drmModeGetProperty(drmfd, props->props[i]); + if (!prop) + goto fail; + if (strcmp("zpos", prop->name) == 0) { + if (drmModeObjectSetProperty(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE, props->props[i], prop->values[1]) == 0) + av_log(avctx, AV_LOG_DEBUG, "ZPOS set to %d\n", (int)prop->values[1]); + else + av_log(avctx, AV_LOG_WARNING, "Failed to set ZPOS on DRM plane\n"); + break; + } + } + + ret = 0; +fail: + if (props) + drmModeFreeObjectProperties(props); + if (prop) + drmModeFreeProperty(prop); + drmModeFreePlaneResources(planes); + return ret; +} + +static void da_uninit(drm_display_env_t * const de, drm_aux_t * da) +{ + if (da->fb_handle != 0) { + drmModeRmFB(de->drm_fd, da->fb_handle); + da->fb_handle = 0; + } + + for (unsigned int i = 0; i != AV_DRM_MAX_PLANES; ++i) { + if (da->bo_handles[i]) { + struct drm_gem_close gem_close = {.handle = da->bo_handles[i]}; + drmIoctl(de->drm_fd, DRM_IOCTL_GEM_CLOSE, &gem_close); + da->bo_handles[i] = 0; + } + } + av_frame_free(&da->frame); +} + +static int do_display(AVFormatContext * const s, drm_display_env_t * const de, AVFrame * frame) +{ + const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0]; + drm_aux_t * da = de->aux + de->ano; + const uint32_t format = desc->layers[0].format; + int ret = 0; + +#if TRACE_ALL + av_log(s, AV_LOG_DEBUG, "<<< %s: fd=%d\n", __func__, desc->objects[0].fd); +#endif + + if (de->setup.out_fourcc != format) { + if (find_plane(s, de->drm_fd, de->setup.crtcIdx, format, &de->setup.planeId)) { + av_frame_free(&frame); + av_log(s, AV_LOG_WARNING, "No plane for format: %#x\n", format); + return -1; + } + de->setup.out_fourcc = format; + } + + { + drmVBlank vbl = { + .request = { + .type = DRM_VBLANK_RELATIVE, + .sequence = 0 + } + }; + + while (drmWaitVBlank(de->drm_fd, &vbl)) { + if (errno != EINTR) { +// av_log(s, AV_LOG_WARNING, "drmWaitVBlank failed: %s\n", ERRSTR); + break; + } + } + } + + da_uninit(de, da); + + { + uint32_t pitches[4] = {0}; + uint32_t offsets[4] = {0}; + uint64_t modifiers[4] = {0}; + uint32_t bo_handles[4] = {0}; + int has_mods = 0; + int i, j, n; + + da->frame = frame; + + for (i = 0; i < desc->nb_objects; ++i) { + if (drmPrimeFDToHandle(de->drm_fd, desc->objects[i].fd, da->bo_handles + i) != 0) { + av_log(s, AV_LOG_WARNING, "drmPrimeFDToHandle[%d](%d) failed: %s\n", i, desc->objects[i].fd, ERRSTR); + return -1; + } + if (desc->objects[i].format_modifier != DRM_FORMAT_MOD_LINEAR && + desc->objects[i].format_modifier != DRM_FORMAT_MOD_INVALID) + has_mods = 1; + } + + n = 0; + for (i = 0; i < desc->nb_layers; ++i) { + for (j = 0; j < desc->layers[i].nb_planes; ++j) { + const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j; + const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index; + pitches[n] = p->pitch; + offsets[n] = p->offset; + modifiers[n] = obj->format_modifier; + bo_handles[n] = da->bo_handles[p->object_index]; + ++n; + } + } + +#if 1 && TRACE_ALL + av_log(s, AV_LOG_DEBUG, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d," + " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n", + av_frame_cropped_width(frame), + av_frame_cropped_height(frame), + desc->layers[0].format, + bo_handles[0], + bo_handles[1], + bo_handles[2], + bo_handles[3], + pitches[0], + pitches[1], + pitches[2], + pitches[3], + offsets[0], + offsets[1], + offsets[2], + offsets[3], + (long long)modifiers[0], + (long long)modifiers[1], + (long long)modifiers[2], + (long long)modifiers[3] + ); +#endif + + if (drmModeAddFB2WithModifiers(de->drm_fd, + av_frame_cropped_width(frame), + av_frame_cropped_height(frame), + desc->layers[0].format, bo_handles, + pitches, offsets, + has_mods ? modifiers : NULL, + &da->fb_handle, + has_mods ? DRM_MODE_FB_MODIFIERS : 0) != 0) { + av_log(s, AV_LOG_WARNING, "drmModeAddFB2WithModifiers failed: %s\n", ERRSTR); + return -1; + } + } + + ret = drmModeSetPlane(de->drm_fd, de->setup.planeId, de->setup.crtcId, + da->fb_handle, 0, + de->setup.compose.x, de->setup.compose.y, + de->setup.compose.width, + de->setup.compose.height, + 0, 0, + av_frame_cropped_width(frame) << 16, + av_frame_cropped_height(frame) << 16); + + if (ret != 0) { + av_log(s, AV_LOG_WARNING, "drmModeSetPlane failed: %s\n", ERRSTR); + } + + de->ano = de->ano + 1 >= AUX_SIZE ? 0 : de->ano + 1; + + return ret; +} + +static int do_sem_wait(sem_t * const sem, const int nowait) +{ + while (nowait ? sem_trywait(sem) : sem_wait(sem)) { + if (errno != EINTR) + return -errno; + } + return 0; +} + +static void * display_thread(void * v) +{ + AVFormatContext * const s = v; + drm_display_env_t * const de = s->priv_data; + int i; + +#if TRACE_ALL + av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); +#endif + + sem_post(&de->q_sem_out); + + for (;;) { + AVFrame * frame; + + do_sem_wait(&de->q_sem_in, 0); + + if (de->q_terminate) + break; + + frame = de->q_next; + de->q_next = NULL; + sem_post(&de->q_sem_out); + + do_display(s, de, frame); + } + +#if TRACE_ALL + av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); +#endif + + for (i = 0; i != AUX_SIZE; ++i) + da_uninit(de, de->aux + i); + + av_frame_free(&de->q_next); + + return NULL; +} + +static int drm_vout_write_packet(AVFormatContext *s, AVPacket *pkt) +{ + const AVFrame * const src_frame = (AVFrame *)pkt->data; + AVFrame * frame; + drm_display_env_t * const de = s->priv_data; + int ret; + +#if TRACE_ALL + av_log(s, AV_LOG_DEBUG, "%s\n", __func__); +#endif + + if ((src_frame->flags & AV_FRAME_FLAG_CORRUPT) != 0) { + av_log(s, AV_LOG_WARNING, "Discard corrupt frame: fmt=%d, ts=%" PRId64 "\n", src_frame->format, src_frame->pts); + return 0; + } + + if (src_frame->format == AV_PIX_FMT_DRM_PRIME) { + frame = av_frame_alloc(); + av_frame_ref(frame, src_frame); + } + else if (src_frame->format == AV_PIX_FMT_VAAPI) { + frame = av_frame_alloc(); + frame->format = AV_PIX_FMT_DRM_PRIME; + if (av_hwframe_map(frame, src_frame, 0) != 0) + { + av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format); + av_frame_free(&frame); + return AVERROR(EINVAL); + } + } + else { + av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format); + return AVERROR(EINVAL); + } + + ret = do_sem_wait(&de->q_sem_out, !de->show_all); + if (ret) { + av_frame_free(&frame); + } + else { + de->q_next = frame; + sem_post(&de->q_sem_in); + } + + return 0; +} + +static int drm_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe, + unsigned flags) +{ + av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags); + return AVERROR_PATCHWELCOME; +} + +static int drm_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size) +{ +#if TRACE_ALL + av_log(s, AV_LOG_DEBUG, "%s: %d\n", __func__, type); +#endif + switch(type) { + case AV_APP_TO_DEV_WINDOW_REPAINT: + return 0; + default: + break; + } + return AVERROR(ENOSYS); +} + +static int find_crtc(struct AVFormatContext * const avctx, int drmfd, struct drm_setup *s, uint32_t * const pConId) +{ + int ret = -1; + int i; + drmModeRes *res = drmModeGetResources(drmfd); + drmModeConnector *c; + + if(!res) + { + printf( "drmModeGetResources failed: %s\n", ERRSTR); + return -1; + } + + if (res->count_crtcs <= 0) + { + printf( "drm: no crts\n"); + goto fail_res; + } + + if (!s->conId) { + fprintf(stderr, + "No connector ID specified. Choosing default from list:\n"); + + for (i = 0; i < res->count_connectors; i++) { + drmModeConnector *con = + drmModeGetConnector(drmfd, res->connectors[i]); + drmModeEncoder *enc = NULL; + drmModeCrtc *crtc = NULL; + + if (con->encoder_id) { + enc = drmModeGetEncoder(drmfd, con->encoder_id); + if (enc->crtc_id) { + crtc = drmModeGetCrtc(drmfd, enc->crtc_id); + } + } + + if (!s->conId && crtc) { + s->conId = con->connector_id; + s->crtcId = crtc->crtc_id; + } + + av_log(avctx, AV_LOG_DEBUG, "Connector %d (crtc %d): type %d, %dx%d%s\n", + con->connector_id, + crtc ? crtc->crtc_id : 0, + con->connector_type, + crtc ? crtc->width : 0, + crtc ? crtc->height : 0, + (s->conId == (int)con->connector_id ? + " (chosen)" : "")); + } + + if (!s->conId) { + av_log(avctx, AV_LOG_ERROR, + "No suitable enabled connector found.\n"); + return -1;; + } + } + + s->crtcIdx = -1; + + for (i = 0; i < res->count_crtcs; ++i) { + if (s->crtcId == res->crtcs[i]) { + s->crtcIdx = i; + break; + } + } + + if (s->crtcIdx == -1) + { + av_log(avctx, AV_LOG_WARNING, "drm: CRTC %u not found\n", s->crtcId); + goto fail_res; + } + + if (res->count_connectors <= 0) + { + av_log(avctx, AV_LOG_WARNING, "drm: no connectors\n"); + goto fail_res; + } + + c = drmModeGetConnector(drmfd, s->conId); + if (!c) + { + av_log(avctx, AV_LOG_WARNING, "drmModeGetConnector failed: %s\n", ERRSTR); + goto fail_res; + } + + if (!c->count_modes) + { + av_log(avctx, AV_LOG_WARNING, "connector supports no mode\n"); + goto fail_conn; + } + + { + drmModeCrtc *crtc = drmModeGetCrtc(drmfd, s->crtcId); + s->compose.x = crtc->x; + s->compose.y = crtc->y; + s->compose.width = crtc->width; + s->compose.height = crtc->height; + drmModeFreeCrtc(crtc); + } + + if (pConId) + *pConId = c->connector_id; + ret = 0; + +fail_conn: + drmModeFreeConnector(c); + +fail_res: + drmModeFreeResources(res); + + return ret; +} + +// deinit is called if init fails so no need to clean up explicity here +static int drm_vout_init(struct AVFormatContext * s) +{ + drm_display_env_t * const de = s->priv_data; + int rv; + + av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); + + de->drm_fd = -1; + de->con_id = 0; + de->setup = (struct drm_setup){0}; + de->q_terminate = 0; + + if ((de->drm_fd = drmOpen(de->drm_module, NULL)) < 0) + { + rv = AVERROR(errno); + av_log(s, AV_LOG_ERROR, "Failed to drmOpen %s: %s\n", de->drm_module, av_err2str(rv)); + return rv; + } + + if (find_crtc(s, de->drm_fd, &de->setup, &de->con_id) != 0) + { + av_log(s, AV_LOG_ERROR, "failed to find valid mode\n"); + rv = AVERROR(EINVAL); + goto fail_close; + } + + sem_init(&de->q_sem_in, 0, 0); + sem_init(&de->q_sem_out, 0, 0); + if (pthread_create(&de->q_thread, NULL, display_thread, s)) { + rv = AVERROR(errno); + av_log(s, AV_LOG_ERROR, "Failed to create display thread: %s\n", av_err2str(rv)); + goto fail_close; + } + + av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); + + return 0; + +fail_close: + close(de->drm_fd); + de->drm_fd = -1; + av_log(s, AV_LOG_DEBUG, ">>> %s: FAIL\n", __func__); + + return rv; +} + +static void drm_vout_deinit(struct AVFormatContext * s) +{ + drm_display_env_t * const de = s->priv_data; + + av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); + + de->q_terminate = 1; + sem_post(&de->q_sem_in); + pthread_join(de->q_thread, NULL); + sem_destroy(&de->q_sem_in); + sem_destroy(&de->q_sem_out); + + for (unsigned int i = 0; i != AUX_SIZE; ++i) + da_uninit(de, de->aux + i); + + av_frame_free(&de->q_next); + + if (de->drm_fd >= 0) { + close(de->drm_fd); + de->drm_fd = -1; + } + + av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); +} + + +#define OFFSET(x) offsetof(drm_display_env_t, x) +static const AVOption options[] = { + { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, + { "drm_module", "drm_module name to use, default=" DRM_MODULE, OFFSET(drm_module), AV_OPT_TYPE_STRING, { .str = DRM_MODULE }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, + { NULL } +}; + +static const AVClass drm_vout_class = { + .class_name = "drm vid outdev", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, + .category = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT, +}; + +FFOutputFormat ff_vout_drm_muxer = { + .p = { + .name = "vout_drm", + .long_name = NULL_IF_CONFIG_SMALL("Drm video output device"), + .audio_codec = AV_CODEC_ID_NONE, + .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME, + .flags = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS, + .priv_class = &drm_vout_class, + }, + .priv_data_size = sizeof(drm_display_env_t), + .write_header = drm_vout_write_header, + .write_packet = drm_vout_write_packet, + .write_uncoded_frame = drm_vout_write_frame, + .write_trailer = drm_vout_write_trailer, + .control_message = drm_vout_control_message, + .init = drm_vout_init, + .deinit = drm_vout_deinit, +}; + diff --git a/libavdevice/egl_vout.c b/libavdevice/egl_vout.c new file mode 100644 index 0000000000..afc7afd13e --- /dev/null +++ b/libavdevice/egl_vout.c @@ -0,0 +1,783 @@ +/* + * Copyright (c) 2020 John Cox for Raspberry Pi Trading + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +// *** This module is a work in progress and its utility is strictly +// limited to testing. +// Amongst other issues it doesn't wait for the pic to be displayed before +// returning the buffer so flikering does occur. + +#include +#include + +#include "libavutil/opt.h" +#include "libavutil/avassert.h" +#include "libavutil/pixdesc.h" +#include "libavutil/imgutils.h" +#include "libavutil/hwcontext_drm.h" +#include "libavformat/mux.h" +#include "avdevice.h" + +#include "pthread.h" +#include +#include +#include + +#include +#include + +#include "libavutil/rpi_sand_fns.h" + +#define TRACE_ALL 0 + +struct egl_setup { + int conId; + + Display *dpy; + EGLDisplay egl_dpy; + EGLContext ctx; + EGLSurface surf; + Window win; + + uint32_t crtcId; + int crtcIdx; + uint32_t planeId; + struct { + int x, y, width, height; + } compose; +}; + +typedef struct egl_aux_s { + int fd; + GLuint texture; + +} egl_aux_t; + +typedef struct egl_display_env_s { + AVClass *class; + + struct egl_setup setup; + enum AVPixelFormat avfmt; + + int show_all; + int window_width, window_height; + int window_x, window_y; + int fullscreen; + + egl_aux_t aux[32]; + + pthread_t q_thread; + pthread_mutex_t q_lock; + sem_t display_start_sem; + sem_t q_sem; + int q_terminate; + AVFrame *q_this; + AVFrame *q_next; + +} egl_display_env_t; + + +/** + * Remove window border/decorations. + */ +static void +no_border(Display *dpy, Window w) +{ + static const unsigned MWM_HINTS_DECORATIONS = (1 << 1); + static const int PROP_MOTIF_WM_HINTS_ELEMENTS = 5; + + typedef struct { + unsigned long flags; + unsigned long functions; + unsigned long decorations; + long inputMode; + unsigned long status; + } PropMotifWmHints; + + PropMotifWmHints motif_hints; + Atom prop, proptype; + unsigned long flags = 0; + + /* setup the property */ + motif_hints.flags = MWM_HINTS_DECORATIONS; + motif_hints.decorations = flags; + + /* get the atom for the property */ + prop = XInternAtom(dpy, "_MOTIF_WM_HINTS", True); + if (!prop) { + /* something went wrong! */ + return; + } + + /* not sure this is correct, seems to work, XA_WM_HINTS didn't work */ + proptype = prop; + + XChangeProperty(dpy, w, /* display, window */ + prop, proptype, /* property, type */ + 32, /* format: 32-bit datums */ + PropModeReplace, /* mode */ + (unsigned char *)&motif_hints, /* data */ + PROP_MOTIF_WM_HINTS_ELEMENTS /* nelements */ + ); +} + + +/* + * Create an RGB, double-buffered window. + * Return the window and context handles. + */ +static int +make_window(struct AVFormatContext *const s, + egl_display_env_t *const de, + Display *dpy, EGLDisplay egl_dpy, const char *name, + Window *winRet, EGLContext *ctxRet, EGLSurface *surfRet) +{ + int scrnum = DefaultScreen(dpy); + XSetWindowAttributes attr; + unsigned long mask; + Window root = RootWindow(dpy, scrnum); + Window win; + EGLContext ctx; + const int fullscreen = de->fullscreen; + EGLConfig config; + int x = de->window_x; + int y = de->window_y; + int width = de->window_width ? de->window_width : 1280; + int height = de->window_height ? de->window_height : 720; + + + if (fullscreen) { + int scrnum = DefaultScreen(dpy); + + x = 0; y = 0; + width = DisplayWidth(dpy, scrnum); + height = DisplayHeight(dpy, scrnum); + } + + { + EGLint num_configs; + static const EGLint attribs[] = { + EGL_RED_SIZE, 1, + EGL_GREEN_SIZE, 1, + EGL_BLUE_SIZE, 1, + EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, + EGL_NONE + }; + + if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs)) { + av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n"); + return -1; + } + } + + { + EGLint vid; + if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) { + av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n"); + return -1; + } + + { + XVisualInfo visTemplate = { + .visualid = vid, + }; + int num_visuals; + XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask, + &visTemplate, &num_visuals); + + /* window attributes */ + attr.background_pixel = 0; + attr.border_pixel = 0; + attr.colormap = XCreateColormap(dpy, root, visinfo->visual, AllocNone); + attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask; + /* XXX this is a bad way to get a borderless window! */ + mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask; + + win = XCreateWindow(dpy, root, x, y, width, height, + 0, visinfo->depth, InputOutput, + visinfo->visual, mask, &attr); + XFree(visinfo); + } + } + + if (fullscreen) + no_border(dpy, win); + + /* set hints and properties */ + { + XSizeHints sizehints; + sizehints.x = x; + sizehints.y = y; + sizehints.width = width; + sizehints.height = height; + sizehints.flags = USSize | USPosition; + XSetNormalHints(dpy, win, &sizehints); + XSetStandardProperties(dpy, win, name, name, + None, (char **)NULL, 0, &sizehints); + } + + eglBindAPI(EGL_OPENGL_ES_API); + + { + static const EGLint ctx_attribs[] = { + EGL_CONTEXT_CLIENT_VERSION, 2, + EGL_NONE + }; + ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs); + if (!ctx) { + av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); + return -1; + } + } + + + XMapWindow(dpy, win); + + { + EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, (EGLNativeWindowType)win, NULL); + if (!surf) { + av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n"); + return -1; + } + + if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) { + av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); + return -1; + } + + *winRet = win; + *ctxRet = ctx; + *surfRet = surf; + } + + return 0; +} + +static GLint +compile_shader(struct AVFormatContext *const avctx, GLenum target, const char *source) +{ + GLuint s = glCreateShader(target); + + if (s == 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to create shader\n"); + return 0; + } + + glShaderSource(s, 1, (const GLchar **)&source, NULL); + glCompileShader(s); + + { + GLint ok; + glGetShaderiv(s, GL_COMPILE_STATUS, &ok); + + if (!ok) { + GLchar *info; + GLint size; + + glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size); + info = malloc(size); + + glGetShaderInfoLog(s, size, NULL, info); + av_log(avctx, AV_LOG_ERROR, "Failed to compile shader: %ssource:\n%s\n", info, source); + + return 0; + } + } + + return s; +} + +static GLuint link_program(struct AVFormatContext *const s, GLint vs, GLint fs) +{ + GLuint prog = glCreateProgram(); + + if (prog == 0) { + av_log(s, AV_LOG_ERROR, "Failed to create program\n"); + return 0; + } + + glAttachShader(prog, vs); + glAttachShader(prog, fs); + glLinkProgram(prog); + + { + GLint ok; + glGetProgramiv(prog, GL_LINK_STATUS, &ok); + if (!ok) { + /* Some drivers return a size of 1 for an empty log. This is the size + * of a log that contains only a terminating NUL character. + */ + GLint size; + GLchar *info = NULL; + glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size); + if (size > 1) { + info = malloc(size); + glGetProgramInfoLog(prog, size, NULL, info); + } + + av_log(s, AV_LOG_ERROR, "Failed to link: %s\n", + (info != NULL) ? info : ""); + return 0; + } + } + + return prog; +} + +static int +gl_setup(struct AVFormatContext *const s) +{ + const char *vs = + "attribute vec4 pos;\n" + "varying vec2 texcoord;\n" + "\n" + "void main() {\n" + " gl_Position = pos;\n" + " texcoord.x = (pos.x + 1.0) / 2.0;\n" + " texcoord.y = (-pos.y + 1.0) / 2.0;\n" + "}\n"; + const char *fs = + "#extension GL_OES_EGL_image_external : enable\n" + "precision mediump float;\n" + "uniform samplerExternalOES s;\n" + "varying vec2 texcoord;\n" + "void main() {\n" + " gl_FragColor = texture2D(s, texcoord);\n" + "}\n"; + + GLuint vs_s; + GLuint fs_s; + GLuint prog; + + if (!(vs_s = compile_shader(s, GL_VERTEX_SHADER, vs)) || + !(fs_s = compile_shader(s, GL_FRAGMENT_SHADER, fs)) || + !(prog = link_program(s, vs_s, fs_s))) + return -1; + + glUseProgram(prog); + + { + static const float verts[] = { + -1, -1, + 1, -1, + 1, 1, + -1, 1, + }; + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, verts); + } + + glEnableVertexAttribArray(0); + return 0; +} + +static int egl_vout_write_trailer(AVFormatContext *s) +{ +#if TRACE_ALL + av_log(s, AV_LOG_INFO, "%s\n", __func__); +#endif + + return 0; +} + +static int egl_vout_write_header(AVFormatContext *s) +{ + const AVCodecParameters *const par = s->streams[0]->codecpar; + +#if TRACE_ALL + av_log(s, AV_LOG_INFO, "%s\n", __func__); +#endif + if (s->nb_streams > 1 + || par->codec_type != AVMEDIA_TYPE_VIDEO + || par->codec_id != AV_CODEC_ID_WRAPPED_AVFRAME) { + av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n"); + return AVERROR(EINVAL); + } + + return 0; +} + + +static int do_display(AVFormatContext *const s, egl_display_env_t *const de, AVFrame *const frame) +{ + const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)frame->data[0]; + egl_aux_t *da = NULL; + unsigned int i; + +#if TRACE_ALL + av_log(s, AV_LOG_INFO, "<<< %s\n", __func__); +#endif + + for (i = 0; i != 32; ++i) { + if (de->aux[i].fd == -1 || de->aux[i].fd == desc->objects[0].fd) { + da = de->aux + i; + break; + } + } + + if (da == NULL) { + av_log(s, AV_LOG_INFO, "%s: Out of handles\n", __func__); + return AVERROR(EINVAL); + } + + if (da->texture == 0) { + EGLint attribs[50]; + EGLint *a = attribs; + int i, j; + static const EGLint anames[] = { + EGL_DMA_BUF_PLANE0_FD_EXT, + EGL_DMA_BUF_PLANE0_OFFSET_EXT, + EGL_DMA_BUF_PLANE0_PITCH_EXT, + EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, + EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, + EGL_DMA_BUF_PLANE1_FD_EXT, + EGL_DMA_BUF_PLANE1_OFFSET_EXT, + EGL_DMA_BUF_PLANE1_PITCH_EXT, + EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT, + EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT, + EGL_DMA_BUF_PLANE2_FD_EXT, + EGL_DMA_BUF_PLANE2_OFFSET_EXT, + EGL_DMA_BUF_PLANE2_PITCH_EXT, + EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT, + EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT, + }; + const EGLint *b = anames; + + *a++ = EGL_WIDTH; + *a++ = av_frame_cropped_width(frame); + *a++ = EGL_HEIGHT; + *a++ = av_frame_cropped_height(frame); + *a++ = EGL_LINUX_DRM_FOURCC_EXT; + *a++ = desc->layers[0].format; + + for (i = 0; i < desc->nb_layers; ++i) { + for (j = 0; j < desc->layers[i].nb_planes; ++j) { + const AVDRMPlaneDescriptor *const p = desc->layers[i].planes + j; + const AVDRMObjectDescriptor *const obj = desc->objects + p->object_index; + *a++ = *b++; + *a++ = obj->fd; + *a++ = *b++; + *a++ = p->offset; + *a++ = *b++; + *a++ = p->pitch; + if (obj->format_modifier == 0) { + b += 2; + } + else { + *a++ = *b++; + *a++ = (EGLint)(obj->format_modifier & 0xFFFFFFFF); + *a++ = *b++; + *a++ = (EGLint)(obj->format_modifier >> 32); + } + } + } + + *a = EGL_NONE; + +#if TRACE_ALL + for (a = attribs, i = 0; *a != EGL_NONE; a += 2, ++i) { + av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", i, a[0], a[1]); + } +#endif + { + const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy, + EGL_NO_CONTEXT, + EGL_LINUX_DMA_BUF_EXT, + NULL, attribs); + if (!image) { + av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd); + return -1; + } + + glGenTextures(1, &da->texture); + glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); + glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image); + + eglDestroyImageKHR(de->setup.egl_dpy, image); + } + + da->fd = desc->objects[0].fd; + } + + glClearColor(0.5, 0.5, 0.5, 0.5); + glClear(GL_COLOR_BUFFER_BIT); + + glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); + glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + eglSwapBuffers(de->setup.egl_dpy, de->setup.surf); + + glDeleteTextures(1, &da->texture); + da->texture = 0; + da->fd = -1; + + return 0; +} + +static void* display_thread(void *v) +{ + AVFormatContext *const s = v; + egl_display_env_t *const de = s->priv_data; + +#if TRACE_ALL + av_log(s, AV_LOG_INFO, "<<< %s\n", __func__); +#endif + { + EGLint egl_major, egl_minor; + + de->setup.dpy = XOpenDisplay(NULL); + if (!de->setup.dpy) { + av_log(s, AV_LOG_ERROR, "Couldn't open X display\n"); + goto fail; + } + + de->setup.egl_dpy = eglGetDisplay(de->setup.dpy); + if (!de->setup.egl_dpy) { + av_log(s, AV_LOG_ERROR, "eglGetDisplay() failed\n"); + goto fail; + } + + if (!eglInitialize(de->setup.egl_dpy, &egl_major, &egl_minor)) { + av_log(s, AV_LOG_ERROR, "Error: eglInitialize() failed\n"); + goto fail; + } + + av_log(s, AV_LOG_INFO, "EGL version %d.%d\n", egl_major, egl_minor); + + if (!epoxy_has_egl_extension(de->setup.egl_dpy, "EGL_KHR_image_base")) { + av_log(s, AV_LOG_ERROR, "Missing EGL KHR image extension\n"); + goto fail; + } + } + + if (!de->window_width || !de->window_height) { + de->window_width = 1280; + de->window_height = 720; + } + if (make_window(s, de, de->setup.dpy, de->setup.egl_dpy, "ffmpeg-vout", + &de->setup.win, &de->setup.ctx, &de->setup.surf)) { + av_log(s, AV_LOG_ERROR, "%s: make_window failed\n", __func__); + goto fail; + } + + if (gl_setup(s)) { + av_log(s, AV_LOG_ERROR, "%s: gl_setup failed\n", __func__); + goto fail; + } + +#if TRACE_ALL + av_log(s, AV_LOG_INFO, "--- %s: Start done\n", __func__); +#endif + sem_post(&de->display_start_sem); + + for (;;) { + AVFrame *frame; + + while (sem_wait(&de->q_sem) != 0) { + av_assert0(errno == EINTR); + } + + if (de->q_terminate) + break; + + pthread_mutex_lock(&de->q_lock); + frame = de->q_next; + de->q_next = NULL; + pthread_mutex_unlock(&de->q_lock); + + do_display(s, de, frame); + + av_frame_free(&de->q_this); + de->q_this = frame; + } + +#if TRACE_ALL + av_log(s, AV_LOG_INFO, ">>> %s\n", __func__); +#endif + + return NULL; + +fail: +#if TRACE_ALL + av_log(s, AV_LOG_INFO, ">>> %s: FAIL\n", __func__); +#endif + de->q_terminate = 1; + sem_post(&de->display_start_sem); + + return NULL; +} + +static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt) +{ + const AVFrame *const src_frame = (AVFrame *)pkt->data; + AVFrame *frame; + egl_display_env_t *const de = s->priv_data; + +#if TRACE_ALL + av_log(s, AV_LOG_INFO, "%s\n", __func__); +#endif + + if (src_frame->format == AV_PIX_FMT_DRM_PRIME) { + frame = av_frame_alloc(); + av_frame_ref(frame, src_frame); + } + else if (src_frame->format == AV_PIX_FMT_VAAPI) { + frame = av_frame_alloc(); + frame->format = AV_PIX_FMT_DRM_PRIME; + if (av_hwframe_map(frame, src_frame, 0) != 0) { + av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format); + av_frame_free(&frame); + return AVERROR(EINVAL); + } + } + else { + av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format); + return AVERROR(EINVAL); + } + + // Really hacky sync + while (de->show_all && de->q_next) { + usleep(3000); + } + + pthread_mutex_lock(&de->q_lock); + { + AVFrame *const t = de->q_next; + de->q_next = frame; + frame = t; + } + pthread_mutex_unlock(&de->q_lock); + + if (frame == NULL) + sem_post(&de->q_sem); + else + av_frame_free(&frame); + + return 0; +} + +static int egl_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe, + unsigned flags) +{ + av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags); + return AVERROR_PATCHWELCOME; +} + +static int egl_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size) +{ +#if TRACE_ALL + av_log(s, AV_LOG_INFO, "%s: %d\n", __func__, type); +#endif + switch (type) { + case AV_APP_TO_DEV_WINDOW_REPAINT: + return 0; + default: + break; + } + return AVERROR(ENOSYS); +} + +// deinit is called if init fails so no need to clean up explicity here +static int egl_vout_init(struct AVFormatContext *s) +{ + egl_display_env_t *const de = s->priv_data; + unsigned int i; + + av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); + + de->setup = (struct egl_setup) { 0 }; + + for (i = 0; i != 32; ++i) { + de->aux[i].fd = -1; + } + + de->q_terminate = 0; + pthread_mutex_init(&de->q_lock, NULL); + sem_init(&de->q_sem, 0, 0); + sem_init(&de->display_start_sem, 0, 0); + av_assert0(pthread_create(&de->q_thread, NULL, display_thread, s) == 0); + + sem_wait(&de->display_start_sem); + if (de->q_terminate) { + av_log(s, AV_LOG_ERROR, "%s: Display startup failure\n", __func__); + return -1; + } + + av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); + + return 0; +} + +static void egl_vout_deinit(struct AVFormatContext *s) +{ + egl_display_env_t *const de = s->priv_data; + + av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); + + de->q_terminate = 1; + sem_post(&de->q_sem); + pthread_join(de->q_thread, NULL); + sem_destroy(&de->q_sem); + pthread_mutex_destroy(&de->q_lock); + + av_frame_free(&de->q_next); + av_frame_free(&de->q_this); + + av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); +} + +#define OFFSET(x) offsetof(egl_display_env_t, x) +static const AVOption options[] = { + { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, + { "window_size", "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, + { "window_x", "set window x offset", OFFSET(window_x), AV_OPT_TYPE_INT, { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, + { "window_y", "set window y offset", OFFSET(window_y), AV_OPT_TYPE_INT, { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, + { "fullscreen", "set fullscreen display", OFFSET(fullscreen), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, + { NULL } + +}; + +static const AVClass egl_vout_class = { + .class_name = "egl vid outdev", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, + .category = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT, +}; + +FFOutputFormat ff_vout_egl_muxer = { + .p = { + .name = "vout_egl", + .long_name = NULL_IF_CONFIG_SMALL("Egl video output device"), + .audio_codec = AV_CODEC_ID_NONE, + .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME, + .flags = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS, + .priv_class = &egl_vout_class, + }, + .priv_data_size = sizeof(egl_display_env_t), + .write_header = egl_vout_write_header, + .write_packet = egl_vout_write_packet, + .write_uncoded_frame = egl_vout_write_frame, + .write_trailer = egl_vout_write_trailer, + .control_message = egl_vout_control_message, + .init = egl_vout_init, + .deinit = egl_vout_deinit, +}; + diff --git a/libavfilter/Makefile b/libavfilter/Makefile index b3d3d981dd..0e7b5856bd 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -262,6 +262,7 @@ OBJS-$(CONFIG_DEFLATE_FILTER) += vf_neighbor.o OBJS-$(CONFIG_DEFLICKER_FILTER) += vf_deflicker.o OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER) += vf_vpp_qsv.o OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER) += vf_deinterlace_vaapi.o vaapi_vpp.o +OBJS-$(CONFIG_DEINTERLACE_V4L2M2M_FILTER) += vf_deinterlace_v4l2m2m.o OBJS-$(CONFIG_DEJUDDER_FILTER) += vf_dejudder.o OBJS-$(CONFIG_DELOGO_FILTER) += vf_delogo.o OBJS-$(CONFIG_DENOISE_VAAPI_FILTER) += vf_misc_vaapi.o vaapi_vpp.o @@ -518,6 +519,7 @@ OBJS-$(CONFIG_TRANSPOSE_VAAPI_FILTER) += vf_transpose_vaapi.o vaapi_vpp.o OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER) += vf_transpose_vulkan.o vulkan.o vulkan_filter.o OBJS-$(CONFIG_TRIM_FILTER) += trim.o OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o +OBJS-$(CONFIG_UNSAND_FILTER) += vf_unsand.o OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER) += vf_unsharp_opencl.o opencl.o \ opencl/unsharp.o diff --git a/libavfilter/aarch64/Makefile b/libavfilter/aarch64/Makefile index b58daa3a3f..1a4cd935f1 100644 --- a/libavfilter/aarch64/Makefile +++ b/libavfilter/aarch64/Makefile @@ -1,3 +1,5 @@ +OBJS-$(CONFIG_BWDIF_FILTER) += aarch64/vf_bwdif_init_aarch64.o OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_init.o +NEON-OBJS-$(CONFIG_BWDIF_FILTER) += aarch64/vf_bwdif_aarch64.o NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_neon.o diff --git a/libavfilter/aarch64/vf_bwdif_aarch64.S b/libavfilter/aarch64/vf_bwdif_aarch64.S new file mode 100644 index 0000000000..d6e047dbde --- /dev/null +++ b/libavfilter/aarch64/vf_bwdif_aarch64.S @@ -0,0 +1,410 @@ +#include "libavutil/aarch64/asm.S" + +.macro SQSHRUNN b, s0, s1, s2, s3, n + sqshrun \s0\().4h, \s0\().4s, #\n - 8 + sqshrun2 \s0\().8h, \s1\().4s, #\n - 8 + sqshrun \s1\().4h, \s2\().4s, #\n - 8 + sqshrun2 \s1\().8h, \s3\().4s, #\n - 8 + uzp2 \b\().16b, \s0\().16b, \s1\().16b +.endm + +.macro SMULL4K a0, a1, a2, a3, s0, s1, k + smull \a0\().4s, \s0\().4h, \k + smull2 \a1\().4s, \s0\().8h, \k + smull \a2\().4s, \s1\().4h, \k + smull2 \a3\().4s, \s1\().8h, \k +.endm + +.macro UMULL4K a0, a1, a2, a3, s0, s1, k + umull \a0\().4s, \s0\().4h, \k + umull2 \a1\().4s, \s0\().8h, \k + umull \a2\().4s, \s1\().4h, \k + umull2 \a3\().4s, \s1\().8h, \k +.endm + +.macro UMLAL4K a0, a1, a2, a3, s0, s1, k + umlal \a0\().4s, \s0\().4h, \k + umlal2 \a1\().4s, \s0\().8h, \k + umlal \a2\().4s, \s1\().4h, \k + umlal2 \a3\().4s, \s1\().8h, \k +.endm + +.macro UMLSL4K a0, a1, a2, a3, s0, s1, k + umlsl \a0\().4s, \s0\().4h, \k + umlsl2 \a1\().4s, \s0\().8h, \k + umlsl \a2\().4s, \s1\().4h, \k + umlsl2 \a3\().4s, \s1\().8h, \k +.endm + + +// void ff_bwdif_filter_line4_aarch64( +// void * dst1, // x0 +// int d_stride, // w1 +// const void * prev1, // x2 +// const void * cur1, // x3 +// const void * next1, // x4 +// int prefs, // w5 +// int w, // w6 +// int parity, // w7 +// int clip_max); // [sp, #0] (Ignored) + +// static const uint16_t coef_lf[2] = { 4309, 213 }; +// static const uint16_t coef_hf[3] = { 5570, 3801, 1016 }; +// static const uint16_t coef_sp[2] = { 5077, 981 }; + + .align 16 + +coeffs: + .hword 4309 * 4, 213 * 4 // lf[1]*4 = v0.h[1] + .hword 5570, 3801, 1016, -3801 // hf[0] = v0.h[2], -hf[1] =v0.h[5] + .hword 5077, 981 + +function ff_bwdif_filter_line4_aarch64, export=1 +// #define prev2 cur +// const uint8_t * restrict next2 = parity ? prev : next; + cmp w7, #0 + csel x17, x2, x4, ne + + // We want all the V registers - save all the ones we must + stp d14, d15, [sp, #-64]! + stp d8, d9, [sp, #48] + stp d10, d11, [sp, #32] + stp d12, d13, [sp, #16] + + ldr q0, coeffs + + // Some rearrangement of initial values for nice layout of refs + mov w10, w6 // w10 = loop count + neg w9, w5 // w9 = mref + lsl w8, w9, #1 // w8 = mref2 + add w7, w9, w9, LSL #1 // w7 = mref3 + lsl w6, w9, #2 // w6 = mref4 + mov w11, w5 // w11 = pref + lsl w12, w5, #1 // w12 = pref2 + add w13, w5, w5, LSL #1 // w13 = pref3 + lsl w14, w5, #2 // w14 = pref4 + add w15, w5, w5, LSL #2 // w15 = pref5 + add w16, w14, w12 // w16 = pref6 + + lsl w5, w1, #1 // w5 = d_stride * 2 + +// for (x = 0; x < w; x++) { +// int diff0, diff2; +// int d0, d2; +// int temporal_diff0, temporal_diff2; +// +// int i1, i2; +// int j1, j2; +// int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4; + +10: +// c0 = prev2[0] + next2[0]; // c0 = v20, v21 +// d0 = c0 >> 1; // d0 = v10 +// temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v11 + ldr q31, [x3] + ldr q21, [x17] + uhadd v10.16b, v31.16b, v21.16b + uabd v11.16b, v31.16b, v21.16b + uaddl v20.8h, v21.8b, v31.8b + uaddl2 v21.8h, v21.16b, v31.16b + + ldr q31, [x3, w6, SXTW] + ldr q23, [x17, w6, SXTW] + +// i1 = coef_hf[0] * c0; // i1 = v2-v5 + UMULL4K v2, v3, v4, v5, v20, v21, v0.h[2] + + ldr q30, [x3, w14, SXTW] + ldr q25, [x17, w14, SXTW] + +// m4 = prev2[mrefs4] + next2[mrefs4]; // m4 = v22,v23 + uaddl v22.8h, v23.8b, v31.8b + uaddl2 v23.8h, v23.16b, v31.16b + +// p4 = prev2[prefs4] + next2[prefs4]; // p4 = v24,v25, (p4 >> 1) = v12 + uhadd v12.16b, v25.16b, v30.16b + uaddl v24.8h, v25.8b, v30.8b + uaddl2 v25.8h, v25.16b, v30.16b + +// j1 = -coef_hf[1] * (c0 + p4); // j1 = v6-v9 (-c0:v20,v21) + add v20.8h, v20.8h, v24.8h + add v21.8h, v21.8h, v25.8h + SMULL4K v6, v7, v8, v9, v20, v21, v0.h[5] + +// m3 = cur[mrefs3]; // m3 = v20 + ldr q20, [x3, w7, SXTW] + +// p3 = cur[prefs3]; // p3 = v21 + ldr q21, [x3, w13, SXTW] + +// i1 += coef_hf[2] * (m4 + p4); // (-m4:v22,v23) (-p4:v24,v25) + add v22.8h, v22.8h, v24.8h + add v23.8h, v23.8h, v25.8h + UMLAL4K v2, v3, v4, v5, v22, v23, v0.h[4] + + ldr q29, [x3, w8, SXTW] + ldr q23, [x17, w8, SXTW] + +// i1 -= coef_lf[1] * 4 * (m3 + p3); // - + uaddl v30.8h, v20.8b, v21.8b + uaddl2 v31.8h, v20.16b, v21.16b + + ldr q28, [x3, w16, SXTW] + ldr q25, [x17, w16, SXTW] + + UMLSL4K v2, v3, v4, v5, v30, v31, v0.h[1] + +// m2 = prev2[mrefs2] + next2[mrefs2]; // m2 = v22,v23, (m2 >> 1) = v13 + uhadd v13.16b, v23.16b, v29.16b + uaddl v22.8h, v23.8b, v29.8b + uaddl2 v23.8h, v23.16b, v29.16b + + ldr q31, [x3, w12, SXTW] + ldr q27, [x17, w12, SXTW] + +// p6 = prev2[prefs6] + next2[prefs6]; // p6 = v24,v25 + uaddl v24.8h, v25.8b, v28.8b + uaddl2 v25.8h, v25.16b, v28.16b + +// j1 += coef_hf[2] * (m2 + p6); // (-p6:v24,v25) + add v24.8h, v24.8h, v22.8h + add v25.8h, v25.8h, v23.8h + UMLAL4K v6, v7, v8, v9, v24, v25, v0.h[4] + +// m1 = cur[mrefs]; // m1 = v24 + ldr q24, [x3, w9, SXTW] + +// p5 = cur[prefs5]; // p5 = v25 + ldr q25, [x3, w15, SXTW] + + +// p2 = prev2[prefs2] + next2[prefs2]; // p2 = v26, v27 +// temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v14 +// d2 = p2 >> 1; // d2 = v15 + uabd v14.16b, v31.16b, v27.16b + uhadd v15.16b, v31.16b, v27.16b + uaddl v26.8h, v27.8b, v31.8b + uaddl2 v27.8h, v27.16b, v31.16b + +// j1 += coef_hf[0] * p2; // - + UMLAL4K v6, v7, v8, v9, v26, v27, v0.h[2] + +// i1 -= coef_hf[1] * (m2 + p2); // (-m2:v22,v23*) (-p2:v26*,v27*) + add v22.8h, v22.8h, v26.8h + add v23.8h, v23.8h, v27.8h + UMLSL4K v2, v3, v4, v5, v22, v23, v0.h[3] + +// p1 = cur[prefs]; // p1 = v22 + ldr q22, [x3, w11, SXTW] + +// j1 -= coef_lf[1] * 4 * (m1 + p5); // - + uaddl v26.8h, v24.8b, v25.8b + uaddl2 v27.8h, v24.16b, v25.16b + UMLSL4K v6, v7, v8, v9, v26, v27, v0.h[1] + +// j2 = (coef_sp[0] * (p1 + p3) - coef_sp[1] * (m1 + p5)) >> 13; // (-p5:v25*) j2=v16 + uaddl v18.8h, v22.8b, v21.8b + uaddl2 v19.8h, v22.16b, v21.16b + UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6] + + uaddl v18.8h, v24.8b, v25.8b + uaddl2 v19.8h, v24.16b, v25.16b + UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7] + + SQSHRUNN v16, v28, v29, v30, v31, 13 + +// i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v20*) i2=v17 + uaddl v18.8h, v22.8b, v24.8b + uaddl2 v19.8h, v22.16b, v24.16b + UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6] + + uaddl v18.8h, v20.8b, v21.8b + uaddl2 v19.8h, v20.16b, v21.16b + UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7] + + SQSHRUNN v17, v28, v29, v30, v31, 13 + +// i1 += coef_lf[0] * 4 * (m1 + p1); // p1 = v22, m1 = v24 + uaddl v26.8h, v24.8b, v22.8b + uaddl2 v27.8h, v24.16b, v22.16b + UMLAL4K v2, v3, v4, v5, v26, v27, v0.h[0] + + ldr q31, [x2, w9, SXTW] + ldr q29, [x4, w9, SXTW] + +// j1 += coef_lf[0] * 4 * (p1 + p3); // p1 = v22, p3 = v21 + uaddl v26.8h, v21.8b, v22.8b + uaddl2 v27.8h, v21.16b, v22.16b + UMLAL4K v6, v7, v8, v9, v26, v27, v0.h[0] + + ldr q30, [x2, w11, SXTW] + ldr q28, [x4, w11, SXTW] + +// i1 >>= 15; // i1 = v2, -v3, -v4*, -v5* + SQSHRUNN v2, v2, v3, v4, v5, 15 + +// j1 >>= 15; // j1 = v3, -v6*, -v7*, -v8*, -v9* + SQSHRUNN v3, v6, v7, v8, v9, 15 + +// { +// int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1; +// int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1; + uabd v30.16b, v22.16b, v30.16b + uabd v31.16b, v24.16b, v31.16b + uabd v28.16b, v22.16b, v28.16b + uabd v29.16b, v24.16b, v29.16b + uhadd v31.16b, v31.16b, v30.16b + uhadd v29.16b, v29.16b, v28.16b + + ldr q27, [x2, w13, SXTW] + ldr q26, [x4, w13, SXTW] + +// diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v18 + ushr v18.16b, v11.16b, #1 + umax v18.16b, v18.16b, v31.16b + umax v18.16b, v18.16b, v29.16b + +// } // v28, v30 preserved for next block +// { // tdiff2 = v14 +// int t1 =(FFABS(prev[prefs] - p1) + FFABS(prev[prefs3] - p3)) >> 1; +// int t2 =(FFABS(next[prefs] - p1) + FFABS(next[prefs3] - p3)) >> 1; + uabd v31.16b, v21.16b, v27.16b + uabd v29.16b, v21.16b, v26.16b + uhadd v31.16b, v31.16b, v30.16b + uhadd v29.16b, v29.16b, v28.16b + +// diff2 = FFMAX3(temporal_diff2 >> 1, t1, t2); // diff2=v19 + ushr v19.16b, v14.16b, #1 + umax v19.16b, v19.16b, v31.16b + umax v19.16b, v19.16b, v29.16b + +// } +// { // (m2 >> 1) = v13, m1 = v24, d0 = v10, d2 = v15, p1 = v22, diff0 = v18 +// int b = (m2 >> 1) - m1; +// int f = d2 - p1; +// int dc = d0 - m1; +// int de = d0 - p1; +// int sp_max = FFMIN(p1 - d0, m1 - d0); + uqsub v31.16b, v22.16b, v10.16b + uqsub v29.16b, v24.16b, v10.16b + umin v29.16b, V31.16b, v29.16b + +// sp_max = FFMIN(sp_max, FFMAX(-b,-f)); + uqsub v30.16b, v24.16b, v13.16b + uqsub v28.16b, v22.16b, v15.16b + umax v28.16b, v28.16b, v30.16b + umin v27.16b, v29.16b, v28.16b + +// int sp_min = FFMIN(d0 - p1, d0 - m1); + uqsub v31.16b, v10.16b, v22.16b + uqsub v29.16b, v10.16b, v24.16b + umin v29.16b, V31.16b, v29.16b + +// sp_min = FFMIN(sp_min, FFMAX(b,f)); + uqsub v30.16b, v13.16b, v24.16b + uqsub v28.16b, v15.16b, v22.16b + umax v28.16b, v28.16b, v30.16b + umin v26.16b, v29.16b, v28.16b + +// diff0 = FFMAX3(diff0, sp_min, sp_max); // diff0 = v18 + umax v18.16b, v18.16b, v27.16b + umax v18.16b, v18.16b, v26.16b +// } +// { // (p4 >> 1) = v12, p3 = v21, d0 = v10, d2 = v15, p1 = v22, diff2 = v19 +// int b = d0 - p1; // 1 +// int f = (p4 >> 1) - p3; // [v23] +// int dc = d2 - p1; +// int de = d2 - p3; +// int sp_max = FFMIN(-de, -dc); + uqsub v31.16b, v21.16b, v15.16b + uqsub v29.16b, v22.16b, v15.16b + umin v29.16b, V31.16b, v29.16b + +// sp_max = FFMIN(sp_max, FFMAX(-b,-f)); + uqsub v30.16b, v22.16b, v10.16b + uqsub v28.16b, v21.16b, v12.16b + umax v28.16b, v28.16b, v30.16b + umin v27.16b, v29.16b, v28.16b + +// int sp_min = FFMIN(de, dc); + uqsub v31.16b, v15.16b, v21.16b + uqsub v29.16b, v15.16b, v22.16b + umin v29.16b, V31.16b, v29.16b + +// sp_min = FFMIN(sp_min, FFMAX(b,f)); + uqsub v30.16b, v10.16b, v22.16b + uqsub v28.16b, v12.16b, v21.16b + umax v28.16b, v28.16b, v30.16b + umin v26.16b, v29.16b, v28.16b + +// diff2 = FFMAX3(diff2, sp_min, sp_max); + umax v19.16b, v19.16b, v27.16b + umax v19.16b, v19.16b, v26.16b + +// } +// +// +// { +// int interpol = FFABS(p1 - p3) > temporal_diff2 ? j1:j2; // interpol = v6 (-j1:v6) (-j2=v16) + uabd v31.16b, v22.16b, v21.16b + cmhi v31.16b, v31.16b, v14.16b + bif v3.16b, v16.16b, v31.16b + +// if (interpol > d2 + diff2) +// interpol = d2 + diff2; + uqadd v30.16b, v15.16b, v19.16b + umin v3.16b, v3.16b, v30.16b + +// else if (interpol < d2 - diff2) +// interpol = d2 - diff2; + uqsub v29.16b, v15.16b, v19.16b + umax v3.16b, v3.16b, v29.16b + +// dst[d_stride * 2] = av_clip_uint8(interpol); + str q3, [x0, w5, SXTW] + +// } + +// dst[d_stride] = p1; + str q22, [x0, w1, SXTW] + + +// { +// int interpol = FFABS(m1 - p1) > temporal_diff0 ? i1:i2; + uabd v31.16b, v24.16b, v22.16b // m1 = v24, p1 = v22 + cmhi v31.16b, v31.16b, v11.16b // td0 = v11 + bif v2.16b, v17.16b, v31.16b // i1 = v2, i2 = v17 + +// if (interpol > d0 + diff0) +// interpol = d0 + diff0; + uqadd v30.16b, v10.16b, v18.16b // diff0 = v18 + umin v2.16b, v2.16b, v30.16b + +// else if (interpol < d0 - diff0) +// interpol = d0 - diff0; + uqsub v29.16b, v10.16b, v18.16b + umax v2.16b, v2.16b, v29.16b +// +// dst[0] = av_clip_uint8(interpol); + str q2, [x0], #16 +// } +// +// dst++; +// cur++; +// prev++; +// prev2++; +// next++; +// } + + subs w10, w10, #16 + add x2, x2, #16 + add x3, x3, #16 + add x4, x4, #16 + add x17, x17, #16 + bgt 10b + + ldp d12, d13, [sp, #16] + ldp d10, d11, [sp, #32] + ldp d8, d9, [sp, #48] + ldp d14, d15, [sp], #64 + ret diff --git a/libavfilter/aarch64/vf_bwdif_aarch64.h b/libavfilter/aarch64/vf_bwdif_aarch64.h new file mode 100644 index 0000000000..8d97802e5e --- /dev/null +++ b/libavfilter/aarch64/vf_bwdif_aarch64.h @@ -0,0 +1,8 @@ +#ifndef AVFILTER_AARCH64_VF_BWDIF_H_ +#define AVFILTER_AARCH64_VF_BWDIF_H_ + +void ff_bwdif_filter_line4_aarch64(void * dst1, int d_stride, + const void * prev1, const void * cur1, const void * next1, int prefs, + int w, int parity, int clip_max); + +#endif diff --git a/libavfilter/aarch64/vf_bwdif_init_aarch64.c b/libavfilter/aarch64/vf_bwdif_init_aarch64.c new file mode 100644 index 0000000000..c5506424c9 --- /dev/null +++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c @@ -0,0 +1,273 @@ +#include "libavutil/common.h" +#include "libavutil/aarch64/cpu.h" +#include "../avfilter.h" +#include "../bwdif.h" +#include "vf_bwdif_aarch64.h" + +/* + * Filter coefficients coef_lf and coef_hf taken from BBC PH-2071 (Weston 3 Field Deinterlacer). + * Used when there is spatial and temporal interpolation. + * Filter coefficients coef_sp are used when there is spatial interpolation only. + * Adjusted for matching visual sharpness impression of spatial and temporal interpolation. + */ +static const uint16_t coef_lf[2] = { 4309, 213 }; +static const uint16_t coef_hf[3] = { 5570, 3801, 1016 }; +static const uint16_t coef_sp[2] = { 5077, 981 }; + +#define NEXT_LINE()\ + dst += d_stride; \ + prev += prefs; \ + cur += prefs; \ + next += prefs; + +static void filter_line4_check(void *restrict dst1, int d_stride, + const void *restrict prev1, const void *restrict cur1, const void *restrict next1, int prefs, + int w, int parity, int clip_max) +{ + uint8_t * restrict dst = dst1; + const uint8_t * restrict prev = prev1; + const uint8_t * restrict cur = cur1; + const uint8_t * restrict next = next1; + + const int mrefs = -prefs; + const int mrefs2 = mrefs * 2; + const int prefs2 = prefs * 2; + const int mrefs3 = mrefs * 3; + const int prefs3 = prefs * 3; + const int mrefs4 = mrefs * 4; + const int prefs4 = prefs * 4; + + static int n = 0; + uint64_t buf[2048*4/sizeof(uint64_t)]; + int i, j; + static int fail_count = 0; + + memset(dst, 0xba, d_stride * 3); + memset(buf, 0xba, d_stride * 3); + + ff_bwdif_filter_line4_aarch64(dst, d_stride, prev, cur, next, prefs, w, parity, clip_max); + + dst = (uint8_t*)buf; + prev = prev1; + cur = cur1; + next = next1; + + ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w, + prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max); + NEXT_LINE(); + memcpy(dst, cur, w); + NEXT_LINE(); + ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w, + prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max); + + for (j = 0; j != 3; ++j) + { + const uint8_t * ref = (uint8_t*)buf + j * d_stride; + const uint8_t * tst = (uint8_t*)dst1 + j * d_stride; + for (i = 0; i != w; ++i) + { + if (ref[i] != tst[i]) + { + printf("n=%d, (%d,%d): Ref: %02x, Tst: %02x\n", n, i, j, ref[i], tst[i]); + if (fail_count++ > 16) + exit(1); + } + } + } + + ++n; +} + +static void __attribute__((optimize("tree-vectorize"))) filter_line4_debug(void *restrict dst1, int d_stride, + const void *restrict prev1, const void *restrict cur1, const void *restrict next1, int prefs, + int w, int parity, int clip_max) +{ + uint8_t * restrict dst = dst1; + const uint8_t * restrict prev = prev1; + const uint8_t * restrict cur = cur1; + const uint8_t * restrict next = next1; + + const int mrefs = -prefs; + const int mrefs2 = mrefs * 2; + const int prefs2 = prefs * 2; + const int mrefs3 = mrefs * 3; + const int prefs3 = prefs * 3; + const int mrefs4 = mrefs * 4; + const int prefs4 = prefs * 4; + + static int n = 0; + static int itt = -1; + + { + int x; +#define prev2 cur + const uint8_t * restrict next2 = parity ? prev : next; + + for (x = 0; x < w; x++) { + int diff0, diff2; + int d0, d2; + int temporal_diff0, temporal_diff2; + + int i1, i2; + int j1, j2; + int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4; + + if ((x & 15) == 0) + ++itt; + +// printf("======= n=%d x=%d [iteration %d.%d] =======\n", n, x, itt, x & 15); + c0 = prev2[0] + next2[0]; // c0 = v20,v26 + d0 = c0 >> 1; // d0 = v21 + temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v9 +// printf("c0=%d, d0=%d, temporal_diff0=%d\n", c0, d0, temporal_diff0); + i1 = coef_hf[0] * c0; // - +// printf("i1=%d\n", i1); + m4 = prev2[mrefs4] + next2[mrefs4]; // m4 = v3,v4 + p4 = prev2[prefs4] + next2[prefs4]; // p4 = v5,v6, (p4 >> 1) = v23 + j1 = -coef_hf[1] * (c0 + p4); // (-c0:v20,v26*) +// printf("m4=%d, p4=%d, j1=%d\n", m4, p4, j1); + i1 += coef_hf[2] * (m4 + p4); // (-m4:v3,v4) (-p4:v5,v6) i1 = v3,v4,v7,v8 +// printf("hf2 i1=%d\n", i1); + m3 = cur[mrefs3]; // m3 = v5 + p3 = cur[prefs3]; // p3 = v10, [f2=v23] + i1 -= coef_lf[1] * 4 * (m3 + p3); // - +// printf("lf1 i1=%d\n", i1); + m2 = prev2[mrefs2] + next2[mrefs2]; // m2 = v11,v12, (m2 >> 1) = v22 + p6 = prev2[prefs4 + prefs2] + next2[prefs4 + prefs2]; // p6=v0,v1 + j1 += coef_hf[2] * (m2 + p6); // (-p6:v0*,v1*), j1 = v13,v14,v15,v16 +// printf("hf2 j1=%d\n", j1); + p2 = prev2[prefs2] + next2[prefs2]; // p2 = v17,v18 + temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v6 + j1 += coef_hf[0] * p2; // - + d2 = p2 >> 1; // d2 = v19 + i1 -= coef_hf[1] * (m2 + p2); // (-m2:v11,v12) +// printf("hf1 i1=%d\n", i1); + m1 = cur[mrefs]; // m1 = v11, [b0=v22] + p5 = cur[prefs3 + prefs2]; // p5=v2 + j1 -= coef_lf[1] * 4 * (m1 + p5); // - + p1 = cur[prefs]; // p1 = v12 + dst[d_stride] = p1; + j2 = (coef_sp[0] * (p1 + p3) - coef_sp[1] * (m1 + p5)) >> 13; // (-p5:v2) j2=v2 + i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v5) i2=v5 + { + int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1; + int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1; + diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v24 +// printf("tdiff0=%d, t1=%d, t2=%d\n", temporal_diff0, t1, t2); + } + { + int t1 =(FFABS(prev[prefs] - p1) + FFABS(prev[prefs3] - p3)) >> 1; + int t2 =(FFABS(next[prefs] - p1) + FFABS(next[prefs3] - p3)) >> 1; + diff2 = FFMAX3(temporal_diff2 >> 1, t1, t2); // diff2=v25 +// printf("tdiff2=%d, t1=%d, t2=%d\n", temporal_diff2, t1, t2); + } + i1 += coef_lf[0] * 4 * (m1 + p1); // - + j1 += coef_lf[0] * 4 * (p1 + p3); // - +// printf("lf0 i1=%d, j1=%d, diff0=%d, diff2=%d\n", i1, j1, diff0, diff2); + { + int b = (m2 >> 1) - m1; // [v22] + int f = d2 - p1; // 1 + int dc = d0 - m1; + int de = d0 - p1; + int sp_max = FFMIN(-de, -dc); + int sp_min = FFMIN(de, dc); + sp_max = FFMIN(sp_max, FFMAX(-b,-f)); + sp_min = FFMIN(sp_min, FFMAX(b,f)); +// printf("spmax0=%d, spmin0=%d, b=%d, f=%d, dc=%d, de=%d\n", sp_max, sp_min, b, f, dc, de); + diff0 = FFMAX3(diff0, sp_min, sp_max); + } + { + int b = d0 - p1; // 1 + int f = (p4 >> 1) - p3; // [v23] + int dc = d2 - p1; + int de = d2 - p3; + int sp_max = FFMIN(-de, -dc); + int sp_min = FFMIN(de, dc); + sp_max = FFMIN(sp_max, FFMAX(-b,-f)); + sp_min = FFMIN(sp_min, FFMAX(b,f)); +// printf("spmax2=%d, spmin2=%d, b=%d, f=%d, dc=%d, de=%d\n", sp_max, sp_min, b, f, dc, de); + diff2 = FFMAX3(diff2, sp_min, sp_max); + } + + i1 >>= 15; + j1 >>= 15; + +// printf("Final i1=%d, i2=%d, j1=%d, j2=%d\n", i1, i2, j1, j2); + + + { + int interpol = FFABS(p1 - p3) > temporal_diff2 ? j1:j2; + +// printf("diff2=%d, interpol=%d, d2=%d\n", diff2, interpol, d2); + + if (interpol > d2 + diff2) + interpol = d2 + diff2; + else if (interpol < d2 - diff2) + interpol = d2 - diff2; + dst[d_stride * 2] = av_clip_uint8(interpol); + } + { + int interpol = FFABS(m1 - p1) > temporal_diff0 ? i1:i2; + +// printf("diff0=%d, interpol=%d, d0=%d\n", diff0, interpol, d0); + + if (interpol > d0 + diff0) + interpol = d0 + diff0; + else if (interpol < d0 - diff0) + interpol = d0 - diff0; + + dst[0] = av_clip_uint8(interpol); + } +// printf("dst[0]=%d, dst[2]=%d\n", dst[0], dst[d_stride*2]); + + dst++; + cur++; + prev++; + next++; + next2++; +// if (n >= 513 && x >= 719) +// { +// exit(99); +// } + } +#undef prev2 + +// NEXT_LINE(); +// memcpy(dst, cur, w); + ++n; + } +} + + +void +ff_bwdif_init_aarch64(AVFilterContext *ctx) +{ + const int cpu_flags = av_get_cpu_flags(); + BWDIFContext *s = ctx->priv; + YADIFContext *yadif = &s->yadif; + + if ((ctx->inputs[0]->w & 31) != 0) + { + av_log(ctx, AV_LOG_DEBUG, "Cannot use aarch64 optimization: w=%d, (needs multiple of 32)\n", ctx->inputs[0]->w); + return; + } + if (yadif->csp->comp[0].depth != 8) + { + av_log(ctx, AV_LOG_DEBUG, "Cannot use aarch64 optimization: bits=%d, (only 8 supported)\n", yadif->csp->comp[0].depth); + return; + } + + if (!have_neon(cpu_flags)) + { + av_log(ctx, AV_LOG_DEBUG, "Cannot use aarch64 optimization: no NEON!\n"); + return; + } + + if (yadif->useasm == 3) + s->filter_line4 = filter_line4_check; + else if (yadif->useasm == 2) + s->filter_line4 = filter_line4_debug; + else + s->filter_line4 = ff_bwdif_filter_line4_aarch64; +} + diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index d7db46c2af..d504fa1bc8 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -248,6 +248,7 @@ extern const AVFilter ff_vf_derain; extern const AVFilter ff_vf_deshake; extern const AVFilter ff_vf_deshake_opencl; extern const AVFilter ff_vf_despill; +extern const AVFilter ff_vf_deinterlace_v4l2m2m; extern const AVFilter ff_vf_detelecine; extern const AVFilter ff_vf_dilation; extern const AVFilter ff_vf_dilation_opencl; @@ -420,6 +421,7 @@ extern const AVFilter ff_vf_scale; extern const AVFilter ff_vf_scale_cuda; extern const AVFilter ff_vf_scale_npp; extern const AVFilter ff_vf_scale_qsv; +extern const AVFilter ff_vf_scale_v4l2m2m; extern const AVFilter ff_vf_scale_vaapi; extern const AVFilter ff_vf_scale_vulkan; extern const AVFilter ff_vf_scale2ref; @@ -490,6 +492,7 @@ extern const AVFilter ff_vf_trim; extern const AVFilter ff_vf_unpremultiply; extern const AVFilter ff_vf_unsharp; extern const AVFilter ff_vf_unsharp_opencl; +extern const AVFilter ff_vf_unsand; extern const AVFilter ff_vf_untile; extern const AVFilter ff_vf_uspp; extern const AVFilter ff_vf_v360; diff --git a/libavfilter/buffersink.c b/libavfilter/buffersink.c index 306c283f77..d3c82aabf3 100644 --- a/libavfilter/buffersink.c +++ b/libavfilter/buffersink.c @@ -62,6 +62,11 @@ typedef struct BufferSinkContext { int sample_rates_size; AVFrame *peeked_frame; + + union { + av_buffersink_alloc_video_frame * video; + } alloc_cb; + void * alloc_v; } BufferSinkContext; #define NB_ITEMS(list) (list ## _size / sizeof(*list)) @@ -154,6 +159,44 @@ int attribute_align_arg av_buffersink_get_samples(AVFilterContext *ctx, return get_frame_internal(ctx, frame, 0, nb_samples); } +static AVFrame * alloc_video_buffer(AVFilterLink *link, int w, int h) +{ + AVFilterContext * const ctx = link->dst; + BufferSinkContext * const bs = ctx->priv; + return bs->alloc_cb.video ? bs->alloc_cb.video(ctx, bs->alloc_v, w, h) : + ff_default_get_video_buffer(link, w, h); +} + +int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v) +{ + BufferSinkContext * const bs = ctx->priv; + bs->alloc_cb.video = cb; + bs->alloc_v = v; + return 0; +} + +#if FF_API_BUFFERSINK_ALLOC +AVBufferSinkParams *av_buffersink_params_alloc(void) +{ + static const int pixel_fmts[] = { AV_PIX_FMT_NONE }; + AVBufferSinkParams *params = av_malloc(sizeof(AVBufferSinkParams)); + if (!params) + return NULL; + + params->pixel_fmts = pixel_fmts; + return params; +} + +AVABufferSinkParams *av_abuffersink_params_alloc(void) +{ + AVABufferSinkParams *params = av_mallocz(sizeof(AVABufferSinkParams)); + + if (!params) + return NULL; + return params; +} +#endif + static av_cold int common_init(AVFilterContext *ctx) { BufferSinkContext *buf = ctx->priv; @@ -381,6 +424,7 @@ static const AVFilterPad avfilter_vsink_buffer_inputs[] = { { .name = "default", .type = AVMEDIA_TYPE_VIDEO, + .get_buffer = {.video = alloc_video_buffer}, }, }; diff --git a/libavfilter/buffersink.h b/libavfilter/buffersink.h index 64e08de53e..09737d322f 100644 --- a/libavfilter/buffersink.h +++ b/libavfilter/buffersink.h @@ -166,6 +166,9 @@ int av_buffersink_get_frame(AVFilterContext *ctx, AVFrame *frame); */ int av_buffersink_get_samples(AVFilterContext *ctx, AVFrame *frame, int nb_samples); +typedef AVFrame * av_buffersink_alloc_video_frame(AVFilterContext * ctx, void * v, int w, int h); +int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v); + /** * @} */ diff --git a/libavfilter/buffersrc.c b/libavfilter/buffersrc.c index ba17450b93..0dbe5d2335 100644 --- a/libavfilter/buffersrc.c +++ b/libavfilter/buffersrc.c @@ -201,7 +201,7 @@ FF_ENABLE_DEPRECATION_WARNINGS switch (ctx->outputs[0]->type) { case AVMEDIA_TYPE_VIDEO: - CHECK_VIDEO_PARAM_CHANGE(ctx, s, frame->width, frame->height, + CHECK_VIDEO_PARAM_CHANGE(ctx, s, av_frame_cropped_width(frame), av_frame_cropped_height(frame), frame->format, frame->pts); break; case AVMEDIA_TYPE_AUDIO: diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h index 889ff772ed..5ba8006e42 100644 --- a/libavfilter/bwdif.h +++ b/libavfilter/bwdif.h @@ -35,8 +35,17 @@ typedef struct BWDIFContext { void (*filter_edge)(void *dst, void *prev, void *cur, void *next, int w, int prefs, int mrefs, int prefs2, int mrefs2, int parity, int clip_max, int spat); + void (*filter_line4)(void *dst, int dstride, + const void *prev, const void *cur, const void *next, int prefs, + int w, int parity, int clip_max); } BWDIFContext; +void ff_bwdif_filter_line_c(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, + int w, int prefs, int mrefs, int prefs2, int mrefs2, + int prefs3, int mrefs3, int prefs4, int mrefs4, + int parity, int clip_max); + void ff_bwdif_init_x86(BWDIFContext *bwdif); +void ff_bwdif_init_aarch64(AVFilterContext *ctx); #endif /* AVFILTER_BWDIF_H */ diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c index 65c617ebb3..fbc9491642 100644 --- a/libavfilter/vf_bwdif.c +++ b/libavfilter/vf_bwdif.c @@ -38,6 +38,10 @@ #include "video.h" #include "bwdif.h" +#include +#define OPT_TEST 0 +#define OPT_NEW 0 + /* * Filter coefficients coef_lf and coef_hf taken from BBC PH-2071 (Weston 3 Field Deinterlacer). * Used when there is spatial and temporal interpolation. @@ -74,10 +78,10 @@ typedef struct ThreadData { int temporal_diff1 =(FFABS(prev[mrefs] - c) + FFABS(prev[prefs] - e)) >> 1; \ int temporal_diff2 =(FFABS(next[mrefs] - c) + FFABS(next[prefs] - e)) >> 1; \ int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); \ - \ + {/*\ if (!diff) { \ dst[0] = d; \ - } else { + } else {*/ #define SPAT_CHECK() \ int b = ((prev2[mrefs2] + next2[mrefs2]) >> 1) - c; \ @@ -89,15 +93,16 @@ typedef struct ThreadData { diff = FFMAX3(diff, min, -max); #define FILTER_LINE() \ + int i1, i2; \ SPAT_CHECK() \ - if (FFABS(c - e) > temporal_diff0) { \ - interpol = (((coef_hf[0] * (prev2[0] + next2[0]) \ + /*if (FFABS(c - e) > temporal_diff0)*/ { \ + i1 = (((coef_hf[0] * (prev2[0] + next2[0]) \ - coef_hf[1] * (prev2[mrefs2] + next2[mrefs2] + prev2[prefs2] + next2[prefs2]) \ + coef_hf[2] * (prev2[mrefs4] + next2[mrefs4] + prev2[prefs4] + next2[prefs4])) >> 2) \ + coef_lf[0] * (c + e) - coef_lf[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \ - } else { \ - interpol = (coef_sp[0] * (c + e) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \ - } + } /*else*/ { \ + i2 = (coef_sp[0] * (c + e) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \ + }interpol = FFABS(c - e) > temporal_diff0 ? i1:i2;\ #define FILTER_EDGE() \ if (spat) { \ @@ -111,7 +116,7 @@ typedef struct ThreadData { else if (interpol < d - diff) \ interpol = d - diff; \ \ - dst[0] = av_clip(interpol, 0, clip_max); \ + dst[0] = !diff ? d : av_clip(interpol, 0, clip_max); \ } \ \ dst++; \ @@ -122,7 +127,7 @@ typedef struct ThreadData { next2++; \ } -static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs, +static void __attribute__((optimize("tree-vectorize"))) filter_intra(void *restrict dst1, void *restrict cur1, int w, int prefs, int mrefs, int prefs3, int mrefs3, int parity, int clip_max) { uint8_t *dst = dst1; @@ -132,7 +137,101 @@ static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs, FILTER_INTRA() } -static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, +#if OPT_NEW +void __attribute__((optimize("tree-vectorize"))) ff_bwdif_filter_line_c(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, + int w, int prefs, int mrefs, int prefs2, int mrefs2, + int prefs3, int mrefs3, int prefs4, int mrefs4, + int parity, int clip_max) +{ + if (parity) { + uint8_t * restrict dst = dst1; + const uint8_t * prev = prev1; + const uint8_t * cur = cur1; + const uint8_t * next = next1; + const uint8_t * prev2 = prev; + const uint8_t * next2 = cur; + int interpol, x; + + FILTER1() + FILTER_LINE() + FILTER2() + } + else { + uint8_t * restrict dst = dst1; + const uint8_t * prev = prev1; + const uint8_t * cur = cur1; + const uint8_t * next = next1; + int interpol, x; +#define prev2 cur +#define next2 next + + for (x = 0; x < w; x++) { + int diff0; + int d0; + int temporal_diff0; + + int i1, i2; + int p4, p3, p2, p1, c0, m1, m2, m3, m4; + + m4 = prev2[mrefs4] + next2[mrefs4]; // 2 + p4 = prev2[prefs4] + next2[prefs4]; + i1 = coef_hf[2] * (m4 + p4); + m3 = cur[mrefs3]; // 1 + p3 = cur[prefs3]; + i1 += -coef_lf[1] * 4 * (m3 + p3); + m2 = prev2[mrefs2] + next2[mrefs2]; // 2 + p2 = prev2[prefs2] + next2[prefs2]; // 2 + i1 += -coef_hf[1] * (m2 + p2); + m1 = cur[mrefs]; // 1 + p1 = cur[prefs]; // 1 + c0 = prev2[0] + next2[0]; // 2 + i1 += coef_hf[0] * c0; // 4 + d0 = c0 >> 1; // 1 + temporal_diff0 = FFABS(prev2[0] - next2[0]); // 1 + i1 += coef_lf[0] * 4 * (m1 + p1); // - + { + int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1; + int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1; + diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // 1 + } + { + int b = (m2 >> 1) - m1; // 1 + int f = (p2 >> 1) - p1; // 1 + int dc = d0 - m1; + int de = d0 - p1; + int sp_max = FFMAX(de, dc); + int sp_min = FFMIN(de, dc); + sp_max = FFMAX(sp_max, FFMIN(b,f)); + sp_min = FFMIN(sp_min, FFMAX(b,f)); + diff0 = FFMAX3(diff0, sp_min, -sp_max); + } + + i1 >>= 15; + + i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; + + + interpol = FFABS(m1 - p1) > temporal_diff0 ? i1:i2; + + if (interpol > d0 + diff0) + interpol = d0 + diff0; + else if (interpol < d0 - diff0) + interpol = d0 - diff0; + + dst[0] = av_clip_uint8(interpol); + + dst++; + cur++; + prev++; + next++; +#undef prev2 +#undef next2 + } + } +} + +#else +void __attribute__((optimize("tree-vectorize"))) ff_bwdif_filter_line_c(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, int w, int prefs, int mrefs, int prefs2, int mrefs2, int prefs3, int mrefs3, int prefs4, int mrefs4, int parity, int clip_max) @@ -149,8 +248,34 @@ static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, FILTER_LINE() FILTER2() } +#endif + +#define NEXT_LINE()\ + dst += d_stride; \ + prev += prefs; \ + cur += prefs; \ + next += prefs; + +// ***** Temp +static void __attribute__((optimize("tree-vectorize"))) filter_line4_c(void *restrict dst1, int d_stride, + const void *restrict prev1, const void *restrict cur1, const void *restrict next1, int prefs, + int w, int parity, int clip_max) +{ + uint8_t * restrict dst = dst1; + const uint8_t * restrict prev = prev1; + const uint8_t * restrict cur = cur1; + const uint8_t * restrict next = next1; + + ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w, + prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max); + NEXT_LINE(); + memcpy(dst, cur, w); + NEXT_LINE(); + ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w, + prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max); +} -static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1, +static void __attribute__((optimize("tree-vectorize"))) filter_edge(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, int w, int prefs, int mrefs, int prefs2, int mrefs2, int parity, int clip_max, int spat) { @@ -167,7 +292,7 @@ static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1, FILTER2() } -static void filter_intra_16bit(void *dst1, void *cur1, int w, int prefs, int mrefs, +static void __attribute__((optimize("tree-vectorize"))) filter_intra_16bit(void *restrict dst1, void *restrict cur1, int w, int prefs, int mrefs, int prefs3, int mrefs3, int parity, int clip_max) { uint16_t *dst = dst1; @@ -177,7 +302,7 @@ static void filter_intra_16bit(void *dst1, void *cur1, int w, int prefs, int mre FILTER_INTRA() } -static void filter_line_c_16bit(void *dst1, void *prev1, void *cur1, void *next1, +static void __attribute__((optimize("tree-vectorize"))) filter_line_c_16bit(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, int w, int prefs, int mrefs, int prefs2, int mrefs2, int prefs3, int mrefs3, int prefs4, int mrefs4, int parity, int clip_max) @@ -195,7 +320,7 @@ static void filter_line_c_16bit(void *dst1, void *prev1, void *cur1, void *next1 FILTER2() } -static void filter_edge_16bit(void *dst1, void *prev1, void *cur1, void *next1, +static void __attribute__((optimize("tree-vectorize"))) filter_edge_16bit(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, int w, int prefs, int mrefs, int prefs2, int mrefs2, int parity, int clip_max, int spat) { @@ -244,6 +369,10 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) refs << 1, -(refs << 1), td->parity ^ td->tff, clip_max, (y < 2) || ((y + 3) > td->h) ? 0 : 1); + } else if (s->filter_line4 && y + 2 < slice_end && ((y + 7) <= td->h)) { + s->filter_line4(dst, td->frame->linesize[td->plane], prev, cur, next, refs, td->w, + td->parity ^ td->tff, clip_max); + y += 2; } else { s->filter_line(dst, prev, cur, next, td->w, refs, -refs, refs << 1, -(refs << 1), @@ -258,6 +387,19 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) return 0; } +#if OPT_TEST +static unsigned int test_frames = 0; +static uint64_t cum_time = 0; +static uint64_t min_delta = 99999999; +static uint64_t max_delta = 0; +static uint64_t utime(void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_nsec / 1000 + (uint64_t)ts.tv_sec * 1000000; +} +#endif + static void filter(AVFilterContext *ctx, AVFrame *dstpic, int parity, int tff) { @@ -278,9 +420,23 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic, td.w = w; td.h = h; td.plane = i; - +#if OPT_TEST + { + const uint64_t now = utime(); + uint64_t delta; + filter_slice(ctx, &td, 0, 1); + delta = utime() - now; + ++test_frames; + cum_time += delta; + if (min_delta > delta) + min_delta = delta; + if (max_delta < delta) + max_delta = delta; + } +#else ff_filter_execute(ctx, filter_slice, &td, NULL, FFMIN(h, ff_filter_get_nb_threads(ctx))); +#endif } if (yadif->current_field == YADIF_FIELD_END) { yadif->current_field = YADIF_FIELD_NORMAL; @@ -297,6 +453,11 @@ static av_cold void uninit(AVFilterContext *ctx) av_frame_free(&yadif->prev); av_frame_free(&yadif->cur ); av_frame_free(&yadif->next); +#if OPT_TEST + av_log(ctx, AV_LOG_INFO, "Stats: Avg:%"PRIu64", Max:%"PRIu64", Min:%"PRIu64"\n", + test_frames == 0 ? (uint64_t)0 : cum_time / test_frames, + max_delta, min_delta); +#endif } static const enum AVPixelFormat pix_fmts[] = { @@ -340,19 +501,27 @@ static int config_props(AVFilterLink *link) yadif->csp = av_pix_fmt_desc_get(link->format); yadif->filter = filter; + s->filter_line4 = 0; if (yadif->csp->comp[0].depth > 8) { s->filter_intra = filter_intra_16bit; s->filter_line = filter_line_c_16bit; s->filter_edge = filter_edge_16bit; } else { s->filter_intra = filter_intra; - s->filter_line = filter_line_c; + s->filter_line = ff_bwdif_filter_line_c; s->filter_edge = filter_edge; + if (yadif->useasm == 0) + s->filter_line4 = filter_line4_c; } + if (yadif->useasm != 0) + { #if ARCH_X86 - ff_bwdif_init_x86(s); + ff_bwdif_init_x86(s); +#elif ARCH_AARCH64 + ff_bwdif_init_aarch64(ctx); #endif + } return 0; } @@ -377,6 +546,7 @@ static const AVOption bwdif_options[] = { CONST("all", "deinterlace all frames", YADIF_DEINT_ALL, "deint"), CONST("interlaced", "only deinterlace frames marked as interlaced", YADIF_DEINT_INTERLACED, "deint"), + {"useasm", "use asm functions (default true)", OFFSET(useasm), AV_OPT_TYPE_INT, {.i64=1}, 0, 3, FLAGS, NULL }, { NULL } }; diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c new file mode 100644 index 0000000000..a173a291f8 --- /dev/null +++ b/libavfilter/vf_deinterlace_v4l2m2m.c @@ -0,0 +1,2102 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * deinterlace video filter - V4L2 M2M + */ + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" + +#include "libavutil/avassert.h" +#include "libavutil/avstring.h" +#include "libavutil/common.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_drm.h" +#include "libavutil/internal.h" +#include "libavutil/mathematics.h" +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" +#include "libavutil/time.h" + +#define FF_INTERNAL_FIELDS 1 +#include "framequeue.h" +#include "filters.h" +#include "avfilter.h" +#include "formats.h" +#include "internal.h" +#include "scale_eval.h" +#include "video.h" + +#ifndef DRM_FORMAT_P030 +#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ +#endif + +// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined +// in drm_fourcc.h hopefully will be sometime in the future but until then... +#ifndef V4L2_PIX_FMT_NV12_10_COL128 +#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') +#endif + +#ifndef V4L2_PIX_FMT_NV12_COL128 +#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ +#endif + +typedef struct V4L2Queue V4L2Queue; +typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared; + +typedef enum filter_type_v4l2_e +{ + FILTER_V4L2_DEINTERLACE = 1, + FILTER_V4L2_SCALE, +} filter_type_v4l2_t; + +typedef struct V4L2Buffer { + int enqueued; + int reenqueue; + struct v4l2_buffer buffer; + AVFrame frame; + struct v4l2_plane planes[VIDEO_MAX_PLANES]; + int num_planes; + AVDRMFrameDescriptor drm_frame; + V4L2Queue *q; +} V4L2Buffer; + +typedef struct V4L2Queue { + struct v4l2_format format; + struct v4l2_selection sel; + int eos; + int num_buffers; + V4L2Buffer *buffers; + const char * name; + DeintV4L2M2MContextShared *ctx; +} V4L2Queue; + +typedef struct pts_stats_s +{ + void * logctx; + const char * name; // For debug + unsigned int last_count; + unsigned int last_interval; + int64_t last_pts; +} pts_stats_t; + +#define PTS_TRACK_SIZE 32 +typedef struct pts_track_el_s +{ + uint32_t n; + unsigned int interval; + AVFrame * props; +} pts_track_el_t; + +typedef struct pts_track_s +{ + uint32_t n; + uint32_t last_n; + int got_2; + void * logctx; + pts_stats_t stats; + pts_track_el_t a[PTS_TRACK_SIZE]; +} pts_track_t; + +typedef enum drain_state_e +{ + DRAIN_NONE = 0, // Not draining + DRAIN_TIMEOUT, // Drain until normal timeout setup yields no frame + DRAIN_LAST, // Drain with long timeout last_frame in received on output expected + DRAIN_EOS, // Drain with long timeout EOS expected + DRAIN_DONE // Drained +} drain_state_t; + +typedef struct DeintV4L2M2MContextShared { + void * logctx; // For logging - will be NULL when done + filter_type_v4l2_t filter_type; + + int fd; + int done; // fd closed - awating all refs dropped + int width; + int height; + + int drain; // EOS received (inlink status) + drain_state_t drain_state; + int64_t drain_pts; // PTS associated with inline status + + unsigned int frames_rx; + unsigned int frames_tx; + + // from options + int output_width; + int output_height; + enum AVPixelFormat output_format; + + int has_enc_stop; + // We expect to get exactly the same number of frames out as we put in + // We can drain by matching input to output + int one_to_one; + + int orig_width; + int orig_height; + atomic_uint refcount; + + AVBufferRef *hw_frames_ctx; + + unsigned int field_order; + + pts_track_t track; + + V4L2Queue output; + V4L2Queue capture; +} DeintV4L2M2MContextShared; + +typedef struct DeintV4L2M2MContext { + const AVClass *class; + + DeintV4L2M2MContextShared *shared; + + char * w_expr; + char * h_expr; + char * output_format_string;; + + int force_original_aspect_ratio; + int force_divisible_by; + + char *colour_primaries_string; + char *colour_transfer_string; + char *colour_matrix_string; + int colour_range; + char *chroma_location_string; + + enum AVColorPrimaries colour_primaries; + enum AVColorTransferCharacteristic colour_transfer; + enum AVColorSpace colour_matrix; + enum AVChromaLocation chroma_location; +} DeintV4L2M2MContext; + + +static inline int drain_frame_expected(const drain_state_t d) +{ + return d == DRAIN_EOS || d == DRAIN_LAST; +} + +// These just list the ones we know we can cope with +static uint32_t +fmt_av_to_v4l2(const enum AVPixelFormat avfmt) +{ + switch (avfmt) { + case AV_PIX_FMT_YUV420P: + return V4L2_PIX_FMT_YUV420; + case AV_PIX_FMT_NV12: + return V4L2_PIX_FMT_NV12; +#if CONFIG_SAND + case AV_PIX_FMT_RPI4_8: + case AV_PIX_FMT_SAND128: + return V4L2_PIX_FMT_NV12_COL128; +#endif + default: + break; + } + return 0; +} + +static enum AVPixelFormat +fmt_v4l2_to_av(const uint32_t pixfmt) +{ + switch (pixfmt) { + case V4L2_PIX_FMT_YUV420: + return AV_PIX_FMT_YUV420P; + case V4L2_PIX_FMT_NV12: + return AV_PIX_FMT_NV12; +#if CONFIG_SAND + case V4L2_PIX_FMT_NV12_COL128: + return AV_PIX_FMT_RPI4_8; +#endif + default: + break; + } + return AV_PIX_FMT_NONE; +} + +static unsigned int pts_stats_interval(const pts_stats_t * const stats) +{ + return stats->last_interval; +} + +// Pick 64 for max last count - that is >1sec at 60fps +#define STATS_LAST_COUNT_MAX 64 +#define STATS_INTERVAL_MAX (1 << 30) +static void pts_stats_add(pts_stats_t * const stats, int64_t pts) +{ + if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { + if (stats->last_count < STATS_LAST_COUNT_MAX) + ++stats->last_count; + return; + } + + if (stats->last_pts != AV_NOPTS_VALUE) { + const int64_t interval = pts - stats->last_pts; + + if (interval < 0 || interval >= STATS_INTERVAL_MAX || + stats->last_count >= STATS_LAST_COUNT_MAX) { + if (stats->last_interval != 0) + av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n", + __func__, stats->name, interval, stats->last_count); + stats->last_interval = 0; + } + else { + const int64_t frame_time = interval / (int64_t)stats->last_count; + + if (frame_time != stats->last_interval) + av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n", + __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time); + stats->last_interval = frame_time; + } + } + + stats->last_pts = pts; + stats->last_count = 1; +} + +static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name) +{ + *stats = (pts_stats_t){ + .logctx = logctx, + .name = name, + .last_count = 1, + .last_interval = 0, + .last_pts = AV_NOPTS_VALUE + }; +} + +static inline uint32_t pts_track_next_n(pts_track_t * const trk) +{ + if (++trk->n == 0) + trk->n = 1; + return trk->n; +} + +static int pts_track_get_frame(pts_track_t * const trk, const struct timeval tv, AVFrame * const dst) +{ + uint32_t n = (uint32_t)(tv.tv_usec / 2 + tv.tv_sec * 500000); + pts_track_el_t * t; + + // As a first guess assume that n==0 means last frame + if (n == 0) { + n = trk->last_n; + if (n == 0) + goto fail; + } + + t = trk->a + (n & (PTS_TRACK_SIZE - 1)); + + if (t->n != n) { + av_log(trk->logctx, AV_LOG_ERROR, "%s: track failure: got %u, expected %u\n", __func__, n, trk->n); + goto fail; + } + + // 1st frame is simple - just believe it + if (n != trk->last_n) { + trk->last_n = n; + trk->got_2 = 0; + return av_frame_copy_props(dst, t->props); + } + + // Only believe in a single interpolated frame + if (trk->got_2) + goto fail; + trk->got_2 = 1; + + av_frame_copy_props(dst, t->props); + + + // If we can't guess - don't + if (t->interval == 0) { + dst->best_effort_timestamp = AV_NOPTS_VALUE; + dst->pts = AV_NOPTS_VALUE; + dst->pkt_dts = AV_NOPTS_VALUE; + } + else { + if (dst->best_effort_timestamp != AV_NOPTS_VALUE) + dst->best_effort_timestamp += t->interval / 2; + if (dst->pts != AV_NOPTS_VALUE) + dst->pts += t->interval / 2; + if (dst->pkt_dts != AV_NOPTS_VALUE) + dst->pkt_dts += t->interval / 2; + } + + return 0; + +fail: + trk->last_n = 0; + trk->got_2 = 0; + dst->pts = AV_NOPTS_VALUE; + dst->pkt_dts = AV_NOPTS_VALUE; + return 0; +} + +// We are only ever expecting in-order frames so nothing more clever is required +static unsigned int +pts_track_count(const pts_track_t * const trk) +{ + return (trk->n - trk->last_n) & (PTS_TRACK_SIZE - 1); +} + +static struct timeval pts_track_add_frame(pts_track_t * const trk, const AVFrame * const src) +{ + const uint32_t n = pts_track_next_n(trk); + pts_track_el_t * const t = trk->a + (n & (PTS_TRACK_SIZE - 1)); + + pts_stats_add(&trk->stats, src->pts); + + t->n = n; + t->interval = pts_stats_interval(&trk->stats); // guess that next interval is the same as the last + av_frame_unref(t->props); + av_frame_copy_props(t->props, src); + + // We now know what the previous interval was, rather than having to guess, + // so set it. There is a better than decent chance that this is before + // we use it. + if (t->interval != 0) { + pts_track_el_t * const prev_t = trk->a + ((n - 1) & (PTS_TRACK_SIZE - 1)); + prev_t->interval = t->interval; + } + + // In case deinterlace interpolates frames use every other usec + return (struct timeval){.tv_sec = n / 500000, .tv_usec = (n % 500000) * 2}; +} + +static void pts_track_uninit(pts_track_t * const trk) +{ + unsigned int i; + for (i = 0; i != PTS_TRACK_SIZE; ++i) { + trk->a[i].n = 0; + av_frame_free(&trk->a[i].props); + } +} + +static int pts_track_init(pts_track_t * const trk, void *logctx) +{ + unsigned int i; + trk->n = 1; + pts_stats_init(&trk->stats, logctx, "track"); + for (i = 0; i != PTS_TRACK_SIZE; ++i) { + trk->a[i].n = 0; + if ((trk->a[i].props = av_frame_alloc()) == NULL) { + pts_track_uninit(trk); + return AVERROR(ENOMEM); + } + } + return 0; +} + +static inline uint32_t +fmt_bpl(const struct v4l2_format * const fmt, const unsigned int plane_n) +{ + return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.plane_fmt[plane_n].bytesperline : fmt->fmt.pix.bytesperline; +} + +static inline uint32_t +fmt_height(const struct v4l2_format * const fmt) +{ + return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; +} + +static inline uint32_t +fmt_width(const struct v4l2_format * const fmt) +{ + return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; +} + +static inline uint32_t +fmt_pixelformat(const struct v4l2_format * const fmt) +{ + return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat; +} + +static inline uint32_t +buf_bytesused0(const struct v4l2_buffer * const buf) +{ + return V4L2_TYPE_IS_MULTIPLANAR(buf->type) ? buf->m.planes[0].bytesused : buf->bytesused; +} + +static void +init_format(V4L2Queue * const q, const uint32_t format_type) +{ + memset(&q->format, 0, sizeof(q->format)); + memset(&q->sel, 0, sizeof(q->sel)); + q->format.type = format_type; + q->sel.type = format_type; +} + +static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx) +{ + struct v4l2_capability cap; + int ret; + + memset(&cap, 0, sizeof(cap)); + ret = ioctl(ctx->fd, VIDIOC_QUERYCAP, &cap); + if (ret < 0) + return ret; + + if (ctx->filter_type == FILTER_V4L2_SCALE && + strcmp("bcm2835-codec-isp", cap.card) != 0) + { + av_log(ctx->logctx, AV_LOG_DEBUG, "Not ISP\n"); + return AVERROR(EINVAL); + } + + if (!(cap.capabilities & V4L2_CAP_STREAMING)) { + av_log(ctx->logctx, AV_LOG_DEBUG, "No streaming\n"); + return AVERROR(EINVAL); + } + + if (cap.capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) { + init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + init_format(&ctx->output, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); + } + else if (cap.capabilities & V4L2_CAP_VIDEO_M2M) { + init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE); + init_format(&ctx->output, V4L2_BUF_TYPE_VIDEO_OUTPUT); + } + else { + av_log(ctx->logctx, AV_LOG_DEBUG, "Not M2M\n"); + return AVERROR(EINVAL); + } + + return 0; +} + +// Just use for probe - doesn't modify q format +static int deint_v4l2m2m_try_format(V4L2Queue *queue, const uint32_t width, const uint32_t height, const enum AVPixelFormat avfmt) +{ + struct v4l2_format fmt = {.type = queue->format.type}; + DeintV4L2M2MContextShared *ctx = queue->ctx; + int ret, field; + // Pick YUV to test with if not otherwise specified + uint32_t pixelformat = avfmt == AV_PIX_FMT_NONE ? V4L2_PIX_FMT_YUV420 : fmt_av_to_v4l2(avfmt); + enum AVPixelFormat r_avfmt; + + + ret = ioctl(ctx->fd, VIDIOC_G_FMT, &fmt); + if (ret) + av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_FMT failed: %d\n", ret); + + if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && V4L2_TYPE_IS_OUTPUT(fmt.type)) + field = V4L2_FIELD_INTERLACED_TB; + else + field = V4L2_FIELD_NONE; + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { + fmt.fmt.pix_mp.pixelformat = pixelformat; + fmt.fmt.pix_mp.field = field; + fmt.fmt.pix_mp.width = width; + fmt.fmt.pix_mp.height = height; + } else { + fmt.fmt.pix.pixelformat = pixelformat; + fmt.fmt.pix.field = field; + fmt.fmt.pix.width = width; + fmt.fmt.pix.height = height; + } + + av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__, + fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height, + fmt.fmt.pix_mp.pixelformat, + fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline); + + ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, &fmt); + if (ret) + return AVERROR(EINVAL); + + av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__, + fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height, + fmt.fmt.pix_mp.pixelformat, + fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline); + + r_avfmt = fmt_v4l2_to_av(fmt_pixelformat(&fmt)); + if (r_avfmt != avfmt && avfmt != AV_PIX_FMT_NONE) { + av_log(ctx->logctx, AV_LOG_DEBUG, "Unable to set format %s on %s port\n", av_get_pix_fmt_name(avfmt), V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src"); + return AVERROR(EINVAL); + } + if (r_avfmt == AV_PIX_FMT_NONE) { + av_log(ctx->logctx, AV_LOG_DEBUG, "No supported format on %s port\n", V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src"); + return AVERROR(EINVAL); + } + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { + if (fmt.fmt.pix_mp.field != field) { + av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type); + + return AVERROR(EINVAL); + } + } else { + if (fmt.fmt.pix.field != field) { + av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type); + + return AVERROR(EINVAL); + } + } + + return 0; +} + +static int +do_s_fmt(V4L2Queue * const q) +{ + DeintV4L2M2MContextShared * const ctx = q->ctx; + const uint32_t pixelformat = fmt_pixelformat(&q->format); + int ret; + + ret = ioctl(ctx->fd, VIDIOC_S_FMT, &q->format); + if (ret) { + ret = AVERROR(errno); + av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %s\n", av_err2str(ret)); + return ret; + } + + if (pixelformat != fmt_pixelformat(&q->format)) { + av_log(ctx->logctx, AV_LOG_ERROR, "Format not supported: %s; S_FMT returned %s\n", av_fourcc2str(pixelformat), av_fourcc2str(fmt_pixelformat(&q->format))); + return AVERROR(EINVAL); + } + + q->sel.target = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE, + q->sel.flags = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_FLAG_LE : V4L2_SEL_FLAG_GE; + + ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &q->sel); + if (ret) { + ret = AVERROR(errno); + av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_S_SELECTION failed: %s\n", av_err2str(ret)); + } + + return 0; +} + +static void +set_fmt_color(struct v4l2_format *const fmt, + const enum AVColorPrimaries avcp, + const enum AVColorSpace avcs, + const enum AVColorTransferCharacteristic avxc) +{ + enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT; + enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT; + enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT; + + switch (avcp) { + case AVCOL_PRI_BT709: + cs = V4L2_COLORSPACE_REC709; + ycbcr = V4L2_YCBCR_ENC_709; + break; + case AVCOL_PRI_BT470M: + cs = V4L2_COLORSPACE_470_SYSTEM_M; + ycbcr = V4L2_YCBCR_ENC_601; + break; + case AVCOL_PRI_BT470BG: + cs = V4L2_COLORSPACE_470_SYSTEM_BG; + break; + case AVCOL_PRI_SMPTE170M: + cs = V4L2_COLORSPACE_SMPTE170M; + break; + case AVCOL_PRI_SMPTE240M: + cs = V4L2_COLORSPACE_SMPTE240M; + break; + case AVCOL_PRI_BT2020: + cs = V4L2_COLORSPACE_BT2020; + break; + case AVCOL_PRI_SMPTE428: + case AVCOL_PRI_SMPTE431: + case AVCOL_PRI_SMPTE432: + case AVCOL_PRI_EBU3213: + case AVCOL_PRI_RESERVED: + case AVCOL_PRI_FILM: + case AVCOL_PRI_UNSPECIFIED: + default: + break; + } + + switch (avcs) { + case AVCOL_SPC_RGB: + cs = V4L2_COLORSPACE_SRGB; + break; + case AVCOL_SPC_BT709: + cs = V4L2_COLORSPACE_REC709; + break; + case AVCOL_SPC_FCC: + cs = V4L2_COLORSPACE_470_SYSTEM_M; + break; + case AVCOL_SPC_BT470BG: + cs = V4L2_COLORSPACE_470_SYSTEM_BG; + break; + case AVCOL_SPC_SMPTE170M: + cs = V4L2_COLORSPACE_SMPTE170M; + break; + case AVCOL_SPC_SMPTE240M: + cs = V4L2_COLORSPACE_SMPTE240M; + break; + case AVCOL_SPC_BT2020_CL: + cs = V4L2_COLORSPACE_BT2020; + ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM; + break; + case AVCOL_SPC_BT2020_NCL: + cs = V4L2_COLORSPACE_BT2020; + break; + default: + break; + } + + switch (xfer) { + case AVCOL_TRC_BT709: + xfer = V4L2_XFER_FUNC_709; + break; + case AVCOL_TRC_IEC61966_2_1: + xfer = V4L2_XFER_FUNC_SRGB; + break; + case AVCOL_TRC_SMPTE240M: + xfer = V4L2_XFER_FUNC_SMPTE240M; + break; + case AVCOL_TRC_SMPTE2084: + xfer = V4L2_XFER_FUNC_SMPTE2084; + break; + default: + break; + } + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { + fmt->fmt.pix_mp.colorspace = cs; + fmt->fmt.pix_mp.ycbcr_enc = ycbcr; + fmt->fmt.pix_mp.xfer_func = xfer; + } else { + fmt->fmt.pix.colorspace = cs; + fmt->fmt.pix.ycbcr_enc = ycbcr; + fmt->fmt.pix.xfer_func = xfer; + } +} + +static void +set_fmt_color_range(struct v4l2_format *const fmt, const enum AVColorRange avcr) +{ + const enum v4l2_quantization q = + avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE : + avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE : + V4L2_QUANTIZATION_DEFAULT; + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { + fmt->fmt.pix_mp.quantization = q; + } else { + fmt->fmt.pix.quantization = q; + } +} + +static enum AVColorPrimaries get_color_primaries(const struct v4l2_format *const fmt) +{ + enum v4l2_ycbcr_encoding ycbcr; + enum v4l2_colorspace cs; + + cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? + fmt->fmt.pix_mp.colorspace : + fmt->fmt.pix.colorspace; + + ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? + fmt->fmt.pix_mp.ycbcr_enc: + fmt->fmt.pix.ycbcr_enc; + + switch(ycbcr) { + case V4L2_YCBCR_ENC_XV709: + case V4L2_YCBCR_ENC_709: return AVCOL_PRI_BT709; + case V4L2_YCBCR_ENC_XV601: + case V4L2_YCBCR_ENC_601:return AVCOL_PRI_BT470M; + default: + break; + } + + switch(cs) { + case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_PRI_BT470BG; + case V4L2_COLORSPACE_SMPTE170M: return AVCOL_PRI_SMPTE170M; + case V4L2_COLORSPACE_SMPTE240M: return AVCOL_PRI_SMPTE240M; + case V4L2_COLORSPACE_BT2020: return AVCOL_PRI_BT2020; + default: + break; + } + + return AVCOL_PRI_UNSPECIFIED; +} + +static enum AVColorSpace get_color_space(const struct v4l2_format *const fmt) +{ + enum v4l2_ycbcr_encoding ycbcr; + enum v4l2_colorspace cs; + + cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? + fmt->fmt.pix_mp.colorspace : + fmt->fmt.pix.colorspace; + + ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? + fmt->fmt.pix_mp.ycbcr_enc: + fmt->fmt.pix.ycbcr_enc; + + switch(cs) { + case V4L2_COLORSPACE_SRGB: return AVCOL_SPC_RGB; + case V4L2_COLORSPACE_REC709: return AVCOL_SPC_BT709; + case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_SPC_FCC; + case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_SPC_BT470BG; + case V4L2_COLORSPACE_SMPTE170M: return AVCOL_SPC_SMPTE170M; + case V4L2_COLORSPACE_SMPTE240M: return AVCOL_SPC_SMPTE240M; + case V4L2_COLORSPACE_BT2020: + if (ycbcr == V4L2_YCBCR_ENC_BT2020_CONST_LUM) + return AVCOL_SPC_BT2020_CL; + else + return AVCOL_SPC_BT2020_NCL; + default: + break; + } + + return AVCOL_SPC_UNSPECIFIED; +} + +static enum AVColorTransferCharacteristic get_color_trc(const struct v4l2_format *const fmt) +{ + enum v4l2_ycbcr_encoding ycbcr; + enum v4l2_xfer_func xfer; + enum v4l2_colorspace cs; + + cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? + fmt->fmt.pix_mp.colorspace : + fmt->fmt.pix.colorspace; + + ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? + fmt->fmt.pix_mp.ycbcr_enc: + fmt->fmt.pix.ycbcr_enc; + + xfer = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? + fmt->fmt.pix_mp.xfer_func: + fmt->fmt.pix.xfer_func; + + switch (xfer) { + case V4L2_XFER_FUNC_709: return AVCOL_TRC_BT709; + case V4L2_XFER_FUNC_SRGB: return AVCOL_TRC_IEC61966_2_1; + default: + break; + } + + switch (cs) { + case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_TRC_GAMMA22; + case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_TRC_GAMMA28; + case V4L2_COLORSPACE_SMPTE170M: return AVCOL_TRC_SMPTE170M; + case V4L2_COLORSPACE_SMPTE240M: return AVCOL_TRC_SMPTE240M; + default: + break; + } + + switch (ycbcr) { + case V4L2_YCBCR_ENC_XV709: + case V4L2_YCBCR_ENC_XV601: return AVCOL_TRC_BT1361_ECG; + default: + break; + } + + return AVCOL_TRC_UNSPECIFIED; +} + +static enum AVColorRange get_color_range(const struct v4l2_format *const fmt) +{ + enum v4l2_quantization qt; + + qt = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? + fmt->fmt.pix_mp.quantization : + fmt->fmt.pix.quantization; + + switch (qt) { + case V4L2_QUANTIZATION_LIM_RANGE: return AVCOL_RANGE_MPEG; + case V4L2_QUANTIZATION_FULL_RANGE: return AVCOL_RANGE_JPEG; + default: + break; + } + + return AVCOL_RANGE_UNSPECIFIED; +} + +static int set_src_fmt(V4L2Queue * const q, const AVFrame * const frame) +{ + struct v4l2_format *const format = &q->format; + const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; + + const uint32_t drm_fmt = src->layers[0].format; + // Treat INVALID as LINEAR + const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ? + DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier; + uint32_t pix_fmt = 0; + uint32_t w = 0; + uint32_t h = 0; + uint32_t bpl = src->layers[0].planes[0].pitch; + + // We really don't expect multiple layers + // All formats that we currently cope with are single object + + if (src->nb_layers != 1 || src->nb_objects != 1) + return AVERROR(EINVAL); + + switch (drm_fmt) { + case DRM_FORMAT_YUV420: + if (mod == DRM_FORMAT_MOD_LINEAR) { + if (src->layers[0].nb_planes != 3) + break; + pix_fmt = V4L2_PIX_FMT_YUV420; + h = src->layers[0].planes[1].offset / bpl; + w = bpl; + } + break; + + case DRM_FORMAT_NV12: + if (mod == DRM_FORMAT_MOD_LINEAR) { + if (src->layers[0].nb_planes != 2) + break; + pix_fmt = V4L2_PIX_FMT_NV12; + h = src->layers[0].planes[1].offset / bpl; + w = bpl; + } +#if CONFIG_SAND + else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { + if (src->layers[0].nb_planes != 2) + break; + pix_fmt = V4L2_PIX_FMT_NV12_COL128; + w = bpl; + h = src->layers[0].planes[1].offset / 128; + bpl = fourcc_mod_broadcom_param(mod); + } +#endif + break; + + case DRM_FORMAT_P030: +#if CONFIG_SAND + if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { + if (src->layers[0].nb_planes != 2) + break; + pix_fmt = V4L2_PIX_FMT_NV12_10_COL128; + w = bpl / 2; // Matching lie to how we construct this + h = src->layers[0].planes[1].offset / 128; + bpl = fourcc_mod_broadcom_param(mod); + } +#endif + break; + + default: + break; + } + + if (!pix_fmt) + return AVERROR(EINVAL); + + if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { + struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp; + + pix->width = w; + pix->height = h; + pix->pixelformat = pix_fmt; + pix->plane_fmt[0].bytesperline = bpl; + pix->num_planes = 1; + } + else { + struct v4l2_pix_format *const pix = &format->fmt.pix; + + pix->width = w; + pix->height = h; + pix->pixelformat = pix_fmt; + pix->bytesperline = bpl; + } + + set_fmt_color(format, frame->color_primaries, frame->colorspace, frame->color_trc); + set_fmt_color_range(format, frame->color_range); + + q->sel.r.width = frame->width - (frame->crop_left + frame->crop_right); + q->sel.r.height = frame->height - (frame->crop_top + frame->crop_bottom); + q->sel.r.left = frame->crop_left; + q->sel.r.top = frame->crop_top; + + return 0; +} + + +static int set_dst_format(DeintV4L2M2MContext * const priv, V4L2Queue *queue, uint32_t pixelformat, uint32_t field, int width, int height) +{ + struct v4l2_format * const fmt = &queue->format; + struct v4l2_selection *const sel = &queue->sel; + + memset(&fmt->fmt, 0, sizeof(fmt->fmt)); + + // Align w/h to 16 here in case there are alignment requirements at the next + // stage of the filter chain (also RPi deinterlace setup is bust and this + // fixes it) + if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { + fmt->fmt.pix_mp.pixelformat = pixelformat; + fmt->fmt.pix_mp.field = field; + fmt->fmt.pix_mp.width = FFALIGN(width, 16); + fmt->fmt.pix_mp.height = FFALIGN(height, 16); + } else { + fmt->fmt.pix.pixelformat = pixelformat; + fmt->fmt.pix.field = field; + fmt->fmt.pix.width = FFALIGN(width, 16); + fmt->fmt.pix.height = FFALIGN(height, 16); + } + + set_fmt_color(fmt, priv->colour_primaries, priv->colour_matrix, priv->colour_transfer); + set_fmt_color_range(fmt, priv->colour_range); + + sel->r.width = width; + sel->r.height = height; + sel->r.left = 0; + sel->r.top = 0; + + return do_s_fmt(queue); +} + +static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node) +{ + int ret; + + ctx->fd = open(node, O_RDWR | O_NONBLOCK, 0); + if (ctx->fd < 0) + return AVERROR(errno); + + ret = deint_v4l2m2m_prepare_context(ctx); + if (ret) { + av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to prepare context\n"); + goto fail; + } + + ret = deint_v4l2m2m_try_format(&ctx->capture, ctx->output_width, ctx->output_height, ctx->output_format); + if (ret) { + av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try dst format\n"); + goto fail; + } + + ret = deint_v4l2m2m_try_format(&ctx->output, ctx->width, ctx->height, AV_PIX_FMT_NONE); + if (ret) { + av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try src format\n"); + goto fail; + } + + return 0; + +fail: + close(ctx->fd); + ctx->fd = -1; + + return ret; +} + +static int deint_v4l2m2m_find_device(DeintV4L2M2MContextShared *ctx) +{ + int ret = AVERROR(EINVAL); + struct dirent *entry; + char node[PATH_MAX]; + DIR *dirp; + + dirp = opendir("/dev"); + if (!dirp) + return AVERROR(errno); + + for (entry = readdir(dirp); entry; entry = readdir(dirp)) { + + if (strncmp(entry->d_name, "video", 5)) + continue; + + snprintf(node, sizeof(node), "/dev/%s", entry->d_name); + av_log(ctx->logctx, AV_LOG_DEBUG, "probing device %s\n", node); + ret = deint_v4l2m2m_probe_device(ctx, node); + if (!ret) + break; + } + + closedir(dirp); + + if (ret) { + av_log(ctx->logctx, AV_LOG_ERROR, "Could not find a valid device\n"); + ctx->fd = -1; + + return ret; + } + + av_log(ctx->logctx, AV_LOG_INFO, "Using device %s\n", node); + + return 0; +} + +static int deint_v4l2m2m_enqueue_buffer(V4L2Buffer *buf) +{ + int ret; + + ret = ioctl(buf->q->ctx->fd, VIDIOC_QBUF, &buf->buffer); + if (ret < 0) + return AVERROR(errno); + + buf->enqueued = 1; + + return 0; +} + +static void +drm_frame_init(AVDRMFrameDescriptor * const d) +{ + unsigned int i; + for (i = 0; i != AV_DRM_MAX_PLANES; ++i) { + d->objects[i].fd = -1; + } +} + +static void +drm_frame_uninit(AVDRMFrameDescriptor * const d) +{ + unsigned int i; + for (i = 0; i != d->nb_objects; ++i) { + if (d->objects[i].fd != -1) { + close(d->objects[i].fd); + d->objects[i].fd = -1; + } + } +} + +static void +avbufs_delete(V4L2Buffer** ppavbufs, const unsigned int n) +{ + unsigned int i; + V4L2Buffer* const avbufs = *ppavbufs; + + if (avbufs == NULL) + return; + *ppavbufs = NULL; + + for (i = 0; i != n; ++i) { + V4L2Buffer* const avbuf = avbufs + i; + drm_frame_uninit(&avbuf->drm_frame); + } + + av_free(avbufs); +} + +static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf) +{ + struct v4l2_exportbuffer expbuf; + int i, ret; + uint64_t mod = DRM_FORMAT_MOD_LINEAR; + + AVDRMFrameDescriptor * const drm_desc = &avbuf->drm_frame; + AVDRMLayerDescriptor * const layer = &drm_desc->layers[0]; + const struct v4l2_format *const fmt = &q->format; + const uint32_t height = fmt_height(fmt); + ptrdiff_t bpl0; + + /* fill the DRM frame descriptor */ + drm_desc->nb_layers = 1; + layer->nb_planes = avbuf->num_planes; + + for (int i = 0; i < avbuf->num_planes; i++) { + layer->planes[i].object_index = i; + layer->planes[i].offset = 0; + layer->planes[i].pitch = fmt_bpl(fmt, i); + } + bpl0 = layer->planes[0].pitch; + + switch (fmt_pixelformat(fmt)) { +#if CONFIG_SAND + case V4L2_PIX_FMT_NV12_COL128: + mod = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl0); + layer->format = V4L2_PIX_FMT_NV12; + + if (avbuf->num_planes > 1) + break; + + layer->nb_planes = 2; + layer->planes[1].object_index = 0; + layer->planes[1].offset = height * 128; + layer->planes[0].pitch = fmt_width(fmt); + layer->planes[1].pitch = layer->planes[0].pitch; + break; +#endif + + case DRM_FORMAT_NV12: + layer->format = V4L2_PIX_FMT_NV12; + + if (avbuf->num_planes > 1) + break; + + layer->nb_planes = 2; + layer->planes[1].object_index = 0; + layer->planes[1].offset = bpl0 * height; + layer->planes[1].pitch = bpl0; + break; + + case V4L2_PIX_FMT_YUV420: + layer->format = DRM_FORMAT_YUV420; + + if (avbuf->num_planes > 1) + break; + + layer->nb_planes = 3; + layer->planes[1].object_index = 0; + layer->planes[1].offset = bpl0 * height; + layer->planes[1].pitch = bpl0 / 2; + layer->planes[2].object_index = 0; + layer->planes[2].offset = layer->planes[1].offset + ((bpl0 * height) / 4); + layer->planes[2].pitch = bpl0 / 2; + break; + + default: + drm_desc->nb_layers = 0; + return AVERROR(EINVAL); + } + + drm_desc->nb_objects = 0; + for (i = 0; i < avbuf->num_planes; i++) { + memset(&expbuf, 0, sizeof(expbuf)); + + expbuf.index = avbuf->buffer.index; + expbuf.type = avbuf->buffer.type; + expbuf.plane = i; + + ret = ioctl(avbuf->q->ctx->fd, VIDIOC_EXPBUF, &expbuf); + if (ret < 0) + return AVERROR(errno); + + drm_desc->objects[i].size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type) ? + avbuf->buffer.m.planes[i].length : avbuf->buffer.length; + drm_desc->objects[i].fd = expbuf.fd; + drm_desc->objects[i].format_modifier = mod; + drm_desc->nb_objects = i + 1; + } + + return 0; +} + +static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue) +{ + struct v4l2_format *fmt = &queue->format; + DeintV4L2M2MContextShared *ctx = queue->ctx; + struct v4l2_requestbuffers req; + int ret, i, multiplanar; + uint32_t memory; + + memory = V4L2_TYPE_IS_OUTPUT(fmt->type) ? + V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; + + multiplanar = V4L2_TYPE_IS_MULTIPLANAR(fmt->type); + + memset(&req, 0, sizeof(req)); + req.count = queue->num_buffers; + req.memory = memory; + req.type = fmt->type; + + ret = ioctl(ctx->fd, VIDIOC_REQBUFS, &req); + if (ret < 0) { + av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_REQBUFS failed: %s\n", strerror(errno)); + + return AVERROR(errno); + } + + queue->num_buffers = req.count; + queue->buffers = av_mallocz(queue->num_buffers * sizeof(V4L2Buffer)); + if (!queue->buffers) { + av_log(ctx->logctx, AV_LOG_ERROR, "malloc enomem\n"); + + return AVERROR(ENOMEM); + } + + for (i = 0; i < queue->num_buffers; i++) { + V4L2Buffer * const buf = &queue->buffers[i]; + + buf->enqueued = 0; + buf->q = queue; + + buf->buffer.type = fmt->type; + buf->buffer.memory = memory; + buf->buffer.index = i; + + if (multiplanar) { + buf->buffer.length = VIDEO_MAX_PLANES; + buf->buffer.m.planes = buf->planes; + } + + drm_frame_init(&buf->drm_frame); + } + + for (i = 0; i < queue->num_buffers; i++) { + V4L2Buffer * const buf = &queue->buffers[i]; + + ret = ioctl(ctx->fd, VIDIOC_QUERYBUF, &buf->buffer); + if (ret < 0) { + ret = AVERROR(errno); + + goto fail; + } + + buf->num_planes = multiplanar ? buf->buffer.length : 1; + + if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) { + ret = deint_v4l2m2m_enqueue_buffer(buf); + if (ret) + goto fail; + + ret = v4l2_buffer_export_drm(queue, buf); + if (ret) + goto fail; + } + } + + return 0; + +fail: + avbufs_delete(&queue->buffers, queue->num_buffers); + queue->num_buffers = 0; + return ret; +} + +static int deint_v4l2m2m_streamon(V4L2Queue *queue) +{ + DeintV4L2M2MContextShared * const ctx = queue->ctx; + int type = queue->format.type; + int ret; + + ret = ioctl(ctx->fd, VIDIOC_STREAMON, &type); + av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno)); + if (ret < 0) + return AVERROR(errno); + + return 0; +} + +static int deint_v4l2m2m_streamoff(V4L2Queue *queue) +{ + DeintV4L2M2MContextShared * const ctx = queue->ctx; + int type = queue->format.type; + int ret; + + ret = ioctl(ctx->fd, VIDIOC_STREAMOFF, &type); + av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno)); + if (ret < 0) + return AVERROR(errno); + + return 0; +} + +// timeout in ms +static V4L2Buffer* deint_v4l2m2m_dequeue_buffer(V4L2Queue *queue, int timeout) +{ + struct v4l2_plane planes[VIDEO_MAX_PLANES]; + DeintV4L2M2MContextShared *ctx = queue->ctx; + struct v4l2_buffer buf = { 0 }; + V4L2Buffer* avbuf = NULL; + struct pollfd pfd; + short events; + int ret; + + if (V4L2_TYPE_IS_OUTPUT(queue->format.type)) + events = POLLOUT | POLLWRNORM; + else + events = POLLIN | POLLRDNORM; + + pfd.events = events; + pfd.fd = ctx->fd; + + for (;;) { + ret = poll(&pfd, 1, timeout); + if (ret > 0) + break; + if (errno == EINTR) + continue; + return NULL; + } + + if (pfd.revents & POLLERR) + return NULL; + + if (pfd.revents & events) { + memset(&buf, 0, sizeof(buf)); + buf.memory = V4L2_MEMORY_MMAP; + buf.type = queue->format.type; + if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) { + memset(planes, 0, sizeof(planes)); + buf.length = VIDEO_MAX_PLANES; + buf.m.planes = planes; + } + + ret = ioctl(ctx->fd, VIDIOC_DQBUF, &buf); + if (ret) { + if (errno != EAGAIN) + av_log(ctx->logctx, AV_LOG_DEBUG, "VIDIOC_DQBUF, errno (%s)\n", + av_err2str(AVERROR(errno))); + return NULL; + } + + avbuf = &queue->buffers[buf.index]; + avbuf->enqueued = 0; + avbuf->buffer = buf; + if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) { + memcpy(avbuf->planes, planes, sizeof(planes)); + avbuf->buffer.m.planes = avbuf->planes; + } + return avbuf; + } + + return NULL; +} + +static V4L2Buffer *deint_v4l2m2m_find_free_buf(V4L2Queue *queue) +{ + int i; + V4L2Buffer *buf = NULL; + + for (i = 0; i < queue->num_buffers; i++) + if (!queue->buffers[i].enqueued) { + buf = &queue->buffers[i]; + break; + } + return buf; +} + +static void deint_v4l2m2m_unref_queued(V4L2Queue *queue) +{ + int i; + V4L2Buffer *buf = NULL; + + if (!queue || !queue->buffers) + return; + for (i = 0; i < queue->num_buffers; i++) { + buf = &queue->buffers[i]; + if (queue->buffers[i].enqueued) + av_frame_unref(&buf->frame); + } +} + +static void recycle_q(V4L2Queue * const queue) +{ + V4L2Buffer* avbuf; + while (avbuf = deint_v4l2m2m_dequeue_buffer(queue, 0), avbuf) { + av_frame_unref(&avbuf->frame); + } +} + +static int count_enqueued(V4L2Queue *queue) +{ + int i; + int n = 0; + + if (queue->buffers == NULL) + return 0; + + for (i = 0; i < queue->num_buffers; i++) + if (queue->buffers[i].enqueued) + ++n; + return n; +} + +static int deint_v4l2m2m_enqueue_frame(V4L2Queue * const queue, AVFrame * const frame) +{ + DeintV4L2M2MContextShared *const ctx = queue->ctx; + AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)frame->data[0]; + V4L2Buffer *buf; + int i; + + if (V4L2_TYPE_IS_OUTPUT(queue->format.type)) + recycle_q(queue); + + buf = deint_v4l2m2m_find_free_buf(queue); + if (!buf) { + av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d finding free buf\n", __func__, 0); + return AVERROR(EAGAIN); + } + if (V4L2_TYPE_IS_MULTIPLANAR(buf->buffer.type)) + for (i = 0; i < drm_desc->nb_objects; i++) + buf->buffer.m.planes[i].m.fd = drm_desc->objects[i].fd; + else + buf->buffer.m.fd = drm_desc->objects[0].fd; + + buf->buffer.field = !frame->interlaced_frame ? V4L2_FIELD_NONE : + frame->top_field_first ? V4L2_FIELD_INTERLACED_TB : + V4L2_FIELD_INTERLACED_BT; + + if (ctx->field_order != buf->buffer.field) { + av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Field changed: %d->%d\n", __func__, ctx->field_order, buf->buffer.field); + ctx->field_order = buf->buffer.field; + } + + buf->buffer.timestamp = pts_track_add_frame(&ctx->track, frame); + + buf->drm_frame.objects[0].fd = drm_desc->objects[0].fd; + + av_frame_move_ref(&buf->frame, frame); + + return deint_v4l2m2m_enqueue_buffer(buf); +} + +static void deint_v4l2m2m_destroy_context(DeintV4L2M2MContextShared *ctx) +{ + if (atomic_fetch_sub(&ctx->refcount, 1) == 1) { + V4L2Queue *capture = &ctx->capture; + V4L2Queue *output = &ctx->output; + + av_log(NULL, AV_LOG_DEBUG, "%s - destroying context\n", __func__); + + if (ctx->fd >= 0) { + deint_v4l2m2m_streamoff(capture); + deint_v4l2m2m_streamoff(output); + } + + avbufs_delete(&capture->buffers, capture->num_buffers); + + deint_v4l2m2m_unref_queued(output); + + av_buffer_unref(&ctx->hw_frames_ctx); + + if (capture->buffers) + av_free(capture->buffers); + + if (output->buffers) + av_free(output->buffers); + + if (ctx->fd >= 0) { + close(ctx->fd); + ctx->fd = -1; + } + + av_free(ctx); + } +} + +static void v4l2_free_buffer(void *opaque, uint8_t *unused) +{ + V4L2Buffer *buf = opaque; + DeintV4L2M2MContextShared *ctx = buf->q->ctx; + + if (!ctx->done) + deint_v4l2m2m_enqueue_buffer(buf); + + deint_v4l2m2m_destroy_context(ctx); +} + +// timeout in ms +static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int timeout) +{ + DeintV4L2M2MContextShared *ctx = queue->ctx; + V4L2Buffer* avbuf; + enum AVColorPrimaries color_primaries; + enum AVColorSpace colorspace; + enum AVColorTransferCharacteristic color_trc; + enum AVColorRange color_range; + + av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__); + + if (queue->eos) { + av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: EOS\n", __func__); + return AVERROR_EOF; + } + + avbuf = deint_v4l2m2m_dequeue_buffer(queue, timeout); + if (!avbuf) { + av_log(ctx->logctx, AV_LOG_DEBUG, "%s: No buffer to dequeue (timeout=%d)\n", __func__, timeout); + return AVERROR(EAGAIN); + } + + if (V4L2_TYPE_IS_CAPTURE(avbuf->buffer.type)) { + if ((avbuf->buffer.flags & V4L2_BUF_FLAG_LAST) != 0) + queue->eos = 1; + if (buf_bytesused0(&avbuf->buffer) == 0) + return queue->eos ? AVERROR_EOF : AVERROR(EINVAL); + } + + // Fill in PTS and anciliary info from src frame + pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame); + + frame->buf[0] = av_buffer_create((uint8_t *) &avbuf->drm_frame, + sizeof(avbuf->drm_frame), v4l2_free_buffer, + avbuf, AV_BUFFER_FLAG_READONLY); + if (!frame->buf[0]) { + av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d creating buffer\n", __func__, 0); + return AVERROR(ENOMEM); + } + + atomic_fetch_add(&ctx->refcount, 1); + + frame->data[0] = (uint8_t *)&avbuf->drm_frame; + frame->format = AV_PIX_FMT_DRM_PRIME; + if (ctx->hw_frames_ctx) + frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx); + frame->height = ctx->output_height; + frame->width = ctx->output_width; + + color_primaries = get_color_primaries(&ctx->capture.format); + colorspace = get_color_space(&ctx->capture.format); + color_trc = get_color_trc(&ctx->capture.format); + color_range = get_color_range(&ctx->capture.format); + + // If the color parameters are unspecified by V4L2 then leave alone as they + // will have been copied from src + if (color_primaries != AVCOL_PRI_UNSPECIFIED) + frame->color_primaries = color_primaries; + if (colorspace != AVCOL_SPC_UNSPECIFIED) + frame->colorspace = colorspace; + if (color_trc != AVCOL_TRC_UNSPECIFIED) + frame->color_trc = color_trc; + if (color_range != AVCOL_RANGE_UNSPECIFIED) + frame->color_range = color_range; + + if (ctx->filter_type == FILTER_V4L2_DEINTERLACE) { + // Not interlaced now + frame->interlaced_frame = 0; // *** Fill in from dst buffer? + frame->top_field_first = 0; + // Pkt duration halved + frame->pkt_duration /= 2; + } + + if (avbuf->buffer.flags & V4L2_BUF_FLAG_ERROR) { + av_log(ctx->logctx, AV_LOG_ERROR, "driver decode error\n"); + frame->decode_error_flags |= FF_DECODE_ERROR_INVALID_BITSTREAM; + } + + av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: PTS=%"PRId64"\n", __func__, frame->pts); + return 0; +} + +static int deint_v4l2m2m_config_props(AVFilterLink *outlink) +{ + AVFilterLink *inlink = outlink->src->inputs[0]; + AVFilterContext *avctx = outlink->src; + DeintV4L2M2MContext *priv = avctx->priv; + DeintV4L2M2MContextShared *ctx = priv->shared; + int ret; + + ctx->height = avctx->inputs[0]->h; + ctx->width = avctx->inputs[0]->w; + + if (ctx->filter_type == FILTER_V4L2_SCALE) { + if ((ret = ff_scale_eval_dimensions(priv, + priv->w_expr, priv->h_expr, + inlink, outlink, + &ctx->output_width, &ctx->output_height)) < 0) + return ret; + + ff_scale_adjust_dimensions(inlink, &ctx->output_width, &ctx->output_height, + priv->force_original_aspect_ratio, priv->force_divisible_by); + } + else { + ctx->output_width = ctx->width; + ctx->output_height = ctx->height; + } + + av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d FR: %d/%d->%d/%d\n", __func__, + ctx->width, ctx->height, ctx->output_width, ctx->output_height, + inlink->frame_rate.num, inlink->frame_rate.den, outlink->frame_rate.num, outlink->frame_rate.den); + + outlink->time_base = inlink->time_base; + outlink->w = ctx->output_width; + outlink->h = ctx->output_height; + outlink->format = inlink->format; + if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && inlink->frame_rate.den != 0) + outlink->frame_rate = (AVRational){inlink->frame_rate.num * 2, inlink->frame_rate.den}; + + if (inlink->sample_aspect_ratio.num) + outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio); + else + outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; + + ret = deint_v4l2m2m_find_device(ctx); + if (ret) + return ret; + + if (inlink->hw_frames_ctx) { + ctx->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); + if (!ctx->hw_frames_ctx) + return AVERROR(ENOMEM); + } + return 0; +} + +static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc) +{ + const uint64_t mod = drm_desc->objects[0].format_modifier; + const int is_linear = (mod == DRM_FORMAT_MOD_LINEAR || mod == DRM_FORMAT_MOD_INVALID); + + // Only currently support single object things + if (drm_desc->nb_objects != 1) + return 0; + + switch (drm_desc->layers[0].format) { + case DRM_FORMAT_YUV420: + return is_linear ? V4L2_PIX_FMT_YUV420 : 0; + case DRM_FORMAT_NV12: + return is_linear ? V4L2_PIX_FMT_NV12 : +#if CONFIG_SAND + fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? V4L2_PIX_FMT_NV12_COL128 : +#endif + 0; + default: + break; + } + return 0; +} + +static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) +{ + AVFilterContext *avctx = link->dst; + DeintV4L2M2MContext *priv = avctx->priv; + DeintV4L2M2MContextShared *ctx = priv->shared; + V4L2Queue *capture = &ctx->capture; + V4L2Queue *output = &ctx->output; + int ret; + + av_log(priv, AV_LOG_DEBUG, "<<< %s: input pts: %"PRId64" dts: %"PRId64" field :%d interlaced: %d aspect:%d/%d\n", + __func__, in->pts, in->pkt_dts, in->top_field_first, in->interlaced_frame, in->sample_aspect_ratio.num, in->sample_aspect_ratio.den); + av_log(priv, AV_LOG_DEBUG, "--- %s: in status in %d/ot %d; out status in %d/out %d\n", __func__, + avctx->inputs[0]->status_in, avctx->inputs[0]->status_out, avctx->outputs[0]->status_in, avctx->outputs[0]->status_out); + + if (ctx->field_order == V4L2_FIELD_ANY) { + const AVDRMFrameDescriptor * const drm_desc = (AVDRMFrameDescriptor *)in->data[0]; + uint32_t pixelformat = desc_pixelformat(drm_desc); + + if (pixelformat == 0) { + av_log(avctx, AV_LOG_ERROR, "Unsupported DRM format %s in %d objects, modifier %#" PRIx64 "\n", + av_fourcc2str(drm_desc->layers[0].format), + drm_desc->nb_objects, drm_desc->objects[0].format_modifier); + return AVERROR(EINVAL); + } + + ctx->orig_width = drm_desc->layers[0].planes[0].pitch; + ctx->orig_height = drm_desc->layers[0].planes[1].offset / ctx->orig_width; + + av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height, + drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset); + + if ((ret = set_src_fmt(output, in)) != 0) { + av_log(avctx, AV_LOG_WARNING, "Unknown input DRM format: %s mod: %#" PRIx64 "\n", + av_fourcc2str(drm_desc->layers[0].format), drm_desc->objects[0].format_modifier); + return ret; + } + + ret = do_s_fmt(output); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed to set source format\n"); + return ret; + } + + if (ctx->output_format != AV_PIX_FMT_NONE) + pixelformat = fmt_av_to_v4l2(ctx->output_format); + ret = set_dst_format(priv, capture, pixelformat, V4L2_FIELD_NONE, ctx->output_width, ctx->output_height); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed to set destination format\n"); + return ret; + } + + ret = deint_v4l2m2m_allocate_buffers(capture); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed to allocate destination buffers\n"); + return ret; + } + + ret = deint_v4l2m2m_streamon(capture); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed set destination streamon: %s\n", av_err2str(ret)); + return ret; + } + + ret = deint_v4l2m2m_allocate_buffers(output); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed to allocate src buffers\n"); + return ret; + } + + ret = deint_v4l2m2m_streamon(output); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed set src streamon: %s\n", av_err2str(ret)); + return ret; + } + + if (in->top_field_first) + ctx->field_order = V4L2_FIELD_INTERLACED_TB; + else + ctx->field_order = V4L2_FIELD_INTERLACED_BT; + + { + struct v4l2_encoder_cmd ecmd = { + .cmd = V4L2_ENC_CMD_STOP + }; + ctx->has_enc_stop = 0; + if (ioctl(ctx->fd, VIDIOC_TRY_ENCODER_CMD, &ecmd) == 0) { + av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop succeeded\n"); + ctx->has_enc_stop = 1; + } + else { + av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop fail: %s\n", av_err2str(AVERROR(errno))); + } + + } + } + + ret = deint_v4l2m2m_enqueue_frame(output, in); + + av_log(priv, AV_LOG_TRACE, ">>> %s: %s\n", __func__, av_err2str(ret)); + return ret; +} + +static int +ack_inlink(AVFilterContext * const avctx, DeintV4L2M2MContextShared *const s, + AVFilterLink * const inlink) +{ + int instatus; + int64_t inpts; + + if (ff_inlink_acknowledge_status(inlink, &instatus, &inpts) <= 0) + return 0; + + s->drain = instatus; + s->drain_pts = inpts; + s->drain_state = DRAIN_TIMEOUT; + + if (s->field_order == V4L2_FIELD_ANY) { // Not yet started + s->drain_state = DRAIN_DONE; + } + else if (s->one_to_one) { + s->drain_state = DRAIN_LAST; + } + else if (s->has_enc_stop) { + struct v4l2_encoder_cmd ecmd = { + .cmd = V4L2_ENC_CMD_STOP + }; + if (ioctl(s->fd, VIDIOC_ENCODER_CMD, &ecmd) == 0) { + av_log(avctx->priv, AV_LOG_DEBUG, "Do Encode stop\n"); + s->drain_state = DRAIN_EOS; + } + else { + av_log(avctx->priv, AV_LOG_WARNING, "Encode stop fail: %s\n", av_err2str(AVERROR(errno))); + } + } + return 1; +} + +static int deint_v4l2m2m_activate(AVFilterContext *avctx) +{ + DeintV4L2M2MContext * const priv = avctx->priv; + DeintV4L2M2MContextShared *const s = priv->shared; + AVFilterLink * const outlink = avctx->outputs[0]; + AVFilterLink * const inlink = avctx->inputs[0]; + int n = 0; + int cn = 99; + int did_something = 0; + + av_log(priv, AV_LOG_TRACE, "<<< %s\n", __func__); + + FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx); + + ack_inlink(avctx, s, inlink); + + if (s->field_order != V4L2_FIELD_ANY) // Can't DQ if no setup! + { + AVFrame * frame = av_frame_alloc(); + int rv; + + recycle_q(&s->output); + n = count_enqueued(&s->output); + + if (frame == NULL) { + av_log(priv, AV_LOG_ERROR, "%s: error allocating frame\n", __func__); + return AVERROR(ENOMEM); + } + + rv = deint_v4l2m2m_dequeue_frame(&s->capture, frame, + drain_frame_expected(s->drain_state) || n > 4 ? 300 : 0); + if (rv != 0) { + av_frame_free(&frame); + if (rv == AVERROR_EOF) { + av_log(priv, AV_LOG_DEBUG, "%s: --- DQ EOF\n", __func__); + s->drain_state = DRAIN_DONE; + } + else if (rv == AVERROR(EAGAIN)) { + if (s->drain_state != DRAIN_NONE) { + av_log(priv, AV_LOG_DEBUG, "%s: --- DQ empty - drain done\n", __func__); + s->drain_state = DRAIN_DONE; + } + } + else { + av_log(priv, AV_LOG_ERROR, ">>> %s: DQ fail: %s\n", __func__, av_err2str(rv)); + return rv; + } + } + else { + frame->interlaced_frame = 0; + // frame is always consumed by filter_frame - even on error despite + // a somewhat confusing comment in the header + rv = ff_filter_frame(outlink, frame); + ++s->frames_tx; + + av_log(priv, AV_LOG_TRACE, "%s: Filtered: %s\n", __func__, av_err2str(rv)); + did_something = 1; + + if (s->drain_state != DRAIN_NONE && pts_track_count(&s->track) == 0) { + av_log(priv, AV_LOG_DEBUG, "%s: --- DQ last - drain done\n", __func__); + s->drain_state = DRAIN_DONE; + } + } + + cn = count_enqueued(&s->capture); + } + + if (s->drain_state == DRAIN_DONE) { + ff_outlink_set_status(outlink, s->drain, s->drain_pts); + av_log(priv, AV_LOG_TRACE, ">>> %s: Status done: %s\n", __func__, av_err2str(s->drain)); + return 0; + } + + recycle_q(&s->output); + n = count_enqueued(&s->output); + + while (n < 6 && !s->drain) { + AVFrame * frame; + int rv; + + if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) { + av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv)); + return rv; + } + + if (frame == NULL) { + av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__); + if (!ack_inlink(avctx, s, inlink)) { + ff_inlink_request_frame(inlink); + av_log(priv, AV_LOG_TRACE, "%s: req frame\n", __func__); + } + break; + } + ++s->frames_rx; + + rv = deint_v4l2m2m_filter_frame(inlink, frame); + av_frame_free(&frame); + + if (rv != 0) + return rv; + + av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__); + did_something = 1; + ++n; + } + + if ((n > 4 || s->drain) && ff_outlink_frame_wanted(outlink)) { + ff_filter_set_ready(avctx, 1); + did_something = 1; + av_log(priv, AV_LOG_TRACE, "%s: ready\n", __func__); + } + + av_log(priv, AV_LOG_TRACE, ">>> %s: OK (n=%d, cn=%d)\n", __func__, n, cn); + return did_something ? 0 : FFERROR_NOT_READY; +} + +static av_cold int common_v4l2m2m_init(AVFilterContext * const avctx, const filter_type_v4l2_t filter_type) +{ + DeintV4L2M2MContext * const priv = avctx->priv; + DeintV4L2M2MContextShared * const ctx = av_mallocz(sizeof(DeintV4L2M2MContextShared)); + + if (!ctx) { + av_log(priv, AV_LOG_ERROR, "%s: error %d allocating context\n", __func__, 0); + return AVERROR(ENOMEM); + } + priv->shared = ctx; + ctx->logctx = priv; + ctx->filter_type = filter_type; + ctx->fd = -1; + ctx->output.ctx = ctx; + ctx->output.num_buffers = 8; + ctx->output.name = "OUTPUT"; + ctx->capture.ctx = ctx; + ctx->capture.num_buffers = 12; + ctx->capture.name = "CAPTURE"; + ctx->done = 0; + ctx->field_order = V4L2_FIELD_ANY; + + pts_track_init(&ctx->track, priv); + + atomic_init(&ctx->refcount, 1); + + if (priv->output_format_string) { + ctx->output_format = av_get_pix_fmt(priv->output_format_string); + if (ctx->output_format == AV_PIX_FMT_NONE) { + av_log(avctx, AV_LOG_ERROR, "Invalid ffmpeg output format '%s'.\n", priv->output_format_string); + return AVERROR(EINVAL); + } + if (fmt_av_to_v4l2(ctx->output_format) == 0) { + av_log(avctx, AV_LOG_ERROR, "Unsupported output format for V4L2: %s.\n", av_get_pix_fmt_name(ctx->output_format)); + return AVERROR(EINVAL); + } + } else { + // Use the input format once that is configured. + ctx->output_format = AV_PIX_FMT_NONE; + } + +#define STRING_OPTION(var_name, func_name, default_value) do { \ + if (priv->var_name ## _string) { \ + int var = av_ ## func_name ## _from_name(priv->var_name ## _string); \ + if (var < 0) { \ + av_log(avctx, AV_LOG_ERROR, "Invalid %s.\n", #var_name); \ + return AVERROR(EINVAL); \ + } \ + priv->var_name = var; \ + } else { \ + priv->var_name = default_value; \ + } \ + } while (0) + + STRING_OPTION(colour_primaries, color_primaries, AVCOL_PRI_UNSPECIFIED); + STRING_OPTION(colour_transfer, color_transfer, AVCOL_TRC_UNSPECIFIED); + STRING_OPTION(colour_matrix, color_space, AVCOL_SPC_UNSPECIFIED); + STRING_OPTION(chroma_location, chroma_location, AVCHROMA_LOC_UNSPECIFIED); + + return 0; +} + +static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx) +{ + return common_v4l2m2m_init(avctx, FILTER_V4L2_DEINTERLACE); +} + +static av_cold int scale_v4l2m2m_init(AVFilterContext *avctx) +{ + int rv; + DeintV4L2M2MContext * priv; + DeintV4L2M2MContextShared * ctx; + + if ((rv = common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE)) != 0) + return rv; + + priv = avctx->priv; + ctx = priv->shared; + + ctx->one_to_one = 1; + return 0; +} + +static void deint_v4l2m2m_uninit(AVFilterContext *avctx) +{ + DeintV4L2M2MContext *priv = avctx->priv; + DeintV4L2M2MContextShared *ctx = priv->shared; + + av_log(priv, AV_LOG_VERBOSE, "Frames Rx: %u, Frames Tx: %u\n", + ctx->frames_rx, ctx->frames_tx); + ctx->done = 1; + ctx->logctx = NULL; // Log to NULL works, log to missing crashes + pts_track_uninit(&ctx->track); + deint_v4l2m2m_destroy_context(ctx); +} + +static const AVOption deinterlace_v4l2m2m_options[] = { + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(deinterlace_v4l2m2m); + +#define OFFSET(x) offsetof(DeintV4L2M2MContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) + +static const AVOption scale_v4l2m2m_options[] = { + { "w", "Output video width", + OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS }, + { "h", "Output video height", + OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS }, + { "format", "Output video format (software format of hardware frames)", + OFFSET(output_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS }, + // These colour properties match the ones of the same name in vf_scale. + { "out_color_matrix", "Output colour matrix coefficient set", + OFFSET(colour_matrix_string), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS }, + { "out_range", "Output colour range", + OFFSET(colour_range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_UNSPECIFIED }, + AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, FLAGS, "range" }, + { "full", "Full range", + 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, + { "limited", "Limited range", + 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, + { "jpeg", "Full range", + 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, + { "mpeg", "Limited range", + 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, + { "tv", "Limited range", + 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, + { "pc", "Full range", + 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, + // These colour properties match the ones in the VAAPI scaler + { "out_color_primaries", "Output colour primaries", + OFFSET(colour_primaries_string), AV_OPT_TYPE_STRING, + { .str = NULL }, .flags = FLAGS }, + { "out_color_transfer", "Output colour transfer characteristics", + OFFSET(colour_transfer_string), AV_OPT_TYPE_STRING, + { .str = NULL }, .flags = FLAGS }, + { "out_chroma_location", "Output chroma sample location", + OFFSET(chroma_location_string), AV_OPT_TYPE_STRING, + { .str = NULL }, .flags = FLAGS }, + { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 2, FLAGS, "force_oar" }, + { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(scale_v4l2m2m); + +static const AVFilterPad deint_v4l2m2m_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + }, +}; + +static const AVFilterPad deint_v4l2m2m_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = deint_v4l2m2m_config_props, + }, +}; + +AVFilter ff_vf_deinterlace_v4l2m2m = { + .name = "deinterlace_v4l2m2m", + .description = NULL_IF_CONFIG_SMALL("V4L2 M2M deinterlacer"), + .priv_size = sizeof(DeintV4L2M2MContext), + .init = &deint_v4l2m2m_init, + .uninit = &deint_v4l2m2m_uninit, + FILTER_INPUTS(deint_v4l2m2m_inputs), + FILTER_OUTPUTS(deint_v4l2m2m_outputs), + FILTER_SINGLE_SAMPLEFMT(AV_PIX_FMT_DRM_PRIME), + .priv_class = &deinterlace_v4l2m2m_class, + .activate = deint_v4l2m2m_activate, +}; + +AVFilter ff_vf_scale_v4l2m2m = { + .name = "scale_v4l2m2m", + .description = NULL_IF_CONFIG_SMALL("V4L2 M2M scaler"), + .priv_size = sizeof(DeintV4L2M2MContext), + .init = &scale_v4l2m2m_init, + .uninit = &deint_v4l2m2m_uninit, + FILTER_INPUTS(deint_v4l2m2m_inputs), + FILTER_OUTPUTS(deint_v4l2m2m_outputs), + FILTER_SINGLE_SAMPLEFMT(AV_PIX_FMT_DRM_PRIME), + .priv_class = &scale_v4l2m2m_class, + .activate = deint_v4l2m2m_activate, +}; + diff --git a/libavfilter/vf_unsand.c b/libavfilter/vf_unsand.c new file mode 100644 index 0000000000..7100f2fc9b --- /dev/null +++ b/libavfilter/vf_unsand.c @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2007 Bobby Bingham + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * format and noformat video filters + */ + +#include + +#include "libavutil/internal.h" +#include "libavutil/mem.h" +#include "libavutil/pixdesc.h" +#include "libavutil/opt.h" +#include "libavutil/rpi_sand_fns.h" + +#include "avfilter.h" +#include "formats.h" +#include "internal.h" +#include "video.h" + +typedef struct UnsandContext { + const AVClass *class; +} UnsandContext; + +static av_cold void uninit(AVFilterContext *ctx) +{ +// UnsandContext *s = ctx->priv; +} + +static av_cold int init(AVFilterContext *ctx) +{ +// UnsandContext *s = ctx->priv; + + return 0; +} + + +static int filter_frame(AVFilterLink *link, AVFrame *in) +{ + AVFilterLink * const outlink = link->dst->outputs[0]; + AVFrame *out = NULL; + int rv = 0; + + if (outlink->format == in->format) { + // If nothing to do then do nothing + out = in; + } + else + { + if ((out = ff_get_video_buffer(outlink, av_frame_cropped_width(in), av_frame_cropped_height(in))) == NULL) + { + rv = AVERROR(ENOMEM); + goto fail; + } + if (av_rpi_sand_to_planar_frame(out, in) != 0) + { + rv = -1; + goto fail; + } + + av_frame_free(&in); + } + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&out); + av_frame_free(&in); + return rv; +} + +#if 0 +static void dump_fmts(const AVFilterFormats * fmts) +{ + int i; + if (fmts== NULL) { + printf("NULL\n"); + return; + } + for (i = 0; i < fmts->nb_formats; ++i) { + printf(" %d", fmts->formats[i]); + } + printf("\n"); +} +#endif + +static int query_formats(AVFilterContext *ctx) +{ +// UnsandContext *s = ctx->priv; + int ret; + + // If we aren't connected at both ends then just do nothing + if (ctx->inputs[0] == NULL || ctx->outputs[0] == NULL) + return 0; + + // Our output formats depend on our input formats and we can't/don't + // want to convert between bit depths so we need to wait for the source + // to have an opinion before we do + if (ctx->inputs[0]->incfg.formats == NULL) + return AVERROR(EAGAIN); + + // Accept anything + if (ctx->inputs[0]->outcfg.formats == NULL && + (ret = ff_formats_ref(ctx->inputs[0]->incfg.formats, &ctx->inputs[0]->outcfg.formats)) < 0) + return ret; + + // Filter out sand formats + + // Generate a container if we don't already have one + if (ctx->outputs[0]->incfg.formats == NULL) + { + // Somewhat rubbish way of ensuring we have a good structure + const static enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE}; + AVFilterFormats *formats = ff_make_format_list(out_fmts); + + if (formats == NULL) + return AVERROR(ENOMEM); + if ((ret = ff_formats_ref(formats, &ctx->outputs[0]->incfg.formats)) < 0) + return ret; + } + + // Replace old format list with new filtered list derived from what our + // input says it can do + { + const AVFilterFormats * const src_ff = ctx->inputs[0]->outcfg.formats; + AVFilterFormats * const dst_ff = ctx->outputs[0]->incfg.formats; + enum AVPixelFormat *dst_fmts = av_malloc(sizeof(enum AVPixelFormat) * src_ff->nb_formats); + int i; + int n = 0; + int seen_420p = 0; + int seen_420p10 = 0; + + for (i = 0; i < src_ff->nb_formats; ++i) { + const enum AVPixelFormat f = src_ff->formats[i]; + + switch (f){ + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_SAND128: + case AV_PIX_FMT_RPI4_8: + if (!seen_420p) { + seen_420p = 1; + dst_fmts[n++] = AV_PIX_FMT_YUV420P; + } + break; + case AV_PIX_FMT_SAND64_10: + case AV_PIX_FMT_YUV420P10: + case AV_PIX_FMT_RPI4_10: + if (!seen_420p10) { + seen_420p10 = 1; + dst_fmts[n++] = AV_PIX_FMT_YUV420P10; + } + break; + default: + dst_fmts[n++] = f; + break; + } + } + + av_freep(&dst_ff->formats); + dst_ff->formats = dst_fmts; + dst_ff->nb_formats = n; + } + +// printf("Unsand: %s calc: ", __func__); +// dump_fmts(ctx->outputs[0]->incfg.formats); + + return 0; +} + + +#define OFFSET(x) offsetof(UnsandContext, x) +static const AVOption unsand_options[] = { + { NULL } +}; + + +AVFILTER_DEFINE_CLASS(unsand); + +static const AVFilterPad avfilter_vf_unsand_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = filter_frame, + }, + { NULL } +}; + +static const AVFilterPad avfilter_vf_unsand_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO + }, +}; + +AVFilter ff_vf_unsand = { + .name = "unsand", + .description = NULL_IF_CONFIG_SMALL("Convert sand pix fmt to yuv"), + + .init = init, + .uninit = uninit, + + FILTER_QUERY_FUNC(query_formats), + + .priv_size = sizeof(UnsandContext), + .priv_class = &unsand_class, + + FILTER_INPUTS(avfilter_vf_unsand_inputs), + FILTER_OUTPUTS(avfilter_vf_unsand_outputs), +}; + diff --git a/libavfilter/yadif.h b/libavfilter/yadif.h index c928911b35..e1a6037f62 100644 --- a/libavfilter/yadif.h +++ b/libavfilter/yadif.h @@ -53,6 +53,7 @@ typedef struct YADIFContext { int mode; ///< YADIFMode int parity; ///< YADIFParity int deint; ///< YADIFDeint + int useasm; ///< Use any asm code int frame_pending; diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c index 113541bd9a..61e4c976ef 100644 --- a/libavformat/matroskaenc.c +++ b/libavformat/matroskaenc.c @@ -77,6 +77,10 @@ #define IS_WEBM(mkv) (CONFIG_WEBM_MUXER && CONFIG_MATROSKA_MUXER ? \ ((mkv)->mode == MODE_WEBM) : CONFIG_WEBM_MUXER) + +/* Reserved size for H264 headers if not extant at init time */ +#define MAX_H264_HEADER_SIZE 1024 + #define IS_SEEKABLE(pb, mkv) (((pb)->seekable & AVIO_SEEKABLE_NORMAL) && \ !(mkv)->is_live) @@ -1121,8 +1125,12 @@ static int mkv_assemble_native_codecprivate(AVFormatContext *s, AVIOContext *dyn case AV_CODEC_ID_WAVPACK: return put_wv_codecpriv(dyn_cp, extradata, extradata_size); case AV_CODEC_ID_H264: - return ff_isom_write_avcc(dyn_cp, extradata, - extradata_size); + if (par->extradata_size) + return ff_isom_write_avcc(dyn_cp, extradata, + extradata_size); + else + *size_to_reserve = MAX_H264_HEADER_SIZE; + break; case AV_CODEC_ID_HEVC: return ff_isom_write_hvcc(dyn_cp, extradata, extradata_size, 0); @@ -2731,8 +2739,8 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt) } break; #endif - // FIXME: Remove the following once libaom starts propagating proper extradata during init() - // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2208 + // FIXME: Remove the following once libaom starts propagating extradata during init() + // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2012 case AV_CODEC_ID_AV1: if (side_data_size && mkv->track.bc && !par->extradata_size) { // If the reserved space doesn't suffice, only write @@ -2744,6 +2752,16 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt) } else if (!par->extradata_size) return AVERROR_INVALIDDATA; break; + // H264 V4L2 has a similar issue + case AV_CODEC_ID_H264: + if (side_data_size && mkv->track.bc && !par->extradata_size) { + ret = mkv_update_codecprivate(s, mkv, side_data, side_data_size, + par, mkv->track.bc, track, 0); + if (ret < 0) + return ret; + } else if (!par->extradata_size) + return AVERROR_INVALIDDATA; + break; default: if (side_data_size) av_log(s, AV_LOG_DEBUG, "Ignoring new extradata in a packet for stream %d.\n", pkt->stream_index); diff --git a/libavformat/movenc.c b/libavformat/movenc.c index c4fcb5f8b1..891adbf7b2 100644 --- a/libavformat/movenc.c +++ b/libavformat/movenc.c @@ -6343,6 +6343,7 @@ static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt) if (trk->par->codec_id == AV_CODEC_ID_MP4ALS || trk->par->codec_id == AV_CODEC_ID_AAC || trk->par->codec_id == AV_CODEC_ID_AV1 || + trk->par->codec_id == AV_CODEC_ID_H264 || trk->par->codec_id == AV_CODEC_ID_FLAC) { size_t side_size; uint8_t *side = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c index a8d296a154..f67dc2a15a 100644 --- a/libavformat/rtpenc.c +++ b/libavformat/rtpenc.c @@ -19,6 +19,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "avc.h" #include "avformat.h" #include "mpegts.h" #include "internal.h" @@ -585,8 +586,25 @@ static int rtp_write_packet(AVFormatContext *s1, AVPacket *pkt) ff_rtp_send_vc2hq(s1, pkt->data, size, st->codecpar->field_order != AV_FIELD_PROGRESSIVE ? 1 : 0); break; case AV_CODEC_ID_H264: + { + uint8_t *side_data; + int side_data_size = 0; + + side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, + &side_data_size); + + if (side_data_size != 0) { + int ps_size = side_data_size; + uint8_t * ps_buf = NULL; + + ff_avc_write_annexb_extradata(side_data, &ps_buf, &ps_size); + av_log(s1, AV_LOG_TRACE, "H264: write side data=%d\n", ps_size); + ff_rtp_send_h264_hevc(s1, ps_buf ? ps_buf : side_data, ps_size); + av_free(ps_buf); + } ff_rtp_send_h264_hevc(s1, pkt->data, size); break; + } case AV_CODEC_ID_H261: ff_rtp_send_h261(s1, pkt->data, size); break; diff --git a/libavutil/Makefile b/libavutil/Makefile index dc9012f9a8..e33f5db099 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -73,6 +73,7 @@ HEADERS = adler32.h \ rational.h \ replaygain.h \ ripemd.h \ + rpi_sand_fns.h \ samplefmt.h \ sha.h \ sha512.h \ @@ -192,6 +193,7 @@ OBJS-$(CONFIG_MACOS_KPERF) += macos_kperf.o OBJS-$(CONFIG_MEDIACODEC) += hwcontext_mediacodec.o OBJS-$(CONFIG_OPENCL) += hwcontext_opencl.o OBJS-$(CONFIG_QSV) += hwcontext_qsv.o +OBJS-$(CONFIG_SAND) += rpi_sand_fns.o OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o @@ -212,6 +214,7 @@ SKIPHEADERS-$(CONFIG_D3D11VA) += hwcontext_d3d11va.h SKIPHEADERS-$(CONFIG_DXVA2) += hwcontext_dxva2.h SKIPHEADERS-$(CONFIG_QSV) += hwcontext_qsv.h SKIPHEADERS-$(CONFIG_OPENCL) += hwcontext_opencl.h +SKIPHEADERS-$(CONFIG-RPI) += rpi_sand_fn_pw.h SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.h SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile index eba0151337..1b44beab39 100644 --- a/libavutil/aarch64/Makefile +++ b/libavutil/aarch64/Makefile @@ -4,3 +4,5 @@ OBJS += aarch64/cpu.o \ NEON-OBJS += aarch64/float_dsp_neon.o \ aarch64/tx_float_neon.o \ + aarch64/rpi_sand_neon.o \ + diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S new file mode 100644 index 0000000000..2f07d9674c --- /dev/null +++ b/libavutil/aarch64/rpi_sand_neon.S @@ -0,0 +1,781 @@ +/* +Copyright (c) 2021 Michael Eiler + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Authors: Michael Eiler +*/ + +#include "asm.S" + +// void ff_rpi_sand8_lines_to_planar_y8( +// uint8_t * dest, : x0 +// unsigned int dst_stride, : w1 +// const uint8_t * src, : x2 +// unsigned int src_stride1, : w3, always 128 +// unsigned int src_stride2, : w4 +// unsigned int _x, : w5 +// unsigned int y, : w6 +// unsigned int _w, : w7 +// unsigned int h); : [sp, #0] + +function ff_rpi_sand8_lines_to_planar_y8, export=1 + // w15 contains the number of rows we need to process + ldr w15, [sp, #0] + + // w8 will contain the number of blocks per row + // w8 = floor(_w/stride1) + // stride1 is assumed to always be 128 + mov w8, w1 + lsr w8, w8, #7 + + // in case the width of the image is not a multiple of 128, there will + // be an incomplete block at the end of every row + // w9 contains the number of pixels stored within this block + // w9 = _w - w8 * 128 + lsl w9, w8, #7 + sub w9, w7, w9 + + // this is the value we have to add to the src pointer after reading a complete block + // it will move the address to the start of the next block + // w10 = stride2 * stride1 - stride1 + mov w10, w4 + lsl w10, w10, #7 + sub w10, w10, #128 + + // w11 is the row offset, meaning the start offset of the first block of every collumn + // this will be increased with stride1 within every iteration of the row_loop + eor w11, w11, w11 + + // w12 = 0, processed row count + eor w12, w12, w12 +row_loop: + // start of the first block within the current row + // x13 = row offset + src + mov x13, x2 + add x13, x13, x11 + + // w14 = 0, processed block count + eor w14, w14, w14 + + cmp w8, #0 + beq no_main_y8 + +block_loop: + // copy 128 bytes (a full block) into the vector registers v0-v7 and increase the src address by 128 + // fortunately these aren't callee saved ones, meaning we don't need to backup them + ld1 { v0.16b, v1.16b, v2.16b, v3.16b}, [x13], #64 + ld1 { v4.16b, v5.16b, v6.16b, v7.16b}, [x13], #64 + + // write these registers back to the destination vector and increase the dst address by 128 + st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 + st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x0], #64 + + // move the source register to the beginning of the next block (x13 = src + block offset) + add x13, x13, x10 + // increase the block counter + add w14, w14, #1 + + // continue with the block_loop if we haven't copied all full blocks yet + cmp w8, w14 + bgt block_loop + + // handle the last block at the end of each row + // at most 127 byte values copied from src to dst +no_main_y8: + eor w5, w5, w5 // i = 0 +incomplete_block_loop_y8: + cmp w5, w9 + bge incomplete_block_loop_end_y8 + + ldrb w6, [x13] + strb w6, [x0] + add x13, x13, #1 + add x0, x0, #1 + + add w5, w5, #1 + b incomplete_block_loop_y8 +incomplete_block_loop_end_y8: + + + // increase the row offset by 128 (stride1) + add w11, w11, #128 + // increment the row counter + add w12, w12, #1 + + // process the next row if we haven't finished yet + cmp w15, w12 + bgt row_loop + + ret +endfunc + + + +// void ff_rpi_sand8_lines_to_planar_c8( +// uint8_t * dst_u, : x0 +// unsigned int dst_stride_u, : w1 == width +// uint8_t * dst_v, : x2 +// unsigned int dst_stride_v, : w3 == width +// const uint8_t * src, : x4 +// unsigned int stride1, : w5 == 128 +// unsigned int stride2, : w6 +// unsigned int _x, : w7 +// unsigned int y, : [sp, #0] +// unsigned int _w, : [sp, #8] +// unsigned int h); : [sp, #16] + +function ff_rpi_sand8_lines_to_planar_c8, export=1 + // w7 = width + ldr w7, [sp, #8] + + // w15 contains the number of rows we need to process + // counts down + ldr w15, [sp, #16] + + // number of full blocks, w8 = _w / (stride1 >> 1) == _w / 64 == _w >> 6 + mov w8, w7 + lsr w8, w8, #6 + + // number of pixels in block at the end of every row + // w9 = _w - (w8 * 64) + lsl w9, w8, #6 + sub w9, w7, w9 + + // Skip at the end of the line to account for stride + sub w12, w1, w7 + + // address delta to the beginning of the next block + // w10 = (stride2 * stride1 - stride1) = stride2 * 128 - 128 + lsl w10, w6, #7 + sub w10, w10, #128 + + // w11 = row address start offset = 0 + eor w11, w11, w11 + +row_loop_c8: + // start of the first block within the current row + // x13 = row offset + src + mov x13, x4 + add x13, x13, x11 + + // w14 = 0, processed block count + eor w14, w14, w14 + + cmp w8, #0 + beq no_main_c8 + +block_loop_c8: + // load the full block -> 128 bytes, the block contains 64 interleaved U and V values + ld2 { v0.16b, v1.16b }, [x13], #32 + ld2 { v2.16b, v3.16b }, [x13], #32 + ld2 { v4.16b, v5.16b }, [x13], #32 + ld2 { v6.16b, v7.16b }, [x13], #32 + + // swap register so that we can write them out with a single instruction + mov v16.16b, v1.16b + mov v17.16b, v3.16b + mov v18.16b, v5.16b + mov v1.16b, v2.16b + mov v2.16b, v4.16b + mov v3.16b, v6.16b + mov v4.16b, v16.16b + mov v5.16b, v17.16b + mov v6.16b, v18.16b + + st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 + st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x2], #64 + + // increment row counter and move src to the beginning of the next block + add w14, w14, #1 + add x13, x13, x10 + + // jump to block_loop_c8 iff the block count is smaller than the number of full blocks + cmp w8, w14 + bgt block_loop_c8 + +no_main_c8: + // handle incomplete block at the end of every row + eor w5, w5, w5 // point counter, this might be +incomplete_block_loop_c8: + cmp w5, w9 + bge incomplete_block_loop_end_c8 + + ldrb w1, [x13] + strb w1, [x0] + add x13, x13, #1 + + ldrb w1, [x13] + strb w1, [x2] + add x13, x13, #1 + + add x0, x0, #1 + add x2, x2, #1 + + add w5, w5, #1 + b incomplete_block_loop_c8 +incomplete_block_loop_end_c8: + + // increase row_offset by stride1 + add w11, w11, #128 + add x0, x0, w12, sxtw + add x2, x2, w12, sxtw + + // jump to row_Loop_c8 iff the row count is small than the height + subs w15, w15, #1 + bgt row_loop_c8 + + ret +endfunc + +//void ff_rpi_sand30_lines_to_planar_c16( +// uint8_t * dst_u, // [x0] +// unsigned int dst_stride_u, // [w1] == _w*2 +// uint8_t * dst_v, // [x2] +// unsigned int dst_stride_v, // [w3] == _w*2 +// const uint8_t * src, // [x4] +// unsigned int stride1, // [w5] == 128 +// unsigned int stride2, // [w6] +// unsigned int _x, // [w7] == 0 +// unsigned int y, // [sp, #0] == 0 +// unsigned int _w, // [sp, #8] -> w3 +// unsigned int h); // [sp, #16] -> w7 + +.macro rpi_sand30_lines_to_planar_c16_block_half + ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x13], #64 + + xtn v4.4h, v0.4s + ushr v0.4s, v0.4s, #10 + xtn v5.4h, v0.4s + ushr v0.4s, v0.4s, #10 + xtn v6.4h, v0.4s + xtn2 v4.8h, v1.4s + ushr v1.4s, v1.4s, #10 + xtn2 v5.8h, v1.4s + ushr v1.4s, v1.4s, #10 + xtn2 v6.8h, v1.4s + and v4.16b, v4.16b, v16.16b + and v5.16b, v5.16b, v16.16b + and v6.16b, v6.16b, v16.16b + st3 { v4.8h, v5.8h, v6.8h }, [sp], #48 + + xtn v4.4h, v2.4s + ushr v2.4s, v2.4s, #10 + xtn v5.4h, v2.4s + ushr v2.4s, v2.4s, #10 + xtn v6.4h, v2.4s + xtn2 v4.8h, v3.4s + ushr v3.4s, v3.4s, #10 + xtn2 v5.8h, v3.4s + ushr v3.4s, v3.4s, #10 + xtn2 v6.8h, v3.4s + and v4.16b, v4.16b, v16.16b + and v5.16b, v5.16b, v16.16b + and v6.16b, v6.16b, v16.16b + st3 { v4.8h, v5.8h, v6.8h }, [sp] + sub sp, sp, #48 +.endm + +function ff_rpi_sand30_lines_to_planar_c16, export=1 + stp x19, x20, [sp, #-48]! + stp x21, x22, [sp, #16] + stp x23, x24, [sp, #32] + + ldr w3, [sp, #48+8] // w3 = width + ldr w7, [sp, #48+16] // w7 = height + + // reserve space on the stack for intermediate results + sub sp, sp, #256 + + // number of 128byte blocks per row, w8 = width / 48 + mov w9, #48 + udiv w8, w3, w9 + + // remaining pixels (rem_pix) per row, w9 = width - w8 * 48 + mul w9, w8, w9 + sub w9, w3, w9 + + // row offset, the beginning of the next row to process + eor w10, w10, w10 + + // offset to the beginning of the next block, w11 = stride2 * 128 - 128 + lsl w11, w6, #7 + sub w11, w11, #128 + + // decrease the height by one and in case of remaining pixels increase the block count by one + sub w7, w7, #1 + cmp w9, #0 + cset w19, ne // w19 == 1 iff reamining pixels != 0 + add w8, w8, w19 + + // bytes we have to move dst back by at the end of every row + mov w21, #48*2 + mul w21, w21, w8 + sub w21, w1, w21 + + mov w20, #0 // w20 = flag, last row processed + + mov x12, #0x03ff03ff03ff03ff + dup v16.2d, x12 + + // iterate through rows, row counter = w12 = 0 + eor w12, w12, w12 +row_loop_c16: + cmp w12, w7 + bge row_loop_c16_fin + + // address of row data = src + row_offset + mov x13, x4 + add x13, x13, x10 + + eor w14, w14, w14 +block_loop_c16: + cmp w14, w8 + bge block_loop_c16_fin + + rpi_sand30_lines_to_planar_c16_block_half + + ld2 { v0.8h, v1.8h }, [sp], #32 + ld2 { v2.8h, v3.8h }, [sp], #32 + ld2 { v4.8h, v5.8h }, [sp] + sub sp, sp, #64 + + st1 { v0.8h }, [x0], #16 + st1 { v2.8h }, [x0], #16 + st1 { v4.8h }, [x0], #16 + st1 { v1.8h }, [x2], #16 + st1 { v3.8h }, [x2], #16 + st1 { v5.8h }, [x2], #16 + + rpi_sand30_lines_to_planar_c16_block_half + + ld2 { v0.8h, v1.8h }, [sp], #32 + ld2 { v2.8h, v3.8h }, [sp], #32 + ld2 { v4.8h, v5.8h }, [sp] + sub sp, sp, #64 + + st1 { v0.8h }, [x0], #16 + st1 { v2.8h }, [x0], #16 + st1 { v4.8h }, [x0], #16 + st1 { v1.8h }, [x2], #16 + st1 { v3.8h }, [x2], #16 + st1 { v5.8h }, [x2], #16 + + add x13, x13, x11 // offset to next block + add w14, w14, #1 + b block_loop_c16 +block_loop_c16_fin: + + add w10, w10, #128 + add w12, w12, #1 + add x0, x0, w21, sxtw // move dst pointers back by x21 + add x2, x2, w21, sxtw + b row_loop_c16 +row_loop_c16_fin: + + cmp w20, #1 + beq row_loop_c16_fin2 + mov w20, #1 + sub w8, w8, w19 // decrease block count by w19 + add w7, w7, #1 // increase height + b row_loop_c16 + +row_loop_c16_fin2: + sub x0, x0, w21, sxtw // readd x21 in case of the last row + sub x2, x2, w21, sxtw // so that we can write out the few remaining pixels + + // last incomplete block to be finished + // read operations are fine, stride2 is more than large enough even if rem_pix is 0 + rpi_sand30_lines_to_planar_c16_block_half + ld2 { v0.8h, v1.8h }, [sp], #32 + ld2 { v2.8h, v3.8h }, [sp], #32 + ld2 { v4.8h, v5.8h }, [sp], #32 + rpi_sand30_lines_to_planar_c16_block_half + ld2 { v0.8h, v1.8h }, [sp], #32 + ld2 { v2.8h, v3.8h }, [sp], #32 + ld2 { v4.8h, v5.8h }, [sp] + sub sp, sp, #160 + + mov x4, sp + eor w20, w20, w20 +rem_pix_c16_loop: + cmp w20, w9 + bge rem_pix_c16_fin + + ldr w22, [x4], #4 + str w22, [x0], #2 + lsr w22, w22, #16 + str w22, [x2], #2 + + add w20, w20, #1 + b rem_pix_c16_loop +rem_pix_c16_fin: + + add sp, sp, #256 + + ldp x23, x24, [sp, #32] + ldp x21, x22, [sp, #16] + ldp x19, x20, [sp], #48 + ret +endfunc + + + +//void ff_rpi_sand30_lines_to_planar_p010( +// uint8_t * dest, +// unsigned int dst_stride, +// const uint8_t * src, +// unsigned int src_stride1, +// unsigned int src_stride2, +// unsigned int _x, +// unsigned int y, +// unsigned int _w, +// unsigned int h); + +// void ff_rpi_sand30_lines_to_planar_y8( +// uint8_t * dest, : x0 +// unsigned int dst_stride, : w1 +// const uint8_t * src, : x2 +// unsigned int src_stride1, : w3, always 128 +// unsigned int src_stride2, : w4 +// unsigned int _x, : w5 +// unsigned int y, : w6 +// unsigned int _w, : w7 +// unsigned int h); : [sp, #0] +// +// Assumes that we are starting on a stripe boundary and that overreading +// within the stripe is OK. However it does respect the dest size for wri + +function ff_rpi_sand30_lines_to_planar_y16, export=1 + lsl w4, w4, #7 + sub w4, w4, #64 + sub w1, w1, w7, lsl #1 + uxtw x6, w6 + add x8, x2, x6, lsl #7 + ldr w6, [sp, #0] + +10: + mov x2, x8 + mov w5, w7 +1: + ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 + + subs w5, w5, #96 + + // v0, v1 + + shrn v18.4h, v0.4s, #14 + xtn v16.4h, v0.4s + shrn v17.4h, v0.4s, #10 + + shrn2 v18.8h, v1.4s, #14 + xtn2 v16.8h, v1.4s + shrn2 v17.8h, v1.4s, #10 + + ushr v18.8h, v18.8h, #6 + bic v16.8h, #0xfc, lsl #8 + bic v17.8h, #0xfc, lsl #8 + + // v2, v3 + + shrn v21.4h, v2.4s, #14 + xtn v19.4h, v2.4s + shrn v20.4h, v2.4s, #10 + + shrn2 v21.8h, v3.4s, #14 + xtn2 v19.8h, v3.4s + shrn2 v20.8h, v3.4s, #10 + + ushr v21.8h, v21.8h, #6 + bic v19.8h, #0xfc, lsl #8 + bic v20.8h, #0xfc, lsl #8 + + // v4, v5 + + shrn v24.4h, v4.4s, #14 + xtn v22.4h, v4.4s + shrn v23.4h, v4.4s, #10 + + shrn2 v24.8h, v5.4s, #14 + xtn2 v22.8h, v5.4s + shrn2 v23.8h, v5.4s, #10 + + ushr v24.8h, v24.8h, #6 + bic v22.8h, #0xfc, lsl #8 + bic v23.8h, #0xfc, lsl #8 + + // v6, v7 + + shrn v27.4h, v6.4s, #14 + xtn v25.4h, v6.4s + shrn v26.4h, v6.4s, #10 + + shrn2 v27.8h, v7.4s, #14 + xtn2 v25.8h, v7.4s + shrn2 v26.8h, v7.4s, #10 + + ushr v27.8h, v27.8h, #6 + bic v25.8h, #0xfc, lsl #8 + bic v26.8h, #0xfc, lsl #8 + + blt 2f + + st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 + st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 + st3 {v22.8h, v23.8h, v24.8h}, [x0], #48 + st3 {v25.8h, v26.8h, v27.8h}, [x0], #48 + + bne 1b + +11: + subs w6, w6, #1 + add x0, x0, w1, uxtw + add x8, x8, #128 + bne 10b + + ret + +// Partial final write +2: + cmp w5, #48-96 + blt 1f + st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 + st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 + beq 11b + mov v16.16b, v22.16b + mov v17.16b, v23.16b + sub w5, w5, #48 + mov v18.16b, v24.16b + mov v19.16b, v25.16b + mov v20.16b, v26.16b + mov v21.16b, v27.16b +1: + cmp w5, #24-96 + blt 1f + st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 + beq 11b + mov v16.16b, v19.16b + mov v17.16b, v20.16b + sub w5, w5, #24 + mov v18.16b, v21.16b +1: + cmp w5, #12-96 + blt 1f + st3 {v16.4h, v17.4h, v18.4h}, [x0], #24 + beq 11b + mov v16.2d[0], v16.2d[1] + sub w5, w5, #12 + mov v17.2d[0], v17.2d[1] + mov v18.2d[0], v18.2d[1] +1: + cmp w5, #6-96 + blt 1f + st3 {v16.h, v17.h, v18.h}[0], [x0], #6 + st3 {v16.h, v17.h, v18.h}[1], [x0], #6 + beq 11b + mov v16.2s[0], v16.2s[1] + sub w5, w5, #6 + mov v17.2s[0], v17.2s[1] + mov v18.2s[0], v18.2s[1] +1: + cmp w5, #3-96 + blt 1f + st3 {v16.h, v17.h, v18.h}[0], [x0], #6 + beq 11b + mov v16.4h[0], v16.4h[1] + sub w5, w5, #3 + mov v17.4h[0], v17.4h[1] +1: + cmp w5, #2-96 + blt 1f + st2 {v16.h, v17.h}[0], [x0], #4 + b 11b +1: + st1 {v16.h}[0], [x0], #2 + b 11b + +endfunc + +// void ff_rpi_sand30_lines_to_planar_y8( +// uint8_t * dest, : x0 +// unsigned int dst_stride, : w1 +// const uint8_t * src, : x2 +// unsigned int src_stride1, : w3, always 128 +// unsigned int src_stride2, : w4 +// unsigned int _x, : w5 +// unsigned int y, : w6 +// unsigned int _w, : w7 +// unsigned int h); : [sp, #0] +// +// Assumes that we are starting on a stripe boundary and that overreading +// within the stripe is OK. However it does respect the dest size for wri + +function ff_rpi_sand30_lines_to_planar_y8, export=1 + lsl w4, w4, #7 + sub w4, w4, #64 + sub w1, w1, w7 + uxtw x6, w6 + add x8, x2, x6, lsl #7 + ldr w6, [sp, #0] + +10: + mov x2, x8 + mov w5, w7 +1: + ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 + + subs w5, w5, #96 + + // v0, v1 + + shrn v18.4h, v0.4s, #16 + xtn v16.4h, v0.4s + shrn v17.4h, v0.4s, #12 + + shrn2 v18.8h, v1.4s, #16 + xtn2 v16.8h, v1.4s + shrn2 v17.8h, v1.4s, #12 + + shrn v18.8b, v18.8h, #6 + shrn v16.8b, v16.8h, #2 + xtn v17.8b, v17.8h + + // v2, v3 + + shrn v21.4h, v2.4s, #16 + xtn v19.4h, v2.4s + shrn v20.4h, v2.4s, #12 + + shrn2 v21.8h, v3.4s, #16 + xtn2 v19.8h, v3.4s + shrn2 v20.8h, v3.4s, #12 + + shrn2 v18.16b, v21.8h, #6 + shrn2 v16.16b, v19.8h, #2 + xtn2 v17.16b, v20.8h + + // v4, v5 + + shrn v24.4h, v4.4s, #16 + xtn v22.4h, v4.4s + shrn v23.4h, v4.4s, #12 + + shrn2 v24.8h, v5.4s, #16 + xtn2 v22.8h, v5.4s + shrn2 v23.8h, v5.4s, #12 + + shrn v21.8b, v24.8h, #6 + shrn v19.8b, v22.8h, #2 + xtn v20.8b, v23.8h + + // v6, v7 + + shrn v27.4h, v6.4s, #16 + xtn v25.4h, v6.4s + shrn v26.4h, v6.4s, #12 + + shrn2 v27.8h, v7.4s, #16 + xtn2 v25.8h, v7.4s + shrn2 v26.8h, v7.4s, #12 + + shrn2 v21.16b, v27.8h, #6 + shrn2 v19.16b, v25.8h, #2 + xtn2 v20.16b, v26.8h + + blt 2f + + st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 + st3 {v19.16b, v20.16b, v21.16b}, [x0], #48 + + bne 1b + +11: + subs w6, w6, #1 + add x0, x0, w1, uxtw + add x8, x8, #128 + bne 10b + + ret + +// Partial final write +2: + cmp w5, #48-96 + blt 1f + st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 + beq 11b + mov v16.16b, v22.16b + mov v17.16b, v23.16b + sub w5, w5, #48 + mov v18.16b, v24.16b +1: + cmp w5, #24-96 + blt 1f + st3 {v16.8b, v17.8b, v18.8b}, [x0], #24 + beq 11b + mov v16.2d[0], v16.2d[1] + sub w5, w5, #24 + mov v17.2d[0], v17.2d[1] + mov v18.2d[0], v18.2d[1] +1: + cmp w5, #12-96 + blt 1f + st3 {v16.b, v17.b, v18.b}[0], [x0], #3 + st3 {v16.b, v17.b, v18.b}[1], [x0], #3 + st3 {v16.b, v17.b, v18.b}[2], [x0], #3 + st3 {v16.b, v17.b, v18.b}[3], [x0], #3 + beq 11b + mov v16.2s[0], v16.2s[1] + sub w5, w5, #12 + mov v17.2s[0], v17.2s[1] + mov v18.2s[0], v18.2s[1] +1: + cmp w5, #6-96 + blt 1f + st3 {v16.b, v17.b, v18.b}[0], [x0], #3 + st3 {v16.b, v17.b, v18.b}[1], [x0], #3 + beq 11b + mov v16.4h[0], v16.4h[1] + sub w5, w5, #6 + mov v17.4h[0], v17.4h[1] + mov v18.4h[0], v18.4h[1] +1: + cmp w5, #3-96 + blt 1f + st3 {v16.b, v17.b, v18.b}[0], [x0], #3 + beq 11b + mov v16.8b[0], v16.8b[1] + sub w5, w5, #3 + mov v17.8b[0], v17.8b[1] +1: + cmp w5, #2-96 + blt 1f + st2 {v16.b, v17.b}[0], [x0], #2 + b 11b +1: + st1 {v16.b}[0], [x0], #1 + b 11b + +endfunc + diff --git a/libavutil/aarch64/rpi_sand_neon.h b/libavutil/aarch64/rpi_sand_neon.h new file mode 100644 index 0000000000..2a56135bc3 --- /dev/null +++ b/libavutil/aarch64/rpi_sand_neon.h @@ -0,0 +1,59 @@ +/* +Copyright (c) 2021 Michael Eiler + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Authors: Michael Eiler +*/ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +void ff_rpi_sand8_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, + const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, + unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); + +void ff_rpi_sand8_lines_to_planar_c8(uint8_t * dst_u, unsigned int dst_stride_u, + uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, + unsigned int stride1, unsigned int stride2, unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + +void ff_rpi_sand30_lines_to_planar_y16(uint8_t * dest, unsigned int dst_stride, + const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, + unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); + +void ff_rpi_sand30_lines_to_planar_c16(uint8_t * dst_u, unsigned int dst_stride_u, + uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1, + unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); + +void ff_rpi_sand30_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, + const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, + unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); + +#ifdef __cplusplus +} +#endif + diff --git a/libavutil/arm/Makefile b/libavutil/arm/Makefile index 5da44b0542..b74b7c4e2f 100644 --- a/libavutil/arm/Makefile +++ b/libavutil/arm/Makefile @@ -6,3 +6,4 @@ VFP-OBJS += arm/float_dsp_init_vfp.o \ NEON-OBJS += arm/float_dsp_init_neon.o \ arm/float_dsp_neon.o \ + arm/rpi_sand_neon.o \ diff --git a/libavutil/arm/rpi_sand_neon.S b/libavutil/arm/rpi_sand_neon.S new file mode 100644 index 0000000000..60e697f681 --- /dev/null +++ b/libavutil/arm/rpi_sand_neon.S @@ -0,0 +1,925 @@ +/* +Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Authors: John Cox +*/ + +#include "libavutil/arm/asm.S" + + +@ General notes: +@ Having done some timing on this in sand8->y8 (Pi4) +@ vst1 (680fps) is a bit faster than vstm (660fps) +@ vldm (680fps) is noticably faster than vld1 (480fps) +@ (or it might be that a mix is what is required) +@ +@ At least on a Pi4 it is no more expensive to have a single auto-inc register +@ for dest address than it is to have 2 used alternately (On Pi3 Ben asserted +@ the latter was better) +@ +@ vstm will bus error on unaligned access (so will vldm), vst1 is safe unless +@ the memory is uncached. +@ As these are Sand -> planar we can assume that src is going to be aligned but +@ it is possible that dest isn't (converting to .yuv or other packed format). +@ Luckily vst1 is faster than vstm :-) so all is well +@ vst1 has alignment requirements of el size so maybe splitting vst1.32 into 4 +@ .8 stores would let us do non-word aligned stores into uncached but it +@ probably isn't worth it. + + + + +@ void ff_rpi_sand128b_stripe_to_8_10( +@ uint8_t * dest, // [r0] +@ const uint8_t * src1, // [r1] +@ const uint8_t * src2, // [r2] +@ unsigned int lines); // [r3] + +.macro stripe2_to_8, bit_depth + vpush {q4-q7} +1: + vldm r1!, {q0-q7} + subs r3, #1 + vldm r2!, {q8-q15} + vqrshrn.u16 d0, q0, #\bit_depth - 8 + vqrshrn.u16 d1, q1, #\bit_depth - 8 + vqrshrn.u16 d2, q2, #\bit_depth - 8 + vqrshrn.u16 d3, q3, #\bit_depth - 8 + vqrshrn.u16 d4, q4, #\bit_depth - 8 + vqrshrn.u16 d5, q5, #\bit_depth - 8 + vqrshrn.u16 d6, q6, #\bit_depth - 8 + vqrshrn.u16 d7, q7, #\bit_depth - 8 + vqrshrn.u16 d8, q8, #\bit_depth - 8 + vqrshrn.u16 d9, q9, #\bit_depth - 8 + vqrshrn.u16 d10, q10, #\bit_depth - 8 + vqrshrn.u16 d11, q11, #\bit_depth - 8 + vqrshrn.u16 d12, q12, #\bit_depth - 8 + vqrshrn.u16 d13, q13, #\bit_depth - 8 + vqrshrn.u16 d14, q14, #\bit_depth - 8 + vqrshrn.u16 d15, q15, #\bit_depth - 8 + vstm r0!, {q0-q7} + bne 1b + vpop {q4-q7} + bx lr +.endm + +function ff_rpi_sand128b_stripe_to_8_10, export=1 + stripe2_to_8 10 +endfunc + +@ void ff_rpi_sand8_lines_to_planar_y8( +@ uint8_t * dest, // [r0] +@ unsigned int dst_stride, // [r1] +@ const uint8_t * src, // [r2] +@ unsigned int src_stride1, // [r3] Ignored - assumed 128 +@ unsigned int src_stride2, // [sp, #0] -> r3 +@ unsigned int _x, // [sp, #4] Ignored - 0 +@ unsigned int y, // [sp, #8] (r7 in prefix) +@ unsigned int _w, // [sp, #12] -> r6 (cur r5) +@ unsigned int h); // [sp, #16] -> r7 +@ +@ Assumes that we are starting on a stripe boundary and that overreading +@ within the stripe is OK. However it does respect the dest size for writing + +function ff_rpi_sand8_lines_to_planar_y8, export=1 + push {r4-r8, lr} @ +24 L + ldr r3, [sp, #24] + ldr r6, [sp, #36] + ldr r7, [sp, #32] @ y + lsl r3, #7 + sub r1, r6 + add r8, r2, r7, lsl #7 + ldr r7, [sp, #40] + +10: + mov r2, r8 + add r4, r0, #24 + mov r5, r6 + mov lr, #0 +1: + vldm r2, {q8-q15} + add r2, r3 + subs r5, #128 + blt 2f + vst1.8 {d16, d17, d18, d19}, [r0]! + vst1.8 {d20, d21, d22, d23}, [r0]! + vst1.8 {d24, d25, d26, d27}, [r0]! + vst1.8 {d28, d29, d30, d31}, [r0]! + bne 1b +11: + subs r7, #1 + add r0, r1 + add r8, #128 + bne 10b + + pop {r4-r8, pc} + +@ Partial final write +2: + cmp r5, #64-128 + blt 1f + vst1.8 {d16, d17, d18, d19}, [r0]! + vst1.8 {d20, d21, d22, d23}, [r0]! + beq 11b + vmov q8, q12 + vmov q9, q13 + sub r5, #64 + vmov q10, q14 + vmov q11, q15 +1: + cmp r5, #32-128 + blt 1f + vst1.8 {d16, d17, d18, d19}, [r0]! + beq 11b + vmov q8, q10 + sub r5, #32 + vmov q9, q11 +1: + cmp r5, #16-128 + blt 1f + vst1.8 {d16, d17}, [r0]! + beq 11b + sub r5, #16 + vmov q8, q9 +1: + cmp r5, #8-128 + blt 1f + vst1.8 {d16}, [r0]! + beq 11b + sub r5, #8 + vmov d16, d17 +1: + cmp r5, #4-128 + blt 1f + vst1.32 {d16[0]}, [r0]! + beq 11b + sub r5, #4 + vshr.u64 d16, #32 +1: + cmp r5, #2-128 + blt 1f + vst1.16 {d16[0]}, [r0]! + beq 11b + vst1.8 {d16[2]}, [r0]! + b 11b +1: + vst1.8 {d16[0]}, [r0]! + b 11b +endfunc + +@ void ff_rpi_sand8_lines_to_planar_c8( +@ uint8_t * dst_u, // [r0] +@ unsigned int dst_stride_u, // [r1] +@ uint8_t * dst_v, // [r2] +@ unsigned int dst_stride_v, // [r3] +@ const uint8_t * src, // [sp, #0] -> r4, r5 +@ unsigned int stride1, // [sp, #4] 128 +@ unsigned int stride2, // [sp, #8] -> r8 +@ unsigned int _x, // [sp, #12] 0 +@ unsigned int y, // [sp, #16] (r7 in prefix) +@ unsigned int _w, // [sp, #20] -> r12, r6 +@ unsigned int h); // [sp, #24] -> r7 +@ +@ Assumes that we are starting on a stripe boundary and that overreading +@ within the stripe is OK. However it does respect the dest size for writing + +function ff_rpi_sand8_lines_to_planar_c8, export=1 + push {r4-r8, lr} @ +24 + + ldr r5, [sp, #24] + ldr r8, [sp, #32] + ldr r7, [sp, #40] + ldr r6, [sp, #44] + lsl r8, #7 + add r5, r5, r7, lsl #7 + sub r1, r1, r6 + sub r3, r3, r6 + ldr r7, [sp, #48] + vpush {q4-q7} + +10: + mov r4, r5 + mov r12, r6 +1: + subs r12, #64 + vldm r4, {q0-q7} + add r4, r8 + it gt + vldmgt r4, {q8-q15} + add r4, r8 + + vuzp.8 q0, q1 + vuzp.8 q2, q3 + vuzp.8 q4, q5 + vuzp.8 q6, q7 + + vuzp.8 q8, q9 + vuzp.8 q10, q11 + vuzp.8 q12, q13 + vuzp.8 q14, q15 + subs r12, #64 + + @ Rearrange regs so we can use vst1 with 4 regs + vswp q1, q2 + vswp q5, q6 + vswp q9, q10 + vswp q13, q14 + blt 2f + + vst1.8 {d0, d1, d2, d3 }, [r0]! + vst1.8 {d8, d9, d10, d11}, [r0]! + vst1.8 {d16, d17, d18, d19}, [r0]! + vst1.8 {d24, d25, d26, d27}, [r0]! + + vst1.8 {d4, d5, d6, d7 }, [r2]! + vst1.8 {d12, d13, d14, d15}, [r2]! + vst1.8 {d20, d21, d22, d23}, [r2]! + vst1.8 {d28, d29, d30, d31}, [r2]! + bne 1b +11: + subs r7, #1 + add r5, #128 + add r0, r1 + add r2, r3 + bne 10b + vpop {q4-q7} + pop {r4-r8,pc} + +2: + cmp r12, #64-128 + blt 1f + vst1.8 {d0, d1, d2, d3 }, [r0]! + vst1.8 {d8, d9, d10, d11}, [r0]! + vst1.8 {d4, d5, d6, d7 }, [r2]! + vst1.8 {d12, d13, d14, d15}, [r2]! + beq 11b + sub r12, #64 + vmov q0, q8 + vmov q1, q9 + vmov q2, q10 + vmov q3, q11 + vmov q4, q12 + vmov q5, q13 + vmov q6, q14 + vmov q7, q15 +1: + cmp r12, #32-128 + blt 1f + vst1.8 {d0, d1, d2, d3 }, [r0]! + vst1.8 {d4, d5, d6, d7 }, [r2]! + beq 11b + sub r12, #32 + vmov q0, q4 + vmov q1, q5 + vmov q2, q6 + vmov q3, q7 +1: + cmp r12, #16-128 + blt 1f + vst1.8 {d0, d1 }, [r0]! + vst1.8 {d4, d5 }, [r2]! + beq 11b + sub r12, #16 + vmov q0, q1 + vmov q2, q3 +1: + cmp r12, #8-128 + blt 1f + vst1.8 {d0}, [r0]! + vst1.8 {d4}, [r2]! + beq 11b + sub r12, #8 + vmov d0, d1 + vmov d4, d5 +1: + cmp r12, #4-128 + blt 1f + vst1.32 {d0[0]}, [r0]! + vst1.32 {d4[0]}, [r2]! + beq 11b + sub r12, #4 + vmov s0, s1 + vmov s8, s9 +1: + cmp r12, #2-128 + blt 1f + vst1.16 {d0[0]}, [r0]! + vst1.16 {d4[0]}, [r2]! + beq 11b + vst1.8 {d0[2]}, [r0]! + vst1.8 {d4[2]}, [r2]! + b 11b +1: + vst1.8 {d0[0]}, [r0]! + vst1.8 {d4[0]}, [r2]! + b 11b +endfunc + + + +@ void ff_rpi_sand30_lines_to_planar_y16( +@ uint8_t * dest, // [r0] +@ unsigned int dst_stride, // [r1] +@ const uint8_t * src, // [r2] +@ unsigned int src_stride1, // [r3] Ignored - assumed 128 +@ unsigned int src_stride2, // [sp, #0] -> r3 +@ unsigned int _x, // [sp, #4] Ignored - 0 +@ unsigned int y, // [sp, #8] (r7 in prefix) +@ unsigned int _w, // [sp, #12] -> r6 (cur r5) +@ unsigned int h); // [sp, #16] -> r7 +@ +@ Assumes that we are starting on a stripe boundary and that overreading +@ within the stripe is OK. However it does respect the dest size for writing + +function ff_rpi_sand30_lines_to_planar_y16, export=1 + push {r4-r8, lr} @ +24 + ldr r3, [sp, #24] + ldr r6, [sp, #36] + ldr r7, [sp, #32] @ y + mov r12, #48 + sub r3, #1 + lsl r3, #7 + sub r1, r1, r6, lsl #1 + add r8, r2, r7, lsl #7 + ldr r7, [sp, #40] + +10: + mov r2, r8 + add r4, r0, #24 + mov r5, r6 + mov lr, #0 +1: + vldm r2!, {q10-q13} + add lr, #64 + + vshrn.u32 d4 , q10, #14 @ Cannot vshrn.u32 #20! + ands lr, #127 + vshrn.u32 d2, q10, #10 + vmovn.u32 d0, q10 + + vshrn.u32 d5, q11, #14 + it eq + addeq r2, r3 + vshrn.u32 d3, q11, #10 + vmovn.u32 d1, q11 + + subs r5, #48 + vshr.u16 q2, #6 + vbic.u16 q0, #0xfc00 + vbic.u16 q1, #0xfc00 + + vshrn.u32 d20, q12, #14 + vshrn.u32 d18, q12, #10 + vmovn.u32 d16, q12 + + vshrn.u32 d21, q13, #14 + vshrn.u32 d19, q13, #10 + vmovn.u32 d17, q13 + + vshr.u16 q10, #6 + vbic.u16 q8, #0xfc00 + vbic.u16 q9 , #0xfc00 + blt 2f + + vst3.16 {d0, d2, d4}, [r0], r12 + vst3.16 {d1, d3, d5}, [r4], r12 + vst3.16 {d16, d18, d20}, [r0], r12 + vst3.16 {d17, d19, d21}, [r4], r12 + + bne 1b + +11: + subs r7, #1 + add r0, r1 + add r8, #128 + bne 10b + + pop {r4-r8, pc} + +@ Partial final write +2: + cmp r5, #24-48 + blt 1f + vst3.16 {d0, d2, d4}, [r0], r12 + vst3.16 {d1, d3, d5}, [r4] + beq 11b + vmov q0, q8 + sub r5, #24 + vmov q1, q9 + vmov q2, q10 +1: + cmp r5, #12-48 + blt 1f + vst3.16 {d0, d2, d4}, [r0]! + beq 11b + vmov d0, d1 + sub r5, #12 + vmov d2, d3 + vmov d4, d5 +1: + cmp r5, #6-48 + add r4, r0, #6 @ avoid [r0]! on sequential instructions + blt 1f + vst3.16 {d0[0], d2[0], d4[0]}, [r0] + vst3.16 {d0[1], d2[1], d4[1]}, [r4] + add r0, #12 + beq 11b + vmov s0, s1 + sub r5, #6 + vmov s4, s5 + vmov s8, s9 +1: + cmp r5, #3-48 + blt 1f + vst3.16 {d0[0], d2[0], d4[0]}, [r0]! + beq 11b + sub r5, #3 + vshr.u32 d0, #16 + vshr.u32 d2, #16 +1: + cmp r5, #2-48 + blt 1f + vst2.16 {d0[0], d2[0]}, [r0]! + b 11b +1: + vst1.16 {d0[0]}, [r0]! + b 11b + +endfunc + + +@ void ff_rpi_sand30_lines_to_planar_c16( +@ uint8_t * dst_u, // [r0] +@ unsigned int dst_stride_u, // [r1] +@ uint8_t * dst_v, // [r2] +@ unsigned int dst_stride_v, // [r3] +@ const uint8_t * src, // [sp, #0] -> r4, r5 +@ unsigned int stride1, // [sp, #4] 128 +@ unsigned int stride2, // [sp, #8] -> r8 +@ unsigned int _x, // [sp, #12] 0 +@ unsigned int y, // [sp, #16] (r7 in prefix) +@ unsigned int _w, // [sp, #20] -> r6, r9 +@ unsigned int h); // [sp, #24] -> r7 +@ +@ Assumes that we are starting on a stripe boundary and that overreading +@ within the stripe is OK. However it does respect the dest size for writing + +function ff_rpi_sand30_lines_to_planar_c16, export=1 + push {r4-r10, lr} @ +32 + ldr r5, [sp, #32] + ldr r8, [sp, #40] + ldr r7, [sp, #48] + ldr r9, [sp, #52] + mov r12, #48 + sub r8, #1 + lsl r8, #7 + add r5, r5, r7, lsl #7 + sub r1, r1, r9, lsl #1 + sub r3, r3, r9, lsl #1 + ldr r7, [sp, #56] +10: + mov lr, #0 + mov r4, r5 + mov r6, r9 +1: + vldm r4!, {q0-q3} + add lr, #64 + + @ N.B. unpack [0,1,2] -> (reg order) 1, 0, 2 + vshrn.u32 d20, q0, #14 + vmovn.u32 d18, q0 + vshrn.u32 d0, q0, #10 + ands lr, #127 + + vshrn.u32 d21, q1, #14 + vmovn.u32 d19, q1 + vshrn.u32 d1, q1, #10 + + vshrn.u32 d22, q2, #10 + vmovn.u32 d2, q2 + vshrn.u32 d4, q2, #14 + + add r10, r0, #24 + vshrn.u32 d23, q3, #10 + vmovn.u32 d3, q3 + vshrn.u32 d5, q3, #14 + + it eq + addeq r4, r8 + vuzp.16 q0, q11 + vuzp.16 q9, q1 + vuzp.16 q10, q2 + + @ q0 V0, V3,.. + @ q9 U0, U3... + @ q10 U1, U4... + @ q11 U2, U5,.. + @ q1 V1, V4, + @ q2 V2, V5,.. + + subs r6, #24 + vbic.u16 q11, #0xfc00 + vbic.u16 q9, #0xfc00 + vshr.u16 q10, #6 + vshr.u16 q2, #6 + vbic.u16 q0, #0xfc00 + vbic.u16 q1, #0xfc00 + + blt 2f + + vst3.16 {d18, d20, d22}, [r0], r12 + vst3.16 {d19, d21, d23}, [r10] + add r10, r2, #24 + vst3.16 {d0, d2, d4}, [r2], r12 + vst3.16 {d1, d3, d5}, [r10] + + bne 1b + +11: + subs r7, #1 + add r5, #128 + add r0, r1 + add r2, r3 + bne 10b + + pop {r4-r10, pc} + +@ Partial final write +2: + cmp r6, #-12 + blt 1f + vst3.16 {d18, d20, d22}, [r0]! + vst3.16 {d0, d2, d4}, [r2]! + beq 11b + vmov d18, d19 + vmov d20, d21 + vmov d22, d23 + sub r6, #12 + vmov d0, d1 + vmov d2, d3 + vmov d4, d5 +1: + cmp r6, #-18 + @ Rezip here as it makes the remaining tail handling easier + vzip.16 d0, d18 + vzip.16 d2, d20 + vzip.16 d4, d22 + blt 1f + vst3.16 {d0[1], d2[1], d4[1]}, [r0]! + vst3.16 {d0[0], d2[0], d4[0]}, [r2]! + vst3.16 {d0[3], d2[3], d4[3]}, [r0]! + vst3.16 {d0[2], d2[2], d4[2]}, [r2]! + beq 11b + vmov d0, d18 + vmov d2, d20 + sub r6, #6 + vmov d4, d22 +1: + cmp r6, #-21 + blt 1f + vst3.16 {d0[1], d2[1], d4[1]}, [r0]! + vst3.16 {d0[0], d2[0], d4[0]}, [r2]! + beq 11b + vmov s4, s5 + sub r6, #3 + vmov s0, s1 +1: + cmp r6, #-22 + blt 1f + vst2.16 {d0[1], d2[1]}, [r0]! + vst2.16 {d0[0], d2[0]}, [r2]! + b 11b +1: + vst1.16 {d0[1]}, [r0]! + vst1.16 {d0[0]}, [r2]! + b 11b + +endfunc + +@ void ff_rpi_sand30_lines_to_planar_p010( +@ uint8_t * dest, // [r0] +@ unsigned int dst_stride, // [r1] +@ const uint8_t * src, // [r2] +@ unsigned int src_stride1, // [r3] Ignored - assumed 128 +@ unsigned int src_stride2, // [sp, #0] -> r3 +@ unsigned int _x, // [sp, #4] Ignored - 0 +@ unsigned int y, // [sp, #8] (r7 in prefix) +@ unsigned int _w, // [sp, #12] -> r6 (cur r5) +@ unsigned int h); // [sp, #16] -> r7 +@ +@ Assumes that we are starting on a stripe boundary and that overreading +@ within the stripe is OK. However it does respect the dest size for writing + +function ff_rpi_sand30_lines_to_planar_p010, export=1 + push {r4-r8, lr} @ +24 + ldr r3, [sp, #24] + ldr r6, [sp, #36] + ldr r7, [sp, #32] @ y + mov r12, #48 + vmov.u16 q15, #0xffc0 + sub r3, #1 + lsl r3, #7 + sub r1, r1, r6, lsl #1 + add r8, r2, r7, lsl #7 + ldr r7, [sp, #40] + +10: + mov r2, r8 + add r4, r0, #24 + mov r5, r6 + mov lr, #0 +1: + vldm r2!, {q10-q13} + add lr, #64 + + vshl.u32 q14, q10, #6 + ands lr, #127 + vshrn.u32 d4, q10, #14 + vshrn.u32 d2, q10, #4 + vmovn.u32 d0, q14 + + vshl.u32 q14, q11, #6 + it eq + addeq r2, r3 + vshrn.u32 d5, q11, #14 + vshrn.u32 d3, q11, #4 + vmovn.u32 d1, q14 + + subs r5, #48 + vand q2, q15 + vand q1, q15 + vand q0, q15 + + vshl.u32 q14, q12, #6 + vshrn.u32 d20, q12, #14 + vshrn.u32 d18, q12, #4 + vmovn.u32 d16, q14 + + vshl.u32 q14, q13, #6 + vshrn.u32 d21, q13, #14 + vshrn.u32 d19, q13, #4 + vmovn.u32 d17, q14 + + vand q10, q15 + vand q9, q15 + vand q8, q15 + blt 2f + + vst3.16 {d0, d2, d4}, [r0], r12 + vst3.16 {d1, d3, d5}, [r4], r12 + vst3.16 {d16, d18, d20}, [r0], r12 + vst3.16 {d17, d19, d21}, [r4], r12 + + bne 1b + +11: + subs r7, #1 + add r0, r1 + add r8, #128 + bne 10b + + pop {r4-r8, pc} + +@ Partial final write +2: + cmp r5, #24-48 + blt 1f + vst3.16 {d0, d2, d4}, [r0], r12 + vst3.16 {d1, d3, d5}, [r4] + beq 11b + vmov q0, q8 + sub r5, #24 + vmov q1, q9 + vmov q2, q10 +1: + cmp r5, #12-48 + blt 1f + vst3.16 {d0, d2, d4}, [r0]! + beq 11b + vmov d0, d1 + sub r5, #12 + vmov d2, d3 + vmov d4, d5 +1: + cmp r5, #6-48 + add r4, r0, #6 @ avoid [r0]! on sequential instructions + blt 1f + vst3.16 {d0[0], d2[0], d4[0]}, [r0] + vst3.16 {d0[1], d2[1], d4[1]}, [r4] + add r0, #12 + beq 11b + vmov s0, s1 + sub r5, #6 + vmov s4, s5 + vmov s8, s9 +1: + cmp r5, #3-48 + blt 1f + vst3.16 {d0[0], d2[0], d4[0]}, [r0]! + beq 11b + sub r5, #3 + vshr.u32 d0, #16 + vshr.u32 d2, #16 +1: + cmp r5, #2-48 + blt 1f + vst2.16 {d0[0], d2[0]}, [r0]! + b 11b +1: + vst1.16 {d0[0]}, [r0]! + b 11b + +endfunc + + +@ void ff_rpi_sand30_lines_to_planar_y8( +@ uint8_t * dest, // [r0] +@ unsigned int dst_stride, // [r1] +@ const uint8_t * src, // [r2] +@ unsigned int src_stride1, // [r3] Ignored - assumed 128 +@ unsigned int src_stride2, // [sp, #0] -> r3 +@ unsigned int _x, // [sp, #4] Ignored - 0 +@ unsigned int y, // [sp, #8] (r7 in prefix) +@ unsigned int _w, // [sp, #12] -> r6 (cur r5) +@ unsigned int h); // [sp, #16] -> r7 +@ +@ Assumes that we are starting on a stripe boundary and that overreading +@ within the stripe is OK. However it does respect the dest size for wri + +function ff_rpi_sand30_lines_to_planar_y8, export=1 + push {r4-r8, lr} @ +24 + ldr r3, [sp, #24] + ldr r6, [sp, #36] + ldr r7, [sp, #32] @ y + mov r12, #48 + lsl r3, #7 + sub r1, r1, r6 + add r8, r2, r7, lsl #7 + ldr r7, [sp, #40] + +10: + mov r2, r8 + add r4, r0, #24 + mov r5, r6 +1: + vldm r2, {q8-q15} + + subs r5, #96 + + vmovn.u32 d0, q8 + vshrn.u32 d2, q8, #12 + vshrn.u32 d4, q8, #16 @ Cannot vshrn.u32 #20! + + add r2, r3 + + vmovn.u32 d1, q9 + vshrn.u32 d3, q9, #12 + vshrn.u32 d5, q9, #16 + + pld [r2, #0] + + vshrn.u16 d0, q0, #2 + vmovn.u16 d1, q1 + vshrn.u16 d2, q2, #6 + + vmovn.u32 d16, q10 + vshrn.u32 d18, q10, #12 + vshrn.u32 d20, q10, #16 + + vmovn.u32 d17, q11 + vshrn.u32 d19, q11, #12 + vshrn.u32 d21, q11, #16 + + pld [r2, #64] + + vshrn.u16 d4, q8, #2 + vmovn.u16 d5, q9 + vshrn.u16 d6, q10, #6 + + vmovn.u32 d16, q12 + vshrn.u32 d18, q12, #12 + vshrn.u32 d20, q12, #16 + + vmovn.u32 d17, q13 + vshrn.u32 d19, q13, #12 + vshrn.u32 d21, q13, #16 + + vshrn.u16 d16, q8, #2 + vmovn.u16 d17, q9 + vshrn.u16 d18, q10, #6 + + vmovn.u32 d20, q14 + vshrn.u32 d22, q14, #12 + vshrn.u32 d24, q14, #16 + + vmovn.u32 d21, q15 + vshrn.u32 d23, q15, #12 + vshrn.u32 d25, q15, #16 + + vshrn.u16 d20, q10, #2 + vmovn.u16 d21, q11 + vshrn.u16 d22, q12, #6 + + blt 2f + + vst3.8 {d0, d1, d2}, [r0], r12 + vst3.8 {d4, d5, d6}, [r4], r12 + vst3.8 {d16, d17, d18}, [r0], r12 + vst3.8 {d20, d21, d22}, [r4], r12 + + bne 1b + +11: + subs r7, #1 + add r0, r1 + add r8, #128 + bne 10b + + pop {r4-r8, pc} + +@ Partial final write +2: + cmp r5, #48-96 + blt 1f + vst3.8 {d0, d1, d2}, [r0], r12 + vst3.8 {d4, d5, d6}, [r4], r12 + beq 11b + vmov q0, q8 + vmov q2, q10 + sub r5, #48 + vmov d2, d18 + vmov d6, d22 +1: + cmp r5, #24-96 + blt 1f + vst3.8 {d0, d1, d2}, [r0]! + beq 11b + vmov q0, q2 + sub r5, #24 + vmov d2, d6 +1: + cmp r5, #12-96 + blt 1f + vst3.8 {d0[0], d1[0], d2[0]}, [r0]! + vst3.8 {d0[1], d1[1], d2[1]}, [r0]! + vst3.8 {d0[2], d1[2], d2[2]}, [r0]! + vst3.8 {d0[3], d1[3], d2[3]}, [r0]! + beq 11b + vmov s0, s1 + sub r5, #12 + vmov s2, s3 + vmov s4, s5 +1: + cmp r5, #6-96 + blt 1f + vst3.8 {d0[0], d1[0], d2[0]}, [r0]! + vst3.8 {d0[1], d1[1], d2[1]}, [r0]! + add r0, #12 + beq 11b + vshr.u32 d0, #16 + sub r5, #6 + vshr.u32 d1, #16 + vshr.u32 d2, #16 +1: + cmp r5, #3-96 + blt 1f + vst3.8 {d0[0], d1[0], d2[0]}, [r0]! + beq 11b + sub r5, #3 + vshr.u32 d0, #8 + vshr.u32 d1, #8 +1: + cmp r5, #2-96 + blt 1f + vst2.8 {d0[0], d1[0]}, [r0]! + b 11b +1: + vst1.8 {d0[0]}, [r0]! + b 11b + +endfunc + + diff --git a/libavutil/arm/rpi_sand_neon.h b/libavutil/arm/rpi_sand_neon.h new file mode 100644 index 0000000000..d457c10870 --- /dev/null +++ b/libavutil/arm/rpi_sand_neon.h @@ -0,0 +1,110 @@ +/* +Copyright (c) 2020 Raspberry Pi (Trading) Ltd. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Authors: John Cox +*/ + +#ifndef AVUTIL_ARM_SAND_NEON_H +#define AVUTIL_ARM_SAND_NEON_H + +void ff_rpi_sand128b_stripe_to_8_10( + uint8_t * dest, // [r0] + const uint8_t * src1, // [r1] + const uint8_t * src2, // [r2] + unsigned int lines); // [r3] + +void ff_rpi_sand8_lines_to_planar_y8( + uint8_t * dest, // [r0] + unsigned int dst_stride, // [r1] + const uint8_t * src, // [r2] + unsigned int src_stride1, // [r3] Ignored - assumed 128 + unsigned int src_stride2, // [sp, #0] -> r3 + unsigned int _x, // [sp, #4] Ignored - 0 + unsigned int y, // [sp, #8] (r7 in prefix) + unsigned int _w, // [sp, #12] -> r6 (cur r5) + unsigned int h); // [sp, #16] -> r7 + +void ff_rpi_sand8_lines_to_planar_c8( + uint8_t * dst_u, // [r0] + unsigned int dst_stride_u, // [r1] + uint8_t * dst_v, // [r2] + unsigned int dst_stride_v, // [r3] + const uint8_t * src, // [sp, #0] -> r4, r5 + unsigned int stride1, // [sp, #4] 128 + unsigned int stride2, // [sp, #8] -> r8 + unsigned int _x, // [sp, #12] 0 + unsigned int y, // [sp, #16] (r7 in prefix) + unsigned int _w, // [sp, #20] -> r12, r6 + unsigned int h); // [sp, #24] -> r7 + +void ff_rpi_sand30_lines_to_planar_y16( + uint8_t * dest, // [r0] + unsigned int dst_stride, // [r1] + const uint8_t * src, // [r2] + unsigned int src_stride1, // [r3] Ignored - assumed 128 + unsigned int src_stride2, // [sp, #0] -> r3 + unsigned int _x, // [sp, #4] Ignored - 0 + unsigned int y, // [sp, #8] (r7 in prefix) + unsigned int _w, // [sp, #12] -> r6 (cur r5) + unsigned int h); // [sp, #16] -> r7 + +void ff_rpi_sand30_lines_to_planar_c16( + uint8_t * dst_u, // [r0] + unsigned int dst_stride_u, // [r1] + uint8_t * dst_v, // [r2] + unsigned int dst_stride_v, // [r3] + const uint8_t * src, // [sp, #0] -> r4, r5 + unsigned int stride1, // [sp, #4] 128 + unsigned int stride2, // [sp, #8] -> r8 + unsigned int _x, // [sp, #12] 0 + unsigned int y, // [sp, #16] (r7 in prefix) + unsigned int _w, // [sp, #20] -> r6, r9 + unsigned int h); // [sp, #24] -> r7 + +void ff_rpi_sand30_lines_to_planar_p010( + uint8_t * dest, // [r0] + unsigned int dst_stride, // [r1] + const uint8_t * src, // [r2] + unsigned int src_stride1, // [r3] Ignored - assumed 128 + unsigned int src_stride2, // [sp, #0] -> r3 + unsigned int _x, // [sp, #4] Ignored - 0 + unsigned int y, // [sp, #8] (r7 in prefix) + unsigned int _w, // [sp, #12] -> r6 (cur r5) + unsigned int h); // [sp, #16] -> r7 + +void ff_rpi_sand30_lines_to_planar_y8( + uint8_t * dest, // [r0] + unsigned int dst_stride, // [r1] + const uint8_t * src, // [r2] + unsigned int src_stride1, // [r3] Ignored - assumed 128 + unsigned int src_stride2, // [sp, #0] -> r3 + unsigned int _x, // [sp, #4] Ignored - 0 + unsigned int y, // [sp, #8] (r7 in prefix) + unsigned int _w, // [sp, #12] -> r6 (cur r5) + unsigned int h); // [sp, #16] -> r7 + +#endif // AVUTIL_ARM_SAND_NEON_H + diff --git a/libavutil/frame.c b/libavutil/frame.c index 9545477acc..48621e4098 100644 --- a/libavutil/frame.c +++ b/libavutil/frame.c @@ -16,6 +16,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "config.h" + #include "channel_layout.h" #include "avassert.h" #include "buffer.h" @@ -27,6 +29,9 @@ #include "mem.h" #include "samplefmt.h" #include "hwcontext.h" +#if CONFIG_SAND +#include "rpi_sand_fns.h" +#endif #if FF_API_OLD_CHANNEL_LAYOUT #define CHECK_CHANNELS_CONSISTENCY(frame) \ @@ -874,6 +879,12 @@ int av_frame_apply_cropping(AVFrame *frame, int flags) (frame->crop_top + frame->crop_bottom) >= frame->height) return AVERROR(ERANGE); +#if CONFIG_SAND + // Sand cannot be cropped - do not try + if (av_rpi_is_sand_format(frame->format)) + return 0; +#endif + desc = av_pix_fmt_desc_get(frame->format); if (!desc) return AVERROR_BUG; diff --git a/libavutil/frame.h b/libavutil/frame.h index 2580269549..3a9d323325 100644 --- a/libavutil/frame.h +++ b/libavutil/frame.h @@ -957,6 +957,16 @@ int av_frame_apply_cropping(AVFrame *frame, int flags); */ const char *av_frame_side_data_name(enum AVFrameSideDataType type); + +static inline int av_frame_cropped_width(const AVFrame * const frame) +{ + return frame->width - (frame->crop_left + frame->crop_right); +} +static inline int av_frame_cropped_height(const AVFrame * const frame) +{ + return frame->height - (frame->crop_top + frame->crop_bottom); +} + /** * @} */ diff --git a/libavutil/hwcontext_drm.c b/libavutil/hwcontext_drm.c index 7a9fdbd263..137a952d2c 100644 --- a/libavutil/hwcontext_drm.c +++ b/libavutil/hwcontext_drm.c @@ -21,6 +21,7 @@ #include #include #include +#include /* This was introduced in version 4.6. And may not exist all without an * optional package. So to prevent a hard dependency on needing the Linux @@ -31,6 +32,7 @@ #endif #include +#include #include #include "avassert.h" @@ -38,7 +40,9 @@ #include "hwcontext_drm.h" #include "hwcontext_internal.h" #include "imgutils.h" - +#if CONFIG_SAND +#include "libavutil/rpi_sand_fns.h" +#endif static void drm_device_free(AVHWDeviceContext *hwdev) { @@ -53,6 +57,11 @@ static int drm_device_create(AVHWDeviceContext *hwdev, const char *device, AVDRMDeviceContext *hwctx = hwdev->hwctx; drmVersionPtr version; + if (device == NULL) { + hwctx->fd = -1; + return 0; + } + hwctx->fd = open(device, O_RDWR); if (hwctx->fd < 0) return AVERROR(errno); @@ -139,6 +148,8 @@ static int drm_map_frame(AVHWFramesContext *hwfc, if (flags & AV_HWFRAME_MAP_WRITE) mmap_prot |= PROT_WRITE; + if (dst->format == AV_PIX_FMT_NONE) + dst->format = hwfc->sw_format; #if HAVE_LINUX_DMA_BUF_H if (flags & AV_HWFRAME_MAP_READ) map->sync_flags |= DMA_BUF_SYNC_READ; @@ -185,6 +196,23 @@ static int drm_map_frame(AVHWFramesContext *hwfc, dst->width = src->width; dst->height = src->height; + dst->crop_top = src->crop_top; + dst->crop_bottom = src->crop_bottom; + dst->crop_left = src->crop_left; + dst->crop_right = src->crop_right; + +#if CONFIG_SAND + // Rework for sand frames + if (av_rpi_is_sand_frame(dst)) { + // As it stands the sand formats hold stride2 in linesize[3] + // linesize[0] & [1] contain stride1 which is always 128 for everything we do + // * Arguably this should be reworked s.t. stride2 is in linesize[0] & [1] + dst->linesize[3] = fourcc_mod_broadcom_param(desc->objects[0].format_modifier); + dst->linesize[0] = 128; + dst->linesize[1] = 128; + // *** Are we sure src->height is actually what we want ??? + } +#endif err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &drm_unmap_frame, map); @@ -206,16 +234,29 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx, enum AVHWFrameTransferDirection dir, enum AVPixelFormat **formats) { - enum AVPixelFormat *pix_fmts; + enum AVPixelFormat *p; - pix_fmts = av_malloc_array(2, sizeof(*pix_fmts)); - if (!pix_fmts) + p = *formats = av_malloc_array(3, sizeof(*p)); + if (!p) return AVERROR(ENOMEM); - pix_fmts[0] = ctx->sw_format; - pix_fmts[1] = AV_PIX_FMT_NONE; + // **** Offer native sand too ???? + *p++ = +#if CONFIG_SAND + ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ? + AV_PIX_FMT_YUV420P : + ctx->sw_format == AV_PIX_FMT_RPI4_10 ? + AV_PIX_FMT_YUV420P10LE : +#endif + ctx->sw_format; + +#if CONFIG_SAND + if (ctx->sw_format == AV_PIX_FMT_RPI4_10 || + ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128) + *p++ = AV_PIX_FMT_NV12; +#endif - *formats = pix_fmts; + *p = AV_PIX_FMT_NONE; return 0; } @@ -231,18 +272,62 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc, map = av_frame_alloc(); if (!map) return AVERROR(ENOMEM); - map->format = dst->format; + // Map to default + map->format = AV_PIX_FMT_NONE; err = drm_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ); if (err) goto fail; - map->width = dst->width; - map->height = dst->height; +#if 0 + av_log(hwfc, AV_LOG_INFO, "%s: src fmt=%d (%d), dst fmt=%d (%d) s=%dx%d l=%d/%d/%d/%d, d=%dx%d l=%d/%d/%d\n", __func__, + hwfc->sw_format, AV_PIX_FMT_RPI4_8, dst->format, AV_PIX_FMT_YUV420P10LE, + map->width, map->height, + map->linesize[0], + map->linesize[1], + map->linesize[2], + map->linesize[3], + dst->width, dst->height, + dst->linesize[0], + dst->linesize[1], + dst->linesize[2]); +#endif +#if CONFIG_SAND + if (av_rpi_is_sand_frame(map)) { + // Preserve crop - later ffmpeg code assumes that we have in that it + // overwrites any crop that we create with the old values + const unsigned int w = FFMIN(dst->width, map->width); + const unsigned int h = FFMIN(dst->height, map->height); + + map->crop_top = 0; + map->crop_bottom = 0; + map->crop_left = 0; + map->crop_right = 0; + + if (av_rpi_sand_to_planar_frame(dst, map) != 0) + { + av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__); + err = AVERROR(EINVAL); + goto fail; + } + + dst->width = w; + dst->height = h; + } + else +#endif + { + // Kludge mapped h/w s.t. frame_copy works + map->width = dst->width; + map->height = dst->height; + err = av_frame_copy(dst, map); + } - err = av_frame_copy(dst, map); if (err) + { + av_log(hwfc, AV_LOG_ERROR, "%s: Copy fail\n", __func__); goto fail; + } err = 0; fail: @@ -257,7 +342,10 @@ static int drm_transfer_data_to(AVHWFramesContext *hwfc, int err; if (src->width > hwfc->width || src->height > hwfc->height) + { + av_log(hwfc, AV_LOG_ERROR, "%s: H/w mismatch: %d/%d, %d/%d\n", __func__, dst->width, hwfc->width, dst->height, hwfc->height); return AVERROR(EINVAL); + } map = av_frame_alloc(); if (!map) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 2a9b5f4aac..11e7945f18 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -57,6 +57,14 @@ #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x) #endif +// Sometimes missing definitions +#ifndef VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME +#define VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME "VK_EXT_video_decode_h264" +#endif +#ifndef VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME +#define VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME "VK_EXT_video_decode_h265" +#endif + typedef struct VulkanQueueCtx { VkFence fence; VkQueue queue; diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index 62a2ae08d9..cb73521ea7 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -2717,6 +2717,50 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_FLOAT | AV_PIX_FMT_FLAG_ALPHA, }, + [AV_PIX_FMT_SAND128] = { + .name = "sand128", + .nb_components = 3, + .log2_chroma_w = 1, + .log2_chroma_h = 1, + .comp = { + { 0, 1, 0, 0, 8 }, /* Y */ + { 1, 2, 0, 0, 8 }, /* U */ + { 1, 2, 1, 0, 8 }, /* V */ + }, + .flags = 0, + }, + [AV_PIX_FMT_SAND64_10] = { + .name = "sand64_10", + .nb_components = 3, + .log2_chroma_w = 1, + .log2_chroma_h = 1, + .comp = { + { 0, 2, 0, 0, 10 }, /* Y */ + { 1, 4, 0, 0, 10 }, /* U */ + { 1, 4, 2, 0, 10 }, /* V */ + }, + .flags = 0, + }, + [AV_PIX_FMT_SAND64_16] = { + .name = "sand64_16", + .nb_components = 3, + .log2_chroma_w = 1, + .log2_chroma_h = 1, + .comp = { + { 0, 2, 0, 0, 16 }, /* Y */ + { 1, 4, 0, 0, 16 }, /* U */ + { 1, 4, 2, 0, 16 }, /* V */ + }, + .flags = 0, + }, + [AV_PIX_FMT_RPI4_8] = { + .name = "rpi4_8", + .flags = AV_PIX_FMT_FLAG_HWACCEL, + }, + [AV_PIX_FMT_RPI4_10] = { + .name = "rpi4_10", + .flags = AV_PIX_FMT_FLAG_HWACCEL, + }, }; static const char * const color_range_names[] = { diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h index 37c2c79e01..5cc780e7d5 100644 --- a/libavutil/pixfmt.h +++ b/libavutil/pixfmt.h @@ -377,6 +377,14 @@ enum AVPixelFormat { AV_PIX_FMT_Y210BE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, big-endian AV_PIX_FMT_Y210LE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, little-endian +// RPI - not on ifdef so can be got at by calling progs +// #define so code that uses this can know it is there +#define AVUTIL_HAVE_PIX_FMT_SAND 1 + AV_PIX_FMT_SAND128, ///< 4:2:0 8-bit 128x*Y stripe, 64x*UV stripe, then next x stripe, mysterious padding + AV_PIX_FMT_SAND64_10, ///< 4:2:0 10-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding + AV_PIX_FMT_SAND64_16, ///< 4:2:0 16-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding + AV_PIX_FMT_RPI4_8, + AV_PIX_FMT_RPI4_10, AV_PIX_FMT_X2RGB10LE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), little-endian, X=unused/undefined AV_PIX_FMT_X2RGB10BE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), big-endian, X=unused/undefined diff --git a/libavutil/rpi_sand_fn_pw.h b/libavutil/rpi_sand_fn_pw.h new file mode 100644 index 0000000000..0d5d203dc3 --- /dev/null +++ b/libavutil/rpi_sand_fn_pw.h @@ -0,0 +1,227 @@ +/* +Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Authors: John Cox +*/ + +// * Included twice from rpi_sand_fn with different PW + +#define STRCAT(x,y) x##y + +#if PW == 1 +#define pixel uint8_t +#define FUNC(f) STRCAT(f, 8) +#elif PW == 2 +#define pixel uint16_t +#define FUNC(f) STRCAT(f, 16) +#else +#error Unexpected PW +#endif + +// Fetches a single patch - offscreen fixup not done here +// w <= stride1 +// unclipped +void FUNC(av_rpi_sand_to_planar_y)(uint8_t * dst, const unsigned int dst_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h) +{ + const unsigned int x = _x; + const unsigned int w = _w; + const unsigned int mask = stride1 - 1; + +#if PW == 1 && HAVE_SAND_ASM + if (_x == 0) { + ff_rpi_sand8_lines_to_planar_y8(dst, dst_stride, + src, stride1, stride2, _x, y, _w, h); + return; + } +#endif + + if ((x & ~mask) == ((x + w) & ~mask)) { + // All in one sand stripe + const uint8_t * p = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; + for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p += stride1) { + memcpy(dst, p, w); + } + } + else + { + // Two+ stripe + const unsigned int sstride = stride1 * stride2; + const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; + const uint8_t * p2 = p1 + sstride - (x & mask); + const unsigned int w1 = stride1 - (x & mask); + const unsigned int w3 = (x + w) & mask; + const unsigned int w2 = w - (w1 + w3); + + for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p1 += stride1, p2 += stride1) { + unsigned int j; + const uint8_t * p = p2; + uint8_t * d = dst; + memcpy(d, p1, w1); + d += w1; + for (j = 0; j < w2; j += stride1, d += stride1, p += sstride) { + memcpy(d, p, stride1); + } + memcpy(d, p, w3); + } + } +} + +// x & w in bytes but not of interleave (i.e. offset = x*2 for U&V) + +void FUNC(av_rpi_sand_to_planar_c)(uint8_t * dst_u, const unsigned int dst_stride_u, + uint8_t * dst_v, const unsigned int dst_stride_v, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h) +{ + const unsigned int x = _x * 2; + const unsigned int w = _w * 2; + const unsigned int mask = stride1 - 1; + +#if PW == 1 && HAVE_SAND_ASM + if (_x == 0) { + ff_rpi_sand8_lines_to_planar_c8(dst_u, dst_stride_u, dst_v, dst_stride_v, + src, stride1, stride2, _x, y, _w, h); + return; + } +#endif + + if ((x & ~mask) == ((x + w) & ~mask)) { + // All in one sand stripe + const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; + for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1) { + pixel * du = (pixel *)dst_u; + pixel * dv = (pixel *)dst_v; + const pixel * p = (const pixel *)p1; + for (unsigned int k = 0; k < w; k += 2 * PW) { + *du++ = *p++; + *dv++ = *p++; + } + } + } + else + { + // Two+ stripe + const unsigned int sstride = stride1 * stride2; + const unsigned int sstride_p = (sstride - stride1) / PW; + + const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; + const uint8_t * p2 = p1 + sstride - (x & mask); + const unsigned int w1 = stride1 - (x & mask); + const unsigned int w3 = (x + w) & mask; + const unsigned int w2 = w - (w1 + w3); + + for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1, p2 += stride1) { + unsigned int j; + const pixel * p = (const pixel *)p1; + pixel * du = (pixel *)dst_u; + pixel * dv = (pixel *)dst_v; + for (unsigned int k = 0; k < w1; k += 2 * PW) { + *du++ = *p++; + *dv++ = *p++; + } + for (j = 0, p = (const pixel *)p2; j < w2; j += stride1, p += sstride_p) { + for (unsigned int k = 0; k < stride1; k += 2 * PW) { + *du++ = *p++; + *dv++ = *p++; + } + } + for (unsigned int k = 0; k < w3; k += 2 * PW) { + *du++ = *p++; + *dv++ = *p++; + } + } + } +} + +void FUNC(av_rpi_planar_to_sand_c)(uint8_t * dst_c, + unsigned int stride1, unsigned int stride2, + const uint8_t * src_u, const unsigned int src_stride_u, + const uint8_t * src_v, const unsigned int src_stride_v, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h) +{ + const unsigned int x = _x * 2; + const unsigned int w = _w * 2; + const unsigned int mask = stride1 - 1; + if ((x & ~mask) == ((x + w) & ~mask)) { + // All in one sand stripe + uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2; + for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1) { + const pixel * su = (const pixel *)src_u; + const pixel * sv = (const pixel *)src_v; + pixel * p = (pixel *)p1; + for (unsigned int k = 0; k < w; k += 2 * PW) { + *p++ = *su++; + *p++ = *sv++; + } + } + } + else + { + // Two+ stripe + const unsigned int sstride = stride1 * stride2; + const unsigned int sstride_p = (sstride - stride1) / PW; + + const uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2; + const uint8_t * p2 = p1 + sstride - (x & mask); + const unsigned int w1 = stride1 - (x & mask); + const unsigned int w3 = (x + w) & mask; + const unsigned int w2 = w - (w1 + w3); + + for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1, p2 += stride1) { + unsigned int j; + const pixel * su = (const pixel *)src_u; + const pixel * sv = (const pixel *)src_v; + pixel * p = (pixel *)p1; + for (unsigned int k = 0; k < w1; k += 2 * PW) { + *p++ = *su++; + *p++ = *sv++; + } + for (j = 0, p = (pixel *)p2; j < w2; j += stride1, p += sstride_p) { + for (unsigned int k = 0; k < stride1; k += 2 * PW) { + *p++ = *su++; + *p++ = *sv++; + } + } + for (unsigned int k = 0; k < w3; k += 2 * PW) { + *p++ = *su++; + *p++ = *sv++; + } + } + } +} + + +#undef pixel +#undef STRCAT +#undef FUNC + diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c new file mode 100644 index 0000000000..b6071e2928 --- /dev/null +++ b/libavutil/rpi_sand_fns.c @@ -0,0 +1,445 @@ +/* +Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Authors: John Cox +*/ + +#include "config.h" +#include +#include +#include "rpi_sand_fns.h" +#include "avassert.h" +#include "frame.h" + +#if ARCH_ARM && HAVE_NEON +#include "arm/rpi_sand_neon.h" +#define HAVE_SAND_ASM 1 +#elif ARCH_AARCH64 && HAVE_NEON +#include "aarch64/rpi_sand_neon.h" +#define HAVE_SAND_ASM 1 +#else +#define HAVE_SAND_ASM 0 +#endif + +#define PW 1 +#include "rpi_sand_fn_pw.h" +#undef PW + +#define PW 2 +#include "rpi_sand_fn_pw.h" +#undef PW + +#if 1 +// Simple round +static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) +{ + const unsigned int rnd = (1 << shr) >> 1; + const uint16_t * src = (const uint16_t *)_src; + + for (; n != 0; --n) { + *dst++ = (*src++ + rnd) >> shr; + } +} +#else +// Dithered variation +static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) +{ + unsigned int rnd = (1 << shr) >> 1; + const unsigned int mask = ((1 << shr) - 1); + const uint16_t * src = (const uint16_t *)_src; + + for (; n != 0; --n) { + rnd = *src++ + (rnd & mask); + *dst++ = rnd >> shr; + } +} +#endif + +// Fetches a single patch - offscreen fixup not done here +// w <= stride1 +// unclipped +// _x & _w in pixels, strides in bytes +void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h) +{ + const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word + const unsigned int xskip0 = _x - (x0 >> 2) * 3; + const unsigned int x1 = ((_x + _w) / 3) * 4; + const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; + const unsigned int mask = stride1 - 1; + const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; + const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words + +#if HAVE_SAND_ASM + if (_x == 0) { + ff_rpi_sand30_lines_to_planar_y16(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); + return; + } +#endif + + if (x0 == x1) { + // ******************* + // Partial single word xfer + return; + } + + for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) + { + unsigned int x = x0; + const uint32_t * p = (const uint32_t *)p0; + uint16_t * d = (uint16_t *)dst; + + if (xskip0 != 0) { + const uint32_t p3 = *p++; + + if (xskip0 == 1) + *d++ = (p3 >> 10) & 0x3ff; + *d++ = (p3 >> 20) & 0x3ff; + + if (((x += 4) & mask) == 0) + p += slice_inc; + } + + while (x != x1) { + const uint32_t p3 = *p++; + *d++ = p3 & 0x3ff; + *d++ = (p3 >> 10) & 0x3ff; + *d++ = (p3 >> 20) & 0x3ff; + + if (((x += 4) & mask) == 0) + p += slice_inc; + } + + if (xrem1 != 0) { + const uint32_t p3 = *p; + + *d++ = p3 & 0x3ff; + if (xrem1 == 2) + *d++ = (p3 >> 10) & 0x3ff; + } + } +} + + +void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, + uint8_t * dst_v, const unsigned int dst_stride_v, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h) +{ + const unsigned int x0 = (_x / 3) * 8; // Byte offset of the word + const unsigned int xskip0 = _x - (x0 >> 3) * 3; + const unsigned int x1 = ((_x + _w) / 3) * 8; + const unsigned int xrem1 = _x + _w - (x1 >> 3) * 3; + const unsigned int mask = stride1 - 1; + const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; + const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words + +#if HAVE_SAND_ASM + if (_x == 0) { + ff_rpi_sand30_lines_to_planar_c16(dst_u, dst_stride_u, dst_v, dst_stride_v, + src, stride1, stride2, _x, y, _w, h); + return; + } +#endif + + if (x0 == x1) { + // ******************* + // Partial single word xfer + return; + } + + for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p0 += stride1) + { + unsigned int x = x0; + const uint32_t * p = (const uint32_t *)p0; + uint16_t * du = (uint16_t *)dst_u; + uint16_t * dv = (uint16_t *)dst_v; + + if (xskip0 != 0) { + const uint32_t p3a = *p++; + const uint32_t p3b = *p++; + + if (xskip0 == 1) + { + *du++ = (p3a >> 20) & 0x3ff; + *dv++ = (p3b >> 0) & 0x3ff; + } + *du++ = (p3b >> 10) & 0x3ff; + *dv++ = (p3b >> 20) & 0x3ff; + + if (((x += 8) & mask) == 0) + p += slice_inc; + } + + while (x != x1) { + const uint32_t p3a = *p++; + const uint32_t p3b = *p++; + + *du++ = p3a & 0x3ff; + *dv++ = (p3a >> 10) & 0x3ff; + *du++ = (p3a >> 20) & 0x3ff; + *dv++ = p3b & 0x3ff; + *du++ = (p3b >> 10) & 0x3ff; + *dv++ = (p3b >> 20) & 0x3ff; + + if (((x += 8) & mask) == 0) + p += slice_inc; + } + + if (xrem1 != 0) { + const uint32_t p3a = *p++; + const uint32_t p3b = *p++; + + *du++ = p3a & 0x3ff; + *dv++ = (p3a >> 10) & 0x3ff; + if (xrem1 == 2) + { + *du++ = (p3a >> 20) & 0x3ff; + *dv++ = p3b & 0x3ff; + } + } + } +} + +// Fetches a single patch - offscreen fixup not done here +// w <= stride1 +// single lose bottom 2 bits truncation +// _x & _w in pixels, strides in bytes +void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h) +{ + const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word + const unsigned int xskip0 = _x - (x0 >> 2) * 3; + const unsigned int x1 = ((_x + _w) / 3) * 4; + const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; + const unsigned int mask = stride1 - 1; + const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; + const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words + +#if HAVE_SAND_ASM + if (_x == 0) { + ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); + return; + } +#endif + + if (x0 == x1) { + // ******************* + // Partial single word xfer + return; + } + + for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) + { + unsigned int x = x0; + const uint32_t * p = (const uint32_t *)p0; + uint8_t * d = dst; + + if (xskip0 != 0) { + const uint32_t p3 = *p++; + + if (xskip0 == 1) + *d++ = (p3 >> 12) & 0xff; + *d++ = (p3 >> 22) & 0xff; + + if (((x += 4) & mask) == 0) + p += slice_inc; + } + + while (x != x1) { + const uint32_t p3 = *p++; + *d++ = (p3 >> 2) & 0xff; + *d++ = (p3 >> 12) & 0xff; + *d++ = (p3 >> 22) & 0xff; + + if (((x += 4) & mask) == 0) + p += slice_inc; + } + + if (xrem1 != 0) { + const uint32_t p3 = *p; + + *d++ = (p3 >> 2) & 0xff; + if (xrem1 == 2) + *d++ = (p3 >> 12) & 0xff; + } + } +} + + + +// w/h in pixels +void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, + const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2, + unsigned int w, unsigned int h, const unsigned int shr) +{ + const unsigned int n = dst_stride1 / 2; + unsigned int j; + + // This is true for our current layouts + av_assert0(dst_stride1 == src_stride1); + + // As we have the same stride1 for src & dest and src is wider than dest + // then if we loop on src we can always write contiguously to dest + // We make no effort to copy an exact width - round up to nearest src stripe + // as we will always have storage in dest for that + +#if ARCH_ARM && HAVE_NEON + if (shr == 3 && src_stride1 == 128) { + for (j = 0; j + n < w; j += dst_stride1) { + uint8_t * d = dst + j * dst_stride2; + const uint8_t * s1 = src + j * 2 * src_stride2; + const uint8_t * s2 = s1 + src_stride1 * src_stride2; + + ff_rpi_sand128b_stripe_to_8_10(d, s1, s2, h); + } + } + else +#endif + { + for (j = 0; j + n < w; j += dst_stride1) { + uint8_t * d = dst + j * dst_stride2; + const uint8_t * s1 = src + j * 2 * src_stride2; + const uint8_t * s2 = s1 + src_stride1 * src_stride2; + + for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, s2 += src_stride1, d += dst_stride1) { + cpy16_to_8(d, s1, n, shr); + cpy16_to_8(d + n, s2, n, shr); + } + } + } + + // Fix up a trailing dest half stripe + if (j < w) { + uint8_t * d = dst + j * dst_stride2; + const uint8_t * s1 = src + j * 2 * src_stride2; + + for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, d += dst_stride1) { + cpy16_to_8(d, s1, n, shr); + } + } +} + +int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src) +{ + const int w = av_frame_cropped_width(src); + const int h = av_frame_cropped_height(src); + const int x = src->crop_left; + const int y = src->crop_top; + + // We will crop as part of the conversion + dst->crop_top = 0; + dst->crop_left = 0; + dst->crop_bottom = 0; + dst->crop_right = 0; + + switch (src->format){ + case AV_PIX_FMT_SAND128: + case AV_PIX_FMT_RPI4_8: + switch (dst->format){ + case AV_PIX_FMT_YUV420P: + av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], + src->data[0], + av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), + x, y, w, h); + av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1], + dst->data[2], dst->linesize[2], + src->data[1], + av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), + x/2, y/2, w/2, h/2); + break; + case AV_PIX_FMT_NV12: + av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], + src->data[0], + av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), + x, y, w, h); + av_rpi_sand_to_planar_y8(dst->data[1], dst->linesize[1], + src->data[1], + av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), + x/2, y/2, w, h/2); + break; + default: + return -1; + } + break; + case AV_PIX_FMT_SAND64_10: + switch (dst->format){ + case AV_PIX_FMT_YUV420P10: + av_rpi_sand_to_planar_y16(dst->data[0], dst->linesize[0], + src->data[0], + av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), + x*2, y, w*2, h); + av_rpi_sand_to_planar_c16(dst->data[1], dst->linesize[1], + dst->data[2], dst->linesize[2], + src->data[1], + av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), + x, y/2, w, h/2); + break; + default: + return -1; + } + break; + case AV_PIX_FMT_RPI4_10: + switch (dst->format){ + case AV_PIX_FMT_YUV420P10: + av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0], + src->data[0], + av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), + x, y, w, h); + av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1], + dst->data[2], dst->linesize[2], + src->data[1], + av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), + x/2, y/2, w/2, h/2); + break; + case AV_PIX_FMT_NV12: + av_rpi_sand30_to_planar_y8(dst->data[0], dst->linesize[0], + src->data[0], + av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), + x, y, w, h); + av_rpi_sand30_to_planar_y8(dst->data[1], dst->linesize[1], + src->data[1], + av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), + x/2, y/2, w, h/2); + break; + default: + return -1; + } + break; + default: + return -1; + } + + return av_frame_copy_props(dst, src); +} diff --git a/libavutil/rpi_sand_fns.h b/libavutil/rpi_sand_fns.h new file mode 100644 index 0000000000..462ccb8abd --- /dev/null +++ b/libavutil/rpi_sand_fns.h @@ -0,0 +1,188 @@ +/* +Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Authors: John Cox +*/ + +#ifndef AVUTIL_RPI_SAND_FNS +#define AVUTIL_RPI_SAND_FNS + +#include "libavutil/frame.h" + +// For all these fns _x & _w are measured as coord * PW +// For the C fns coords are in chroma pels (so luma / 2) +// Strides are in bytes + +void av_rpi_sand_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); +void av_rpi_sand_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + +void av_rpi_sand_to_planar_c8(uint8_t * dst_u, const unsigned int dst_stride_u, + uint8_t * dst_v, const unsigned int dst_stride_v, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); +void av_rpi_sand_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, + uint8_t * dst_v, const unsigned int dst_stride_v, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + +void av_rpi_planar_to_sand_c8(uint8_t * dst_c, + unsigned int stride1, unsigned int stride2, + const uint8_t * src_u, const unsigned int src_stride_u, + const uint8_t * src_v, const unsigned int src_stride_v, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); +void av_rpi_planar_to_sand_c16(uint8_t * dst_c, + unsigned int stride1, unsigned int stride2, + const uint8_t * src_u, const unsigned int src_stride_u, + const uint8_t * src_v, const unsigned int src_stride_v, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + +void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); +void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, + uint8_t * dst_v, const unsigned int dst_stride_v, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + +void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + +// w/h in pixels +void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, + const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2, + unsigned int w, unsigned int h, const unsigned int shr); + + +// dst must contain required pixel format & allocated data buffers +// Cropping on the src buffer will be honoured and dst crop will be set to zero +int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src); + + +static inline unsigned int av_rpi_sand_frame_stride1(const AVFrame * const frame) +{ +#ifdef RPI_ZC_SAND128_ONLY + // If we are sure we only only support 128 byte sand formats replace the + // var with a constant which should allow for better optimisation + return 128; +#else + return frame->linesize[0]; +#endif +} + +static inline unsigned int av_rpi_sand_frame_stride2(const AVFrame * const frame) +{ + return frame->linesize[3]; +} + + +static inline int av_rpi_is_sand_format(const int format) +{ + return (format >= AV_PIX_FMT_SAND128 && format <= AV_PIX_FMT_RPI4_10); +} + +static inline int av_rpi_is_sand_frame(const AVFrame * const frame) +{ + return av_rpi_is_sand_format(frame->format); +} + +static inline int av_rpi_is_sand8_frame(const AVFrame * const frame) +{ + return (frame->format == AV_PIX_FMT_SAND128 || frame->format == AV_PIX_FMT_RPI4_8); +} + +static inline int av_rpi_is_sand16_frame(const AVFrame * const frame) +{ + return (frame->format >= AV_PIX_FMT_SAND64_10 && frame->format <= AV_PIX_FMT_SAND64_16); +} + +static inline int av_rpi_is_sand30_frame(const AVFrame * const frame) +{ + return (frame->format == AV_PIX_FMT_RPI4_10); +} + +static inline int av_rpi_sand_frame_xshl(const AVFrame * const frame) +{ + return av_rpi_is_sand8_frame(frame) ? 0 : 1; +} + +// If x is measured in bytes (not pixels) then this works for sand64_16 as +// well as sand128 - but in the general case we work that out + +static inline unsigned int av_rpi_sand_frame_off_y(const AVFrame * const frame, const unsigned int x_y, const unsigned int y) +{ + const unsigned int stride1 = av_rpi_sand_frame_stride1(frame); + const unsigned int stride2 = av_rpi_sand_frame_stride2(frame); + const unsigned int x = x_y << av_rpi_sand_frame_xshl(frame); + const unsigned int x1 = x & (stride1 - 1); + const unsigned int x2 = x ^ x1; + + return x1 + stride1 * y + stride2 * x2; +} + +static inline unsigned int av_rpi_sand_frame_off_c(const AVFrame * const frame, const unsigned int x_c, const unsigned int y_c) +{ + const unsigned int stride1 = av_rpi_sand_frame_stride1(frame); + const unsigned int stride2 = av_rpi_sand_frame_stride2(frame); + const unsigned int x = x_c << (av_rpi_sand_frame_xshl(frame) + 1); + const unsigned int x1 = x & (stride1 - 1); + const unsigned int x2 = x ^ x1; + + return x1 + stride1 * y_c + stride2 * x2; +} + +static inline uint8_t * av_rpi_sand_frame_pos_y(const AVFrame * const frame, const unsigned int x, const unsigned int y) +{ + return frame->data[0] + av_rpi_sand_frame_off_y(frame, x, y); +} + +static inline uint8_t * av_rpi_sand_frame_pos_c(const AVFrame * const frame, const unsigned int x, const unsigned int y) +{ + return frame->data[1] + av_rpi_sand_frame_off_c(frame, x, y); +} + +#endif + diff --git a/libswscale/aarch64/rgb2rgb.c b/libswscale/aarch64/rgb2rgb.c index a9bf6ff9e0..6a0e2dcc09 100644 --- a/libswscale/aarch64/rgb2rgb.c +++ b/libswscale/aarch64/rgb2rgb.c @@ -30,6 +30,12 @@ void ff_interleave_bytes_neon(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, int width, int height, int src1Stride, int src2Stride, int dstStride); +void ff_bgr24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, int32_t *rgb2yuv); +void ff_rgb24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, int32_t *rgb2yuv); av_cold void rgb2rgb_init_aarch64(void) { @@ -37,5 +43,7 @@ av_cold void rgb2rgb_init_aarch64(void) if (have_neon(cpu_flags)) { interleaveBytes = ff_interleave_bytes_neon; + ff_rgb24toyv12 = ff_rgb24toyv12_aarch64; + ff_bgr24toyv12 = ff_bgr24toyv12_aarch64; } } diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S index d81110ec57..476ca723a0 100644 --- a/libswscale/aarch64/rgb2rgb_neon.S +++ b/libswscale/aarch64/rgb2rgb_neon.S @@ -77,3 +77,448 @@ function ff_interleave_bytes_neon, export=1 0: ret endfunc + +// void ff_rgb24toyv12_aarch64( +// const uint8_t *src, // x0 +// uint8_t *ydst, // x1 +// uint8_t *udst, // x2 +// uint8_t *vdst, // x3 +// int width, // w4 +// int height, // w5 +// int lumStride, // w6 +// int chromStride, // w7 +// int srcStr, // [sp, #0] +// int32_t *rgb2yuv); // [sp, #8] + +function ff_rgb24toyv12_aarch64, export=1 + ldr x15, [sp, #8] + ld1 {v3.s}[2], [x15], #4 + ld1 {v3.s}[1], [x15], #4 + ld1 {v3.s}[0], [x15], #4 + ld1 {v4.s}[2], [x15], #4 + ld1 {v4.s}[1], [x15], #4 + ld1 {v4.s}[0], [x15], #4 + ld1 {v5.s}[2], [x15], #4 + ld1 {v5.s}[1], [x15], #4 + ld1 {v5.s}[0], [x15] + b 99f +endfunc + +// void ff_bgr24toyv12_aarch64( +// const uint8_t *src, // x0 +// uint8_t *ydst, // x1 +// uint8_t *udst, // x2 +// uint8_t *vdst, // x3 +// int width, // w4 +// int height, // w5 +// int lumStride, // w6 +// int chromStride, // w7 +// int srcStr, // [sp, #0] +// int32_t *rgb2yuv); // [sp, #8] + +// regs +// v0-2 Src bytes - reused as chroma src +// v3-5 Coeffs (packed very inefficiently - could be squashed) +// v6 128b +// v7 128h +// v8-15 Reserved +// v16-18 Lo Src expanded as H +// v19 - +// v20-22 Hi Src expanded as H +// v23 - +// v24 U out +// v25 U tmp +// v26 Y out +// v27-29 Y tmp +// v30 V out +// v31 V tmp + +// Assumes Little Endian in tail stores & conversion matrix + +function ff_bgr24toyv12_aarch64, export=1 + ldr x15, [sp, #8] + ld3 {v3.s, v4.s, v5.s}[0], [x15], #12 + ld3 {v3.s, v4.s, v5.s}[1], [x15], #12 + ld3 {v3.s, v4.s, v5.s}[2], [x15] +99: + ldr w14, [sp, #0] + movi v7.8b, #128 + uxtl v6.8h, v7.8b + // Ensure if nothing to do then we do nothing + cmp w4, #0 + b.le 90f + cmp w5, #0 + b.le 90f + // If w % 16 != 0 then -16 so we do main loop 1 fewer times with + // the remainder done in the tail + tst w4, #15 + b.eq 1f + sub w4, w4, #16 +1: + +// -------------------- Even line body - YUV +11: + subs w9, w4, #0 + mov x10, x0 + mov x11, x1 + mov x12, x2 + mov x13, x3 + b.lt 12f + + ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 + subs w9, w9, #16 + b.le 13f + +10: + uxtl v16.8h, v0.8b + uxtl v17.8h, v1.8b + uxtl v18.8h, v2.8b + + uxtl2 v20.8h, v0.16b + uxtl2 v21.8h, v1.16b + uxtl2 v22.8h, v2.16b + + bic v0.8h, #0xff, LSL #8 + bic v1.8h, #0xff, LSL #8 + bic v2.8h, #0xff, LSL #8 + + // Testing shows it is faster to stack the smull/smlal ops together + // rather than interleave them between channels and indeed even the + // shift/add sections seem happier not interleaved + + // Y0 + smull v26.4s, v16.4h, v3.h[0] + smlal v26.4s, v17.4h, v4.h[0] + smlal v26.4s, v18.4h, v5.h[0] + smull2 v27.4s, v16.8h, v3.h[0] + smlal2 v27.4s, v17.8h, v4.h[0] + smlal2 v27.4s, v18.8h, v5.h[0] + // Y1 + smull v28.4s, v20.4h, v3.h[0] + smlal v28.4s, v21.4h, v4.h[0] + smlal v28.4s, v22.4h, v5.h[0] + smull2 v29.4s, v20.8h, v3.h[0] + smlal2 v29.4s, v21.8h, v4.h[0] + smlal2 v29.4s, v22.8h, v5.h[0] + shrn v26.4h, v26.4s, #12 + shrn2 v26.8h, v27.4s, #12 + add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) + sqrshrun v26.8b, v26.8h, #3 + shrn v28.4h, v28.4s, #12 + shrn2 v28.8h, v29.4s, #12 + add v28.8h, v28.8h, v6.8h + sqrshrun2 v26.16b, v28.8h, #3 + // Y0/Y1 + + // U + // Vector subscript *2 as we loaded into S but are only using H + smull v24.4s, v0.4h, v3.h[2] + smlal v24.4s, v1.4h, v4.h[2] + smlal v24.4s, v2.4h, v5.h[2] + smull2 v25.4s, v0.8h, v3.h[2] + smlal2 v25.4s, v1.8h, v4.h[2] + smlal2 v25.4s, v2.8h, v5.h[2] + + // V + smull v30.4s, v0.4h, v3.h[4] + smlal v30.4s, v1.4h, v4.h[4] + smlal v30.4s, v2.4h, v5.h[4] + smull2 v31.4s, v0.8h, v3.h[4] + smlal2 v31.4s, v1.8h, v4.h[4] + smlal2 v31.4s, v2.8h, v5.h[4] + + ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 + + shrn v24.4h, v24.4s, #14 + shrn2 v24.8h, v25.4s, #14 + sqrshrn v24.8b, v24.8h, #1 + add v24.8b, v24.8b, v7.8b // +128 + shrn v30.4h, v30.4s, #14 + shrn2 v30.8h, v31.4s, #14 + sqrshrn v30.8b, v30.8h, #1 + add v30.8b, v30.8b, v7.8b // +128 + + subs w9, w9, #16 + + st1 {v26.16b}, [x11], #16 + st1 {v24.8b}, [x12], #8 + st1 {v30.8b}, [x13], #8 + + b.gt 10b + +// -------------------- Even line tail - YUV +// If width % 16 == 0 then simply runs once with preloaded RGB +// If other then deals with preload & then does remaining tail + +13: + // Body is simple copy of main loop body minus preload + + uxtl v16.8h, v0.8b + uxtl v17.8h, v1.8b + uxtl v18.8h, v2.8b + + uxtl2 v20.8h, v0.16b + uxtl2 v21.8h, v1.16b + uxtl2 v22.8h, v2.16b + + bic v0.8h, #0xff, LSL #8 + bic v1.8h, #0xff, LSL #8 + bic v2.8h, #0xff, LSL #8 + + // Y0 + smull v26.4s, v16.4h, v3.h[0] + smlal v26.4s, v17.4h, v4.h[0] + smlal v26.4s, v18.4h, v5.h[0] + smull2 v27.4s, v16.8h, v3.h[0] + smlal2 v27.4s, v17.8h, v4.h[0] + smlal2 v27.4s, v18.8h, v5.h[0] + // Y1 + smull v28.4s, v20.4h, v3.h[0] + smlal v28.4s, v21.4h, v4.h[0] + smlal v28.4s, v22.4h, v5.h[0] + smull2 v29.4s, v20.8h, v3.h[0] + smlal2 v29.4s, v21.8h, v4.h[0] + smlal2 v29.4s, v22.8h, v5.h[0] + shrn v26.4h, v26.4s, #12 + shrn2 v26.8h, v27.4s, #12 + add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) + sqrshrun v26.8b, v26.8h, #3 + shrn v28.4h, v28.4s, #12 + shrn2 v28.8h, v29.4s, #12 + add v28.8h, v28.8h, v6.8h + sqrshrun2 v26.16b, v28.8h, #3 + // Y0/Y1 + + // U + // Vector subscript *2 as we loaded into S but are only using H + smull v24.4s, v0.4h, v3.h[2] + smlal v24.4s, v1.4h, v4.h[2] + smlal v24.4s, v2.4h, v5.h[2] + smull2 v25.4s, v0.8h, v3.h[2] + smlal2 v25.4s, v1.8h, v4.h[2] + smlal2 v25.4s, v2.8h, v5.h[2] + + // V + smull v30.4s, v0.4h, v3.h[4] + smlal v30.4s, v1.4h, v4.h[4] + smlal v30.4s, v2.4h, v5.h[4] + smull2 v31.4s, v0.8h, v3.h[4] + smlal2 v31.4s, v1.8h, v4.h[4] + smlal2 v31.4s, v2.8h, v5.h[4] + + cmp w9, #-16 + + shrn v24.4h, v24.4s, #14 + shrn2 v24.8h, v25.4s, #14 + sqrshrn v24.8b, v24.8h, #1 + add v24.8b, v24.8b, v7.8b // +128 + shrn v30.4h, v30.4s, #14 + shrn2 v30.8h, v31.4s, #14 + sqrshrn v30.8b, v30.8h, #1 + add v30.8b, v30.8b, v7.8b // +128 + + // Here: + // w9 == 0 width % 16 == 0, tail done + // w9 > -16 1st tail done (16 pels), remainder still to go + // w9 == -16 shouldn't happen + // w9 > -32 2nd tail done + // w9 <= -32 shouldn't happen + + b.lt 2f + st1 {v26.16b}, [x11], #16 + st1 {v24.8b}, [x12], #8 + st1 {v30.8b}, [x13], #8 + cbz w9, 3f + +12: + sub w9, w9, #16 + + tbz w9, #3, 1f + ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24 +1: tbz w9, #2, 1f + ld3 {v0.b, v1.b, v2.b}[8], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[9], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[10], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[11], [x10], #3 +1: tbz w9, #1, 1f + ld3 {v0.b, v1.b, v2.b}[12], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[13], [x10], #3 +1: tbz w9, #0, 13b + ld3 {v0.b, v1.b, v2.b}[14], [x10], #3 + b 13b + +2: + tbz w9, #3, 1f + st1 {v26.8b}, [x11], #8 + st1 {v24.s}[0], [x12], #4 + st1 {v30.s}[0], [x13], #4 +1: tbz w9, #2, 1f + st1 {v26.s}[2], [x11], #4 + st1 {v24.h}[2], [x12], #2 + st1 {v30.h}[2], [x13], #2 +1: tbz w9, #1, 1f + st1 {v26.h}[6], [x11], #2 + st1 {v24.b}[6], [x12], #1 + st1 {v30.b}[6], [x13], #1 +1: tbz w9, #0, 1f + st1 {v26.b}[14], [x11] + st1 {v24.b}[7], [x12] + st1 {v30.b}[7], [x13] +1: +3: + +// -------------------- Odd line body - Y only + + subs w5, w5, #1 + b.eq 90f + + subs w9, w4, #0 + add x0, x0, w14, SXTX + add x1, x1, w6, SXTX + mov x10, x0 + mov x11, x1 + b.lt 12f + + ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 + subs w9, w9, #16 + b.le 13f + +10: + uxtl v16.8h, v0.8b + uxtl v17.8h, v1.8b + uxtl v18.8h, v2.8b + + uxtl2 v20.8h, v0.16b + uxtl2 v21.8h, v1.16b + uxtl2 v22.8h, v2.16b + + // Testing shows it is faster to stack the smull/smlal ops together + // rather than interleave them between channels and indeed even the + // shift/add sections seem happier not interleaved + + // Y0 + smull v26.4s, v16.4h, v3.h[0] + smlal v26.4s, v17.4h, v4.h[0] + smlal v26.4s, v18.4h, v5.h[0] + smull2 v27.4s, v16.8h, v3.h[0] + smlal2 v27.4s, v17.8h, v4.h[0] + smlal2 v27.4s, v18.8h, v5.h[0] + // Y1 + smull v28.4s, v20.4h, v3.h[0] + smlal v28.4s, v21.4h, v4.h[0] + smlal v28.4s, v22.4h, v5.h[0] + smull2 v29.4s, v20.8h, v3.h[0] + smlal2 v29.4s, v21.8h, v4.h[0] + smlal2 v29.4s, v22.8h, v5.h[0] + + ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 + + shrn v26.4h, v26.4s, #12 + shrn2 v26.8h, v27.4s, #12 + add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) + sqrshrun v26.8b, v26.8h, #3 + shrn v28.4h, v28.4s, #12 + shrn2 v28.8h, v29.4s, #12 + add v28.8h, v28.8h, v6.8h + sqrshrun2 v26.16b, v28.8h, #3 + // Y0/Y1 + + subs w9, w9, #16 + + st1 {v26.16b}, [x11], #16 + + b.gt 10b + +// -------------------- Odd line tail - Y +// If width % 16 == 0 then simply runs once with preloaded RGB +// If other then deals with preload & then does remaining tail + +13: + // Body is simple copy of main loop body minus preload + + uxtl v16.8h, v0.8b + uxtl v17.8h, v1.8b + uxtl v18.8h, v2.8b + + uxtl2 v20.8h, v0.16b + uxtl2 v21.8h, v1.16b + uxtl2 v22.8h, v2.16b + + // Y0 + smull v26.4s, v16.4h, v3.h[0] + smlal v26.4s, v17.4h, v4.h[0] + smlal v26.4s, v18.4h, v5.h[0] + smull2 v27.4s, v16.8h, v3.h[0] + smlal2 v27.4s, v17.8h, v4.h[0] + smlal2 v27.4s, v18.8h, v5.h[0] + // Y1 + smull v28.4s, v20.4h, v3.h[0] + smlal v28.4s, v21.4h, v4.h[0] + smlal v28.4s, v22.4h, v5.h[0] + smull2 v29.4s, v20.8h, v3.h[0] + smlal2 v29.4s, v21.8h, v4.h[0] + smlal2 v29.4s, v22.8h, v5.h[0] + + cmp w9, #-16 + + shrn v26.4h, v26.4s, #12 + shrn2 v26.8h, v27.4s, #12 + add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) + sqrshrun v26.8b, v26.8h, #3 + shrn v28.4h, v28.4s, #12 + shrn2 v28.8h, v29.4s, #12 + add v28.8h, v28.8h, v6.8h + sqrshrun2 v26.16b, v28.8h, #3 + // Y0/Y1 + + // Here: + // w9 == 0 width % 16 == 0, tail done + // w9 > -16 1st tail done (16 pels), remainder still to go + // w9 == -16 shouldn't happen + // w9 > -32 2nd tail done + // w9 <= -32 shouldn't happen + + b.lt 2f + st1 {v26.16b}, [x11], #16 + cbz w9, 3f + +12: + sub w9, w9, #16 + + tbz w9, #3, 1f + ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24 +1: tbz w9, #2, 1f + ld3 {v0.b, v1.b, v2.b}[8], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[9], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[10], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[11], [x10], #3 +1: tbz w9, #1, 1f + ld3 {v0.b, v1.b, v2.b}[12], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[13], [x10], #3 +1: tbz w9, #0, 13b + ld3 {v0.b, v1.b, v2.b}[14], [x10], #3 + b 13b + +2: + tbz w9, #3, 1f + st1 {v26.8b}, [x11], #8 +1: tbz w9, #2, 1f + st1 {v26.s}[2], [x11], #4 +1: tbz w9, #1, 1f + st1 {v26.h}[6], [x11], #2 +1: tbz w9, #0, 1f + st1 {v26.b}[14], [x11] +1: +3: + +// ------------------- Loop to start + + add x0, x0, w14, SXTX + add x1, x1, w6, SXTX + add x2, x2, w7, SXTX + add x3, x3, w7, SXTX + subs w5, w5, #1 + b.gt 11b +90: + ret +endfunc diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c index e98fdac8ea..c3b9079d2b 100644 --- a/libswscale/rgb2rgb.c +++ b/libswscale/rgb2rgb.c @@ -83,6 +83,31 @@ void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, int width, int height, int lumStride, int chromStride, int srcStride, int32_t *rgb2yuv); +void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, + uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); +void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, + uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); +void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, + uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); +void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, + uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); +void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, + uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride); void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h index f3951d523e..a0dd3ffb79 100644 --- a/libswscale/rgb2rgb.h +++ b/libswscale/rgb2rgb.h @@ -79,6 +79,9 @@ void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size); void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride, int32_t *rgb2yuv); +void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, int32_t *rgb2yuv); /** * Height should be a multiple of 2 and width should be a multiple of 16. @@ -128,6 +131,26 @@ extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, int width, int height, int lumStride, int chromStride, int srcStride, int32_t *rgb2yuv); +extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); +extern void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); +extern void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); +extern void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); +extern void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride); diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c index 42c69801ba..e711589e1e 100644 --- a/libswscale/rgb2rgb_template.c +++ b/libswscale/rgb2rgb_template.c @@ -646,13 +646,14 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst, * others are ignored in the C version. * FIXME: Write HQ version. */ -void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, +static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, - int chromStride, int srcStride, int32_t *rgb2yuv) + int chromStride, int srcStride, int32_t *rgb2yuv, + const uint8_t x[9]) { - int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; - int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; - int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; + int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]]; + int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]]; + int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]]; int y; const int chromWidth = width >> 1; @@ -678,6 +679,19 @@ void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ydst[2 * i + 1] = Y; } + if ((width & 1) != 0) { + unsigned int b = src[6 * i + 0]; + unsigned int g = src[6 * i + 1]; + unsigned int r = src[6 * i + 2]; + + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; + unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; + + udst[i] = U; + vdst[i] = V; + ydst[2 * i] = Y; + } ydst += lumStride; src += srcStride; @@ -700,6 +714,125 @@ void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ydst[2 * i + 1] = Y; } + if ((width & 1) != 0) { + unsigned int b = src[6 * i + 0]; + unsigned int g = src[6 * i + 1]; + unsigned int r = src[6 * i + 2]; + + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + + ydst[2 * i] = Y; + } + udst += chromStride; + vdst += chromStride; + ydst += lumStride; + src += srcStride; + } +} + +static const uint8_t x_rgb[9] = { + RY_IDX, GY_IDX, BY_IDX, + RU_IDX, GU_IDX, BU_IDX, + RV_IDX, GV_IDX, BV_IDX, +}; + +static const uint8_t x_bgr[9] = { + BY_IDX, GY_IDX, RY_IDX, + BU_IDX, GU_IDX, RU_IDX, + BV_IDX, GV_IDX, RV_IDX, +}; + +void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, int32_t *rgb2yuv) +{ + rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); +} + +void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, int32_t *rgb2yuv) +{ + rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); +} + +static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, int32_t *rgb2yuv, + const uint8_t x[9]) +{ + int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]]; + int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]]; + int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]]; + int y; + const int chromWidth = width >> 1; + + for (y = 0; y < height; y += 2) { + int i; + for (i = 0; i < chromWidth; i++) { + unsigned int b = src[8 * i + 2]; + unsigned int g = src[8 * i + 1]; + unsigned int r = src[8 * i + 0]; + + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; + unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; + + udst[i] = U; + vdst[i] = V; + ydst[2 * i] = Y; + + b = src[8 * i + 6]; + g = src[8 * i + 5]; + r = src[8 * i + 4]; + + Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + ydst[2 * i + 1] = Y; + } + if ((width & 1) != 0) { + unsigned int b = src[8 * i + 2]; + unsigned int g = src[8 * i + 1]; + unsigned int r = src[8 * i + 0]; + + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; + unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; + + udst[i] = U; + vdst[i] = V; + ydst[2 * i] = Y; + } + ydst += lumStride; + src += srcStride; + + if (y+1 == height) + break; + + for (i = 0; i < chromWidth; i++) { + unsigned int b = src[8 * i + 2]; + unsigned int g = src[8 * i + 1]; + unsigned int r = src[8 * i + 0]; + + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + + ydst[2 * i] = Y; + + b = src[8 * i + 6]; + g = src[8 * i + 5]; + r = src[8 * i + 4]; + + Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + ydst[2 * i + 1] = Y; + } + if ((width & 1) != 0) { + unsigned int b = src[8 * i + 2]; + unsigned int g = src[8 * i + 1]; + unsigned int r = src[8 * i + 0]; + + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + + ydst[2 * i] = Y; + } udst += chromStride; vdst += chromStride; ydst += lumStride; @@ -707,6 +840,37 @@ void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, } } +static void ff_rgbxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, int32_t *rgb2yuv) +{ + rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); +} + +static void ff_bgrxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, int32_t *rgb2yuv) +{ + rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); +} + +// As the general code does no SIMD-like ops simply adding 1 to the src address +// will fix the ignored alpha position +static void ff_xrgbtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, int32_t *rgb2yuv) +{ + rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); +} + +static void ff_xbgrtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, int32_t *rgb2yuv) +{ + rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); +} + + static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, int width, int height, int src1Stride, int src2Stride, int dstStride) @@ -980,6 +1144,11 @@ static av_cold void rgb2rgb_init_c(void) yuy2toyv12 = yuy2toyv12_c; planar2x = planar2x_c; ff_rgb24toyv12 = ff_rgb24toyv12_c; + ff_bgr24toyv12 = ff_bgr24toyv12_c; + ff_rgbxtoyv12 = ff_rgbxtoyv12_c; + ff_bgrxtoyv12 = ff_bgrxtoyv12_c; + ff_xrgbtoyv12 = ff_xrgbtoyv12_c; + ff_xbgrtoyv12 = ff_xbgrtoyv12_c; interleaveBytes = interleaveBytes_c; deinterleaveBytes = deinterleaveBytes_c; vu9_to_vu12 = vu9_to_vu12_c; diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index 9af2e7ecc3..52469b2e4a 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -1654,6 +1654,91 @@ static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t *src[], return srcSliceH; } +static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[], + int srcStride[], int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +{ + ff_bgr24toyv12( + src[0], + dst[0] + srcSliceY * dstStride[0], + dst[1] + (srcSliceY >> 1) * dstStride[1], + dst[2] + (srcSliceY >> 1) * dstStride[2], + c->srcW, srcSliceH, + dstStride[0], dstStride[1], srcStride[0], + c->input_rgb2yuv_table); + if (dst[3]) + fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); + return srcSliceH; +} + +static int bgrxToYv12Wrapper(SwsContext *c, const uint8_t *src[], + int srcStride[], int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +{ + ff_bgrxtoyv12( + src[0], + dst[0] + srcSliceY * dstStride[0], + dst[1] + (srcSliceY >> 1) * dstStride[1], + dst[2] + (srcSliceY >> 1) * dstStride[2], + c->srcW, srcSliceH, + dstStride[0], dstStride[1], srcStride[0], + c->input_rgb2yuv_table); + if (dst[3]) + fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); + return srcSliceH; +} + +static int rgbxToYv12Wrapper(SwsContext *c, const uint8_t *src[], + int srcStride[], int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +{ + ff_rgbxtoyv12( + src[0], + dst[0] + srcSliceY * dstStride[0], + dst[1] + (srcSliceY >> 1) * dstStride[1], + dst[2] + (srcSliceY >> 1) * dstStride[2], + c->srcW, srcSliceH, + dstStride[0], dstStride[1], srcStride[0], + c->input_rgb2yuv_table); + if (dst[3]) + fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); + return srcSliceH; +} + +static int xbgrToYv12Wrapper(SwsContext *c, const uint8_t *src[], + int srcStride[], int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +{ + ff_xbgrtoyv12( + src[0], + dst[0] + srcSliceY * dstStride[0], + dst[1] + (srcSliceY >> 1) * dstStride[1], + dst[2] + (srcSliceY >> 1) * dstStride[2], + c->srcW, srcSliceH, + dstStride[0], dstStride[1], srcStride[0], + c->input_rgb2yuv_table); + if (dst[3]) + fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); + return srcSliceH; +} + +static int xrgbToYv12Wrapper(SwsContext *c, const uint8_t *src[], + int srcStride[], int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +{ + ff_xrgbtoyv12( + src[0], + dst[0] + srcSliceY * dstStride[0], + dst[1] + (srcSliceY >> 1) * dstStride[1], + dst[2] + (srcSliceY >> 1) * dstStride[2], + c->srcW, srcSliceH, + dstStride[0], dstStride[1], srcStride[0], + c->input_rgb2yuv_table); + if (dst[3]) + fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); + return srcSliceH; +} + static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) @@ -1977,7 +2062,6 @@ void ff_get_unscaled_swscale(SwsContext *c) const enum AVPixelFormat dstFormat = c->dstFormat; const int flags = c->flags; const int dstH = c->dstH; - const int dstW = c->dstW; int needsDither; needsDither = isAnyRGB(dstFormat) && @@ -2035,8 +2119,34 @@ void ff_get_unscaled_swscale(SwsContext *c) /* bgr24toYV12 */ if (srcFormat == AV_PIX_FMT_BGR24 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && - !(flags & SWS_ACCURATE_RND) && !(dstW&1)) + !(flags & SWS_ACCURATE_RND)) c->convert_unscaled = bgr24ToYv12Wrapper; + /* rgb24toYV12 */ + if (srcFormat == AV_PIX_FMT_RGB24 && + (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && + !(flags & SWS_ACCURATE_RND)) + c->convert_unscaled = rgb24ToYv12Wrapper; + + /* bgrxtoYV12 */ + if (((srcFormat == AV_PIX_FMT_BGRA && dstFormat == AV_PIX_FMT_YUV420P) || + (srcFormat == AV_PIX_FMT_BGR0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && + !(flags & SWS_ACCURATE_RND)) + c->convert_unscaled = bgrxToYv12Wrapper; + /* rgbx24toYV12 */ + if (((srcFormat == AV_PIX_FMT_RGBA && dstFormat == AV_PIX_FMT_YUV420P) || + (srcFormat == AV_PIX_FMT_RGB0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && + !(flags & SWS_ACCURATE_RND)) + c->convert_unscaled = rgbxToYv12Wrapper; + /* xbgrtoYV12 */ + if (((srcFormat == AV_PIX_FMT_ABGR && dstFormat == AV_PIX_FMT_YUV420P) || + (srcFormat == AV_PIX_FMT_0BGR && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && + !(flags & SWS_ACCURATE_RND)) + c->convert_unscaled = xbgrToYv12Wrapper; + /* xrgb24toYV12 */ + if (((srcFormat == AV_PIX_FMT_ARGB && dstFormat == AV_PIX_FMT_YUV420P) || + (srcFormat == AV_PIX_FMT_0RGB && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && + !(flags & SWS_ACCURATE_RND)) + c->convert_unscaled = xrgbToYv12Wrapper; /* RGB/BGR -> RGB/BGR (no dither needed forms) */ if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c) diff --git a/libswscale/tests/swscale.c b/libswscale/tests/swscale.c index 6c38041ddb..12776ffec7 100644 --- a/libswscale/tests/swscale.c +++ b/libswscale/tests/swscale.c @@ -23,6 +23,7 @@ #include #include #include +#include #undef HAVE_AV_CONFIG_H #include "libavutil/cpu.h" @@ -78,6 +79,15 @@ struct Results { uint32_t crc; }; +static int time_rep = 0; + +static uint64_t utime(void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_nsec / 1000 + (uint64_t)ts.tv_sec * 1000000; +} + // test by ref -> src -> dst -> out & compare out against ref // ref & out are YV12 static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h, @@ -174,7 +184,7 @@ static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h, goto end; } - printf(" %s %dx%d -> %s %3dx%3d flags=%2d", + printf(" %s %4dx%4d -> %s %4dx%4d flags=%2d", desc_src->name, srcW, srcH, desc_dst->name, dstW, dstH, flags); @@ -182,6 +192,17 @@ static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h, sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride); + if (time_rep != 0) + { + const uint64_t now = utime(); + uint64_t done; + for (i = 1; i != time_rep; ++i) { + sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride); + } + done = utime(); + printf(" T=%7"PRId64"us ", done-now); + } + for (i = 0; i < 4 && dstStride[i]; i++) crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i], dstStride[i] * dstH); @@ -355,56 +376,78 @@ static int fileTest(const uint8_t * const ref[4], int refStride[4], return 0; } -#define W 96 -#define H 96 - int main(int argc, char **argv) { + unsigned int W = 96; + unsigned int H = 96; + unsigned int W2; + unsigned int H2; + unsigned int S; enum AVPixelFormat srcFormat = AV_PIX_FMT_NONE; enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE; - uint8_t *rgb_data = av_malloc(W * H * 4); - const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL }; - int rgb_stride[4] = { 4 * W, 0, 0, 0 }; - uint8_t *data = av_malloc(4 * W * H); - const uint8_t * const src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 }; - int stride[4] = { W, W, W, W }; int x, y; struct SwsContext *sws; AVLFG rand; int res = -1; int i; FILE *fp = NULL; - - if (!rgb_data || !data) - return -1; + uint8_t *rgb_data; + uint8_t * rgb_src[4] = { NULL }; + int rgb_stride[4] = { 0 }; + uint8_t *data; + uint8_t * src[4] = { NULL }; + int stride[4] = { 0 }; for (i = 1; i < argc; i += 2) { + const char * const arg2 = argv[i+1]; + if (argv[i][0] != '-' || i + 1 == argc) goto bad_option; if (!strcmp(argv[i], "-ref")) { - fp = fopen(argv[i + 1], "r"); + fp = fopen(arg2, "r"); if (!fp) { - fprintf(stderr, "could not open '%s'\n", argv[i + 1]); + fprintf(stderr, "could not open '%s'\n", arg2); goto error; } } else if (!strcmp(argv[i], "-cpuflags")) { unsigned flags = av_get_cpu_flags(); - int ret = av_parse_cpu_caps(&flags, argv[i + 1]); + int ret = av_parse_cpu_caps(&flags, arg2); if (ret < 0) { - fprintf(stderr, "invalid cpu flags %s\n", argv[i + 1]); + fprintf(stderr, "invalid cpu flags %s\n", arg2); return ret; } av_force_cpu_flags(flags); } else if (!strcmp(argv[i], "-src")) { - srcFormat = av_get_pix_fmt(argv[i + 1]); + srcFormat = av_get_pix_fmt(arg2); if (srcFormat == AV_PIX_FMT_NONE) { - fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); + fprintf(stderr, "invalid pixel format %s\n", arg2); return -1; } } else if (!strcmp(argv[i], "-dst")) { - dstFormat = av_get_pix_fmt(argv[i + 1]); + dstFormat = av_get_pix_fmt(arg2); if (dstFormat == AV_PIX_FMT_NONE) { - fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); + fprintf(stderr, "invalid pixel format %s\n", arg2); + return -1; + } + } else if (!strcmp(argv[i], "-w")) { + char * p = NULL; + W = strtoul(arg2, &p, 0); + if (!W || *p) { + fprintf(stderr, "bad width %s\n", arg2); + return -1; + } + } else if (!strcmp(argv[i], "-h")) { + char * p = NULL; + H = strtoul(arg2, &p, 0); + if (!H || *p) { + fprintf(stderr, "bad height '%s'\n", arg2); + return -1; + } + } else if (!strcmp(argv[i], "-t")) { + char * p = NULL; + time_rep = (int)strtol(arg2, &p, 0); + if (*p) { + fprintf(stderr, "bad time repetitions '%s'\n", arg2); return -1; } } else { @@ -414,15 +457,34 @@ bad_option: } } - sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H, + S = (W + 15) & ~15; + rgb_data = av_mallocz(S * H * 4); + rgb_src[0] = rgb_data; + rgb_stride[0] = 4 * S; + data = av_mallocz(4 * S * H); + src[0] = data; + src[1] = data + S * H; + src[2] = data + S * H * 2; + src[3] = data + S * H * 3; + stride[0] = S; + stride[1] = S; + stride[2] = S; + stride[3] = S; + H2 = H < 96 ? 8 : H / 12; + W2 = W < 96 ? 8 : W / 12; + + if (!rgb_data || !data) + return -1; + + sws = sws_getContext(W2, H2, AV_PIX_FMT_RGB32, W, H, AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL); av_lfg_init(&rand, 1); for (y = 0; y < H; y++) for (x = 0; x < W * 4; x++) - rgb_data[ x + y * 4 * W] = av_lfg_get(&rand); - res = sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, (uint8_t * const *) src, stride); + rgb_data[ x + y * 4 * S] = av_lfg_get(&rand); + res = sws_scale(sws, (const uint8_t * const *)rgb_src, rgb_stride, 0, H2, (uint8_t * const *) src, stride); if (res < 0 || res != H) { res = -1; goto error; @@ -431,10 +493,10 @@ bad_option: av_free(rgb_data); if(fp) { - res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat); + res = fileTest((const uint8_t * const *)src, stride, W, H, fp, srcFormat, dstFormat); fclose(fp); } else { - selfTest(src, stride, W, H, srcFormat, dstFormat); + selfTest((const uint8_t * const *)src, stride, W, H, srcFormat, dstFormat); res = 0; } error: diff --git a/pi-util/BUILD.txt b/pi-util/BUILD.txt new file mode 100644 index 0000000000..2b62d660c0 --- /dev/null +++ b/pi-util/BUILD.txt @@ -0,0 +1,67 @@ +Building Pi FFmpeg +================== + +Current only building on a Pi is supported. +This builds ffmpeg the way I've tested it + +Get all dependencies - the current package dependencies are good enough + +$ sudo apt-get build-dep ffmpeg + +Configure using the pi-util/conf_native.sh script +------------------------------------------------- + +This sets the normal release options and creates an ouutput dir to build into +The directory name will depend on system and options but will be under out/ + +There are a few choices here + --mmal build including the legacy mmal-based decoders and zero-copy code + this requires appropriate libraries which currently will exist for + armv7 but not arm64 + --noshared + Build a static image rather than a shared library one. Static is + easier for testing as there is no need to worry about library + paths being confused and therefore running the wrong code, Shared + is what is needed, in most cases, when building for use by other + programs. + --usr Set install dir to /usr (i.e. system default) rather than in + /install + +So for a static build +--------------------- + +$ pi-util/conf_native.sh --noshared + +$ make -j8 -C out/ + +You can now run ffmpeg directly from where it was built + +For a shared build +------------------ + +There are two choices here + +$ pi-util/conf_native.sh +$ make -j8 -C out/ install + +This sets the install prefix to /install and is probably what you +want if you don't want to overwrite the system files. + +You can now set LD_LIBRARY_PATH appropriately and run ffmpeg from where it was +built. You can copy the contents of /install to /usr and that mostly +works. The only downside is that paths in pkgconfig end up being set to the +install directory in your build directory which may be less than ideal when +building other packages. + +The alternative if you just want to replace the system libs is: + +$ pi-util/conf_native.sh --usr +$ make -j8 -C out/ +$ sudo pi-util/clean_usr_libs.sh +$ sudo make -j8 -C out/ install + +The clean_usr_libs.sh step wipes any existing libs & includes (for all +architectures) from the system which helps avoid confusion when running other +progs as you can be sure you're not running old code which is unfortunately +easy to do otherwise. + diff --git a/pi-util/NOTES.txt b/pi-util/NOTES.txt new file mode 100644 index 0000000000..fcce72226a --- /dev/null +++ b/pi-util/NOTES.txt @@ -0,0 +1,69 @@ +Notes on the hevc_rpi decoder & associated support code +------------------------------------------------------- + +There are 3 main parts to the existing code: + +1) The decoder - this is all in libavcodec as rpi_hevc*. + +2) A few filters to deal with Sand frames and a small patch to +automatically select the sand->i420 converter when required. + +3) A kludge in ffmpeg.c to display the decoded video. This could & should +be converted into a proper ffmpeg display module. + + +Decoder +------- + +The decoder is a modified version of the existing ffmpeg hevc decoder. +Generally it is ~100% faster than the existing ffmpeg hevc s/w decoder. +More complex bitstreams can be up to ~200% faster but particularly easy +streams can cut its advantage down to ~50%. This means that a Pi3+ can +display nearly all 8-bit 1080p30 streams and with some overclocking it can +display most lower bitrate 10-bit 1080p30 streams - this latter case is +not helped by the requirement to downsample to 8-bit before display on a +Pi. + +It has had co-processor offload added for inter-pred and large block +residual transform. Various parts have had optimized ARM NEON assembler +added and the existing ARM asm sections have been profiled and +re-optimized for A53. The main C code has been substantially reworked at +its lower levels in an attempt to optimize it and minimize memory +bandwidth. To some extent code paths that deal with frame types that it +doesn't support have been pruned. + +It outputs frames in Broadcom Sand format. This is a somewhat annoying +layout that doesn't fit into ffmpegs standard frame descriptions. It has +vertical stripes of 128 horizontal pixels (64 in 10 bit forms) with Y for +the stripe followed by interleaved U & V, that is then followed by the Y +for the next stripe, etc. The final stripe is always padded to +stripe-width. This is used in an attempt to help with cache locality and +cut down on the number of dram bank switches. It is annoying to use for +inter-pred with conventional processing but the way the Pi QPU (which is +used for inter-pred) works means that it has negligible downsides here and +the improved memory performance exceeds the overhead of the increased +complexity in the rest of the code. + +Frames must be allocated out of GPU memory (as otherwise they can't be +accessed by the co-processors). Utility functions (in rpi_zc.c) have been +written to make this easier. As the frames are already in GPU memory they +can be displayed by the Pi h/w without any further copying. + + +Known non-features +------------------ + +Frame allocation should probably be done in some other way in order to fit +into the standard framework better. + +Sand frames are currently declared as software frames, there is an +argument that they should be hardware frames but they aren't really. + +There must be a better way of auto-selecting the hevc_rpi decoder over the +normal s/w hevc decoder, but I became confused by the existing h/w +acceleration framework and what I wanted to do didn't seem to fit in +neatly. + +Display should be a proper device rather than a kludge in ffmpeg.c + + diff --git a/pi-util/TESTMESA.txt b/pi-util/TESTMESA.txt new file mode 100644 index 0000000000..92bc13a3df --- /dev/null +++ b/pi-util/TESTMESA.txt @@ -0,0 +1,82 @@ +# Setup & Build instructions for testing Argon30 mesa support (on Pi4) + +# These assume that the drm_mmal test for Sand8 has been built on this Pi +# as build relies on many of the same files + +# 1st get everything required to build ffmpeg +# If sources aren't already enabled on your Pi then enable them +sudo su +sed "s/#deb-src/deb-src/" /etc/apt/sources.list > /tmp/sources.list +sed "s/#deb-src/deb-src/" /etc/apt/sources.list.d/raspi.list > /tmp/raspi.list +mv /tmp/sources.list /etc/apt/ +mv /tmp/raspi.list /etc/apt/sources.list.d/ +apt update + +# Get dependancies +sudo apt build-dep ffmpeg + +sudo apt install meson libepoxy-dev libxcb-dri3-dev libxcb1-dev libx11-dev libx11-xcb-dev libdrm-dev + +# Enable H265 V4L2 request decoder +sudo su +echo dtoverlay=rpivid-v4l2 >> /boot/config.txt +# You may also want to add more CMA if you are going to try 4k videos +# Change the dtoverlay=vc4-fkms-v3d line in config.txt to read +# dtoverlay=vc4-fkms-v3d,cma-512 +reboot +# Check it has turned up +ls -la /dev/video* +# This should include video19 +# crw-rw----+ 1 root video 81, 7 Aug 4 17:25 /dev/video19 + +# Currently on the Pi the linux headers from the debian distro don't match +# the kernel that we ship and we need to update them - hopefully this step +# will be unneeded in the future +sudo apt install git bc bison flex libssl-dev make +git clone --depth=1 https://github.com/raspberrypi/linux --branch rpi-5.10.y +cd linux +KERNEL=kernel7l +make bcm2711_defconfig +make headers_install +sudo cp -r usr/include/linux /usr/include +cd .. + +# Config - this builds a staticly linked ffmpeg which is easier for testing +pi-util/conf_native.sh --noshared + +# Build (this is a bit dull) +# If you want to poke the source the libavdevice/egl_vout.c contains the +# output code - +cd out/armv7-static-rel + +# Check that you have actually configured V4L2 request +grep HEVC_V4L2REQUEST config.h +# You are hoping for +# #define CONFIG_HEVC_V4L2REQUEST_HWACCEL 1 +# if you get 0 then the config has failed + +make -j6 + +# Grab test streams +wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-h264.mkv +wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc.mkv +wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc-10bit.mkv + +# Test i420 output (works currently) +./ffmpeg -no_cvt_hw -vcodec h264_v4l2m2m -i jellyfish-3-mbps-hd-h264.mkv -f vout_egl - + +# Test Sand8 output - doesn't currently work but should once you have +# Sand8 working in drm_mmal. I can't guarantee that this will work as +# I can't test this path with a known working format, but the debug looks +# good. If this doesn't work & drm_mmal does with sand8 then come back to me +# The "show_all 1" forces vout to display every frame otherwise it drops any +# frame that would cause it to block +./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc.mkv -show_all 1 -f vout_egl - + +# Test Sand30 - doesn't currently work +# (Beware that when FFmpeg errors out it often leaves your teminal window +# in a state where you need to reset it) +./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc-10bit.mkv -f vout_egl - + + + diff --git a/pi-util/clean_usr_libs.sh b/pi-util/clean_usr_libs.sh new file mode 100755 index 0000000000..01bd6a6a22 --- /dev/null +++ b/pi-util/clean_usr_libs.sh @@ -0,0 +1,42 @@ +set -e +U=/usr/include/arm-linux-gnueabihf +rm -rf $U/libavcodec +rm -rf $U/libavdevice +rm -rf $U/libavfilter +rm -rf $U/libavformat +rm -rf $U/libavutil +rm -rf $U/libswresample +rm -rf $U/libswscale +U=/usr/include/aarch64-linux-gnu +rm -rf $U/libavcodec +rm -rf $U/libavdevice +rm -rf $U/libavfilter +rm -rf $U/libavformat +rm -rf $U/libavutil +rm -rf $U/libswresample +rm -rf $U/libswscale +U=/usr/lib/arm-linux-gnueabihf +rm -f $U/libavcodec.* +rm -f $U/libavdevice.* +rm -f $U/libavfilter.* +rm -f $U/libavformat.* +rm -f $U/libavutil.* +rm -f $U/libswresample.* +rm -f $U/libswscale.* +U=/usr/lib/arm-linux-gnueabihf/neon/vfp +rm -f $U/libavcodec.* +rm -f $U/libavdevice.* +rm -f $U/libavfilter.* +rm -f $U/libavformat.* +rm -f $U/libavutil.* +rm -f $U/libswresample.* +rm -f $U/libswscale.* +U=/usr/lib/aarch64-linux-gnu +rm -f $U/libavcodec.* +rm -f $U/libavdevice.* +rm -f $U/libavfilter.* +rm -f $U/libavformat.* +rm -f $U/libavutil.* +rm -f $U/libswresample.* +rm -f $U/libswscale.* + diff --git a/pi-util/conf_arm64_native.sh b/pi-util/conf_arm64_native.sh new file mode 100644 index 0000000000..9e3bbfa190 --- /dev/null +++ b/pi-util/conf_arm64_native.sh @@ -0,0 +1,45 @@ +echo "Configure for ARM64 native build" + +#RPI_KEEPS="-save-temps=obj" + +SHARED_LIBS="--enable-shared" +if [ "$1" == "--noshared" ]; then + SHARED_LIBS="--disable-shared" + echo Static libs + OUT=out/arm64-static-rel +else + echo Shared libs + OUT=out/arm64-shared-rel +fi + +mkdir -p $OUT +cd $OUT + +A=aarch64-linux-gnu +USR_PREFIX=`pwd`/install +LIB_PREFIX=$USR_PREFIX/lib/$A +INC_PREFIX=$USR_PREFIX/include/$A + +../../configure \ + --prefix=$USR_PREFIX\ + --libdir=$LIB_PREFIX\ + --incdir=$INC_PREFIX\ + --disable-stripping\ + --disable-thumb\ + --disable-mmal\ + --enable-sand\ + --enable-v4l2-request\ + --enable-libdrm\ + --enable-epoxy\ + --enable-libudev\ + --enable-vout-drm\ + --enable-vout-egl\ + $SHARED_LIBS\ + --extra-cflags="-ggdb" + +# --enable-decoder=hevc_rpi\ +# --enable-extra-warnings\ +# --arch=armv71\ + +# gcc option for getting asm listing +# -Wa,-ahls diff --git a/pi-util/conf_h265.2016.csv b/pi-util/conf_h265.2016.csv new file mode 100644 index 0000000000..4efd5d1c67 --- /dev/null +++ b/pi-util/conf_h265.2016.csv @@ -0,0 +1,195 @@ +1,HEVC_v1/AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5,8 +1,HEVC_v1/AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5,8 +1,HEVC_v1/AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5,8 +1,HEVC_v1/AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5,8 +1,HEVC_v1/AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5,8 +1,HEVC_v1/AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5,8 +1,HEVC_v1/AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5,8 +1,HEVC_v1/AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5,8 +1,HEVC_v1/BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5,8 +1,HEVC_v1/CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5,8 +1,HEVC_v1/CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5,8 +1,HEVC_v1/CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5,8 +1,HEVC_v1/CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5,8 +1,HEVC_v1/CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5,8 +1,HEVC_v1/CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5,8 +1,HEVC_v1/CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5,8 +1,HEVC_v1/CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5,8 +1,HEVC_v1/CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5,8 +1,HEVC_v1/cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5,8 +1,HEVC_v1/CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5,8 +1,HEVC_v1/CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5,8 +1,HEVC_v1/DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5,10 +1,HEVC_v1/DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5,8 +1,HEVC_v1/DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5,8 +1,HEVC_v1/DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5,8 +1,HEVC_v1/DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5,8 +1,HEVC_v1/DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5,8 +1,HEVC_v1/DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5,8 +1,HEVC_v1/DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5,8 +1,HEVC_v1/DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5,8 +1,HEVC_v1/DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5,8 +1,HEVC_v1/DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5,8 +1,HEVC_v1/DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5,8 +1,HEVC_v1/DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5,8 +1,HEVC_v1/DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5,8 +1,HEVC_v1/ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5,8 +1,HEVC_v1/ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5,8 +1,HEVC_v1/ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5,8 +1,HEVC_v1/EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5,8 +1,HEVC_v1/FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5,8 +1,HEVC_v1/HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5,8 +1,HEVC_v1/INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5,8 +1,HEVC_v1/INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5,10 +1,HEVC_v1/ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5,8 +1,HEVC_v1/ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5,8 +1,HEVC_v1/ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5,8 +1,HEVC_v1/ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5,8 +1,HEVC_v1/ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5,8 +1,HEVC_v1/IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5,8 +1,HEVC_v1/IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5,8 +1,HEVC_v1/IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5,8 +1,HEVC_v1/LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5,8 +1,HEVC_v1/LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5,8 +1,HEVC_v1/LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5,8 +1,HEVC_v1/MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5,8 +1,HEVC_v1/MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5,8 +1,HEVC_v1/MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5,8 +1,HEVC_v1/MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5,8 +1,HEVC_v1/MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5,8 +1,HEVC_v1/MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5,8 +1,HEVC_v1/MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5,8 +1,HEVC_v1/MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5,8 +1,HEVC_v1/MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5,8 +1,HEVC_v1/MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5,8 +1,HEVC_v1/MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5,8 +1,HEVC_v1/MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5,8 +1,HEVC_v1/MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5,8 +1,HEVC_v1/NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5,8 +1,HEVC_v1/NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5,8 +1,HEVC_v1/NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5,8 +1,HEVC_v1/OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5,8 +1,HEVC_v1/OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5,8 +1,HEVC_v1/OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5,8 +1,HEVC_v1/PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5,8 +1,HEVC_v1/PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5,8 +1,HEVC_v1/PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5,8 +1,HEVC_v1/PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5,8 +1,HEVC_v1/PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5,8 +1,HEVC_v1/PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5,8 +1,HEVC_v1/PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5,8 +1,HEVC_v1/PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5,8 +1,HEVC_v1/PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5,8 +1,HEVC_v1/POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5,8 +1,HEVC_v1/PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5,8 +1,HEVC_v1/PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5,8 +1,HEVC_v1/RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5,8 +1,HEVC_v1/RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5,8 +1,HEVC_v1/RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5,8 +1,HEVC_v1/RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5,8 +1,HEVC_v1/RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5,8 +1,HEVC_v1/RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5,8 +1,HEVC_v1/RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5,8 +1,HEVC_v1/RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5,8 +1,HEVC_v1/RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5,8 +1,HEVC_v1/RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5,8 +1,HEVC_v1/RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5,8 +1,HEVC_v1/RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5,8 +1,HEVC_v1/RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5,8 +1,HEVC_v1/RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5,8 +1,HEVC_v1/RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5,8 +1,HEVC_v1/RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5,8 +1,HEVC_v1/RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5,8 +1,HEVC_v1/SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5,8 +1,HEVC_v1/SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5,8 +1,HEVC_v1/SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5,8 +1,HEVC_v1/SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5,8 +1,HEVC_v1/SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5,8 +1,HEVC_v1/SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5,8 +1,HEVC_v1/SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5,8 +1,HEVC_v1/SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5,8 +1,HEVC_v1/SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt,8 +1,HEVC_v1/SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt,8 +1,HEVC_v1/SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5,8 +1,HEVC_v1/SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5,8 +1,HEVC_v1/SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5,8 +1,HEVC_v1/SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5,8 +1,HEVC_v1/SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5,8 +1,HEVC_v1/SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5,8 +1,HEVC_v1/SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5,8 +1,HEVC_v1/STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5,8 +1,HEVC_v1/STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5,8 +1,HEVC_v1/TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5,8 +1,HEVC_v1/TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5,8 +1,HEVC_v1/TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5,8 +1,HEVC_v1/TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5,8 +1,HEVC_v1/TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5,8 +1,HEVC_v1/TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5,8 +3,HEVC_v1/TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth,10 +1,HEVC_v1/TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5,8 +1,HEVC_v1/VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5,8 +3,HEVC_v1/VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???,8 +1,HEVC_v1/WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5,10 +1,HEVC_v1/WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5,8 +1,HEVC_v1/WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5,8 +1,HEVC_v1/WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5,10 +1,HEVC_v1/WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5,10 +1,HEVC_v1/WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5,8 +1,HEVC_v1/WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5,10 +1,HEVC_v1/WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5,8 +1,HEVC_v1/WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5,10 +1,HEVC_v1/WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5,8 +1,HEVC_v1/WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5,10 +1,HEVC_v1/WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5,8 +1,HEVC_v1/WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5,10 +1,HEVC_v1/WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5,8 +1,HEVC_v1/WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5,10 +1,HEVC_v1/WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5,8 +1,RExt/ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_yuv_2.md5,0 +0,RExt/Bitdepth_A_RExt_Sony_1,Bitdepth_A_RExt_Sony_1.bin,md5sum.txt,8 +0,RExt/Bitdepth_B_RExt_Sony_1,Bitdepth_B_RExt_Sony_1.bin,md5sum.txt,8 +0,RExt/CCP_10bit_RExt_QCOM,CCP_10bit_RExt_QCOM.bin,CCP_10bit_RExt_QCOM_md5sum.txt,10 +0,RExt/CCP_12bit_RExt_QCOM,CCP_12bit_RExt_QCOM.bin,CCP_12bit_RExt_QCOM_md5sum.txt,8 +0,RExt/CCP_8bit_RExt_QCOM,CCP_8bit_RExt_QCOM.bin,CCP_8bit_RExt_QCOM_md5sum.txt,8 +1,RExt/ExplicitRdpcm_A_BBC_1,ExplicitRdpcm_A_BBC_1.bit,md5sum.txt,0 +0,RExt/ExplicitRdpcm_B_BBC_2,ExplicitRdpcm_B_BBC_1.bit,md5sum.txt,8 +0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.md5,10 +0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.md5,8 +0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.md5,8 +0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.md5,8 +0,RExt/EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.md5,10 +0,RExt/EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.md5,8 +0,RExt/EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.md5,8 +0,RExt/EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.md5,8 +1,RExt/GENERAL_10b_420_RExt_Sony_1,GENERAL_10b_420_RExt_Sony_1.bit,GENERAL_10b_420_RExt_Sony_1.md5,10 +1,RExt/GENERAL_10b_422_RExt_Sony_1,GENERAL_10b_422_RExt_Sony_1.bit,GENERAL_10b_422_RExt_Sony_1.md5,0 +1,RExt/GENERAL_10b_444_RExt_Sony_2,GENERAL_10b_444_RExt_Sony_2.bit,GENERAL_10b_444_RExt_Sony_2.md5,0 +1,RExt/GENERAL_12b_400_RExt_Sony_1,GENERAL_12b_400_RExt_Sony_1.bit,GENERAL_12b_400_RExt_Sony_1.md5,0 +1,RExt/GENERAL_12b_420_RExt_Sony_1,GENERAL_12b_420_RExt_Sony_1.bit,GENERAL_12b_420_RExt_Sony_1.md5,0 +1,RExt/GENERAL_12b_422_RExt_Sony_1,GENERAL_12b_422_RExt_Sony_1.bit,GENERAL_12b_422_RExt_Sony_1.md5,0 +1,RExt/GENERAL_12b_444_RExt_Sony_2,GENERAL_12b_444_RExt_Sony_2.bit,GENERAL_12b_444_RExt_Sony_2.md5,0 +0,RExt/GENERAL_16b_400_RExt_Sony_1,GENERAL_16b_400_RExt_Sony_1.bit,GENERAL_16b_400_RExt_Sony_1.md5,0 +0,RExt/GENERAL_16b_444_highThroughput_RExt_Sony_2,GENERAL_16b_444_highThroughput_RExt_Sony_2.bit,GENERAL_16b_444_highThroughput_RExt_Sony_2.md5,8 +0,RExt/GENERAL_16b_444_RExt_Sony_2,GENERAL_16b_444_RExt_Sony_2.bit,GENERAL_16b_444_RExt_Sony_2.md5,8 +1,RExt/GENERAL_8b_400_RExt_Sony_1,GENERAL_8b_400_RExt_Sony_1.bit,GENERAL_8b_400_RExt_Sony_1.md5,0 +1,RExt/GENERAL_8b_420_RExt_Sony_1,GENERAL_8b_420_RExt_Sony_1.bit,GENERAL_8b_420_RExt_Sony_1.md5,8 +1,RExt/GENERAL_8b_444_RExt_Sony_2,GENERAL_8b_444_RExt_Sony_2.bit,GENERAL_8b_444_RExt_Sony_2.md5,0 +1,RExt/IPCM_A_RExt_NEC_2,IPCM_A_RExt_NEC_2.bit,IPCM_A_RExt_NEC_2_yuv.md5,0 +1,RExt/IPCM_B_RExt_NEC,IPCM_B_RExt_NEC.bit,IPCM_B_RExt_NEC_yuv.md5,0 +1,RExt/Main_422_10_A_RExt_Sony_2,Main_422_10_A_RExt_Sony_2.bin,md5sum.txt,0 +1,RExt/Main_422_10_B_RExt_Sony_2,Main_422_10_B_RExt_Sony_2.bin,md5sum.txt,0 +1,RExt/PERSIST_RPARAM_A_RExt_Sony_3,PERSIST_RPARAM_A_RExt_Sony_3.bit,PERSIST_RPARAM_A_RExt_Sony_3.md5,0 +1,RExt/QMATRIX_A_RExt_Sony_1,QMATRIX_A_RExt_Sony_1.bit,QMATRIX_A_RExt_Sony_1.md5,0 +0,RExt/SAO_A_RExt_MediaTek_1,SAO_A_RExt_MediaTek_1.bit,SAO_A_RExt_MediaTek_1.md5, # Runs out of memory - could be fixed,8 +0,RExt/TSCTX_10bit_I_RExt_SHARP_1,TSCTX_10bit_I_RExt_SHARP_1.bin,TSCTX_10bit_I_RExt_SHARP_1.md5,10 +0,RExt/TSCTX_10bit_RExt_SHARP_1,TSCTX_10bit_RExt_SHARP_1.bin,TSCTX_10bit_RExt_SHARP_1.md5,10 +0,RExt/TSCTX_12bit_I_RExt_SHARP_1,TSCTX_12bit_I_RExt_SHARP_1.bin,TSCTX_12bit_I_RExt_SHARP_1.md5,8 +0,RExt/TSCTX_12bit_RExt_SHARP_1,TSCTX_12bit_RExt_SHARP_1.bin,TSCTX_12bit_RExt_SHARP_1.md5,8 +0,RExt/TSCTX_8bit_I_RExt_SHARP_1,TSCTX_8bit_I_RExt_SHARP_1.bin,TSCTX_8bit_I_RExt_SHARP_1.md5,8 +0,RExt/TSCTX_8bit_RExt_SHARP_1,TSCTX_8bit_RExt_SHARP_1.bin,TSCTX_8bit_RExt_SHARP_1.md5,8 +0,RExt/WAVETILES_RExt_Sony_2,WAVETILES_RExt_Sony_2.bit,WAVETILES_RExt_Sony_2.md5,8 +1,local/sao_cu16_mobile_344x280,sao_cu16_mobile_344x280.265,sao_cu16_mobile_344x280.md5,8 +1,local/dblk_cu16_mobile_344x280,dblk_cu16_mobile_344x280.265,dblk_cu16_mobile_344x280.md5,8 +1,local/dblksao_cu16_mobile_344x280,dblksao_cu16_mobile_344x280.265,dblksao_cu16_mobile_344x280.md5,8 +1,local/dblk_pu32_horses_832x448,dblk_pu32_horses_832x448.265,dblk_pu32_horses_832x448.md5,8 +1,local/intra_pred_21_laps,intra_pred_21_laps.265,intra_pred_21_laps.md5,8 diff --git a/pi-util/conf_h265.2016_HEVC_v1.csv b/pi-util/conf_h265.2016_HEVC_v1.csv new file mode 100644 index 0000000000..6082641271 --- /dev/null +++ b/pi-util/conf_h265.2016_HEVC_v1.csv @@ -0,0 +1,147 @@ +1,AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5 +1,AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5 +1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5 +1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5 +1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5 +1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5 +1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5 +1,AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5 +1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5 +1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5 +1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5 +1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5 +1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5 +1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5 +1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5 +1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5 +1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5 +1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5 +1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5 +1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5 +1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5 +1,DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5 +1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5 +1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5 +1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5 +1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5 +1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5 +1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5 +1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5 +1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5 +1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5 +1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5 +1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5 +1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5 +1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5 +1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5 +1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5 +1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5 +1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5 +1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5 +1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5 +1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5 +1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5 +1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5 +1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5 +1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5 +1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5 +1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5 +1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5 +1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5 +1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5 +1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5 +1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5 +1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5 +1,MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5 +1,MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5 +1,MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5 +1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5 +1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5 +1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5 +1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5 +1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5 +1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5 +1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5 +1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5 +1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5 +1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5 +1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5 +1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5 +1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5 +1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5 +1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5 +1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5 +1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5 +1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5 +1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5 +1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5 +1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5 +1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5 +1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5 +1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5 +1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5 +1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5 +1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5 +1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5 +1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5 +1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5 +1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5 +1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5 +1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5 +1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5 +1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5 +1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5 +1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5 +1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5 +1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5 +1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5 +1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5 +1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5 +1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5 +1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5 +1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5 +1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5 +1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5 +1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5 +1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5 +1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5 +1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5 +1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5 +1,SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5 +2,SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt +2,SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt +1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5 +1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5 +1,SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5 +1,SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5 +1,SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5 +1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5 +1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5 +1,STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5 +1,STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5 +1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5 +1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5 +1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5 +1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5 +1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5 +1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5 +3,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth +1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5 +1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5 +3,VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ??? +1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5 +1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5 +1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5 +1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5 +1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5 +1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5 +1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5 +1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5 +1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5 +1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5 +1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5 +1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5 +1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5 +1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5 +1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5 +1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5 diff --git a/pi-util/conf_h265.csv b/pi-util/conf_h265.csv new file mode 100644 index 0000000000..fc14f2a3c2 --- /dev/null +++ b/pi-util/conf_h265.csv @@ -0,0 +1,144 @@ +1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.md5 +1,AMP_A_Samsung_6,AMP_A_Samsung_6.bin,AMP_A_Samsung_6.md5 +1,AMP_B_Samsung_6,AMP_B_Samsung_6.bin,AMP_B_Samsung_6.md5 +1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5 +1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5 +1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5 +1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5 +1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5 +1,AMVP_C_Samsung_6,AMVP_C_Samsung_6.bin,AMVP_C_Samsung_6.md5 +1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5 +1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5 +1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5 +1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5 +1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5 +1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5 +1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5 +1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5 +1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5 +1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5 +1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5 +1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5 +1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5 +1,DBLK_A_MAIN10_VIXS_3,DBLK_A_MAIN10_VIXS_3.bit,DBLK_A_MAIN10_VIXS_3.md5 +1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5 +1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5 +1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5 +1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5 +1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5 +1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5 +1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5 +1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5 +1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5 +1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5 +1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5 +1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5 +1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5 +1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5 +1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5 +1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5 +1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5 +1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5 +1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5 +1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5 +1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5 +1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5 +1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5 +1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5 +1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5 +1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5 +1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5 +1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5 +1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5 +1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5 +1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5 +1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5 +1,MAXBINS_A_TI_4,MAXBINS_A_TI_4.bit,MAXBINS_A_TI_4.md5 +1,MAXBINS_B_TI_4,MAXBINS_B_TI_4.bit,MAXBINS_B_TI_4.md5 +1,MAXBINS_C_TI_4,MAXBINS_C_TI_4.bit,MAXBINS_C_TI_4.md5 +1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5 +1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5 +1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5 +1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5 +1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5 +1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5 +1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5 +1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5 +1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5 +1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5 +1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5 +1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5 +1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5 +1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5 +1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5 +1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5 +1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5 +1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5 +1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5 +1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5 +1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5 +1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5 +1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5 +1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5 +1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5 +1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5 +1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5 +1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5 +1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5 +1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5 +1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5 +1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5 +1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5 +1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5 +1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5 +1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5 +1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5 +1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5 +1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5 +1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5 +1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5 +1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5 +1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5 +1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5 +1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5 +1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5 +1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5 +1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5 +1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5 +1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5 +1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5 +1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5 +1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5 +1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5 +1,SLIST_A_Sony_4,str.bin,SLIST_A_Sony_4_yuv.md5 +1,SLIST_B_Sony_8,str.bin,SLIST_B_Sony_8_yuv.md5 +1,SLIST_C_Sony_3,str.bin,SLIST_C_Sony_3_yuv.md5 +1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5 +1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5 +1,STRUCT_A_Samsung_6,STRUCT_A_Samsung_6.bin,STRUCT_A_Samsung_6.md5 +1,STRUCT_B_Samsung_6,STRUCT_B_Samsung_6.bin,STRUCT_B_Samsung_6.md5 +1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5 +1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5 +1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5 +1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5 +1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5 +1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5 +0,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # Y/C bit depth unmatched +1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5 +1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5 +1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5 +1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5 +1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5 +1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5 +1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5 +1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5 +1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5 +1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5 +1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5 +1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5 +1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5 +1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5 +1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5 +1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5 +1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5 +1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5 diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh new file mode 100755 index 0000000000..1dbbcf154a --- /dev/null +++ b/pi-util/conf_native.sh @@ -0,0 +1,157 @@ +echo "Configure for native build" + +FFSRC=`pwd` +MC=`dpkg --print-architecture` +BUILDBASE=$FFSRC/out + +#RPI_KEEPS="-save-temps=obj" +RPI_KEEPS="" + +NOSHARED= +MMAL= +USR_PREFIX= +DO_MAKE= +DO_INSTALL= +INSTALL_SUDO= + +while [ "$1" != "" ] ; do + case $1 in + --noshared) + NOSHARED=1 + ;; + --mmal) + MMAL=1 + ;; + --usr) + INSTALL_SUDO=1 + USR_PREFIX=/usr + ;; + --make) + DO_MAKE=1 + ;; + --install) + DO_MAKE=1 + DO_INSTALL=1 + ;; + *) + echo "Usage $0: [--noshared] [--mmal] [--usr]" + echo " noshared Build static libs and executable - good for testing" + echo " mmal Build mmal decoders" + echo " usr Set install prefix to /usr [default=/install]" + echo " make Make after configure" + echo " install Make & install after configure - does sudo on install if --usr" + exit 1 + ;; + esac + shift +done + + +MCOPTS= +RPI_INCLUDES= +RPI_LIBDIRS= +RPI_DEFINES= +RPI_EXTRALIBS= + +# uname -m gives kernel type which may not have the same +# 32/64bitness as userspace :-( getconf shoudl provide the answer +# but use uname to check we are on the right processor +MC=`uname -m` +LB=`getconf LONG_BIT` +if [ "$MC" == "armv7l" ] || [ "$MC" == "aarch64" ]; then + if [ "$LB" == "32" ]; then + echo "M/C armv7" + A=arm-linux-gnueabihf + B=armv7 + MCOPTS="--arch=armv6t2 --cpu=cortex-a7" + RPI_DEFINES=-mfpu=neon-vfpv4 + elif [ "$LB" == "64" ]; then + echo "M/C aarch64" + A=aarch64-linux-gnu + B=arm64 + else + echo "Unknown LONG_BIT name: $LB" + exit 1 + fi +else + echo "Unknown machine name: $MC" + exit 1 +fi + +if [ $MMAL ]; then + RPI_OPT_VC=/opt/vc + RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux" + RPI_LIBDIRS="-L$RPI_OPT_VC/lib" + RPI_DEFINES="$RPI_DEFINES -D__VCCOREVER__=0x4000000" + RPI_EXTRALIBS="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm -Wl,--end-group" + RPIOPTS="--enable-mmal" +else + RPIOPTS="--disable-mmal" +fi + +C=`lsb_release -sc` +V=`cat RELEASE` + +SHARED_LIBS="--enable-shared" +if [ $NOSHARED ]; then + SHARED_LIBS="--disable-shared" + OUT=$BUILDBASE/$B-$C-$V-static-rel + echo Static libs +else + echo Shared libs + OUT=$BUILDBASE/$B-$C-$V-shared-rel +fi + +if [ ! $USR_PREFIX ]; then + USR_PREFIX=$OUT/install +fi +LIB_PREFIX=$USR_PREFIX/lib/$A +INC_PREFIX=$USR_PREFIX/include/$A + +echo Destination directory: $OUT +mkdir -p $OUT +# Nothing under here need worry git - including this .gitignore! +echo "**" > $BUILDBASE/.gitignore +cd $OUT + +$FFSRC/configure \ + --prefix=$USR_PREFIX\ + --libdir=$LIB_PREFIX\ + --incdir=$INC_PREFIX\ + $MCOPTS\ + --disable-stripping\ + --disable-thumb\ + --enable-sand\ + --enable-v4l2-request\ + --enable-libdrm\ + --enable-vout-egl\ + --enable-vout-drm\ + --enable-gpl\ + $SHARED_LIBS\ + $RPIOPTS\ + --extra-cflags="-ggdb $RPI_KEEPS $RPI_DEFINES $RPI_INCLUDES"\ + --extra-cxxflags="$RPI_DEFINES $RPI_INCLUDES"\ + --extra-ldflags="$RPI_LIBDIRS"\ + --extra-libs="$RPI_EXTRALIBS"\ + --extra-version="rpi" + +echo "Configured into $OUT" + +if [ $DO_MAKE ]; then + echo "Making..." + make -j8 + echo "Made" +fi +if [ $DO_INSTALL ]; then + echo "Installing..." + if [ $INSTALL_SUDO ]; then + sudo make -j8 install + else + make -j8 install + fi + echo "Installed" +fi + + +# gcc option for getting asm listing +# -Wa,-ahls diff --git a/pi-util/ffconf.py b/pi-util/ffconf.py new file mode 100755 index 0000000000..657568014e --- /dev/null +++ b/pi-util/ffconf.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python3 + +import string +import os +import subprocess +import re +import argparse +import sys +import csv +from stat import * + +CODEC_HEVC_RPI = 1 +HWACCEL_RPI = 2 +HWACCEL_DRM = 3 +HWACCEL_VAAPI = 4 + +def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, ffmpeg_exec): + hwaccel = "" + if dectype == HWACCEL_RPI: + hwaccel = "rpi" + elif dectype == HWACCEL_DRM: + hwaccel = "drm" + elif dectype == HWACCEL_VAAPI: + hwaccel = "vaapi" + + pix_fmt = [] + if pix == "8": + pix_fmt = ["-pix_fmt", "yuv420p"] + elif pix == "10": + pix_fmt = ["-pix_fmt", "yuv420p10le"] + elif pix == "12": + pix_fmt = ["-pix_fmt", "yuv420p12le"] + + tmp_root = "/tmp" + + names = srcname.split('/') + while len(names) > 1: + tmp_root = os.path.join(tmp_root, names[0]) + del names[0] + name = names[0] + + if not os.path.exists(tmp_root): + os.makedirs(tmp_root) + + dec_file = os.path.join(tmp_root, name + ".dec.md5") + try: + os.remove(dec_file) + except: + pass + + flog = open(os.path.join(tmp_root, name + ".log"), "wt") + + ffargs = [ffmpeg_exec, "-flags", "unaligned", "-hwaccel", hwaccel, "-vcodec", "hevc", "-i", os.path.join(fileroot, es_file)] + pix_fmt + ["-f", "md5", dec_file] + + # Unaligned needed for cropping conformance + if hwaccel: + rstr = subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT) + else: + rstr = subprocess.call( + [ffmpeg_exec, "-flags", "unaligned", "-vcodec", vcodec, "-i", os.path.join(fileroot, es_file), "-f", "md5", dec_file], + stdout=flog, stderr=subprocess.STDOUT) + + try: + m1 = None + m2 = None + with open(os.path.join(fileroot, md5_file)) as f: + for line in f: + m1 = re.search("[0-9a-f]{32}", line.lower()) + if m1: + break + + with open(dec_file) as f: + m2 = re.search("[0-9a-f]{32}", f.readline()) + except: + pass + + if m1 and m2 and m1.group() == m2.group(): + print("Match: " + m1.group(), file=flog) + rv = 0 + elif not m1: + print("****** Cannot find m1", file=flog) + rv = 3 + elif not m2: + print("****** Cannot find m2", file=flog) + rv = 2 + else: + print("****** Mismatch: " + m1.group() + " != " + m2.group(), file=flog) + rv = 1 + flog.close() + return rv + +def scandir(root): + aconf = [] + ents = os.listdir(root) + ents.sort(key=str.lower) + for name in ents: + test_path = os.path.join(root, name) + if S_ISDIR(os.stat(test_path).st_mode): + files = os.listdir(test_path) + es_file = "?" + md5_file = "?" + for f in files: + (base, ext) = os.path.splitext(f) + if base[0] == '.': + pass + elif ext == ".bit" or ext == ".bin": + es_file = f + elif ext == ".md5" or (ext == ".txt" and (base[-4:] == "_md5" or base[-6:] == "md5sum")): + if md5_file == "?": + md5_file = f + elif base[-3:] == "yuv": + md5_file = f + aconf.append((1, name, es_file, md5_file)) + return aconf + +def runtest(name, tests): + if not tests: + return True + for t in tests: + if name[0:len(t)] == t or name.find("/" + t) != -1: + return True + return False + +def doconf(csva, tests, test_root, vcodec, dectype, ffmpeg_exec): + unx_failures = [] + unx_success = [] + failures = 0 + successes = 0 + for a in csva: + exp_test = int(a[0]) + if (exp_test and runtest(a[1], tests)): + name = a[1] + print ("==== ", name, end="") + sys.stdout.flush() + + rv = testone(os.path.join(test_root, name), name, a[2], a[3], a[4], dectype=dectype, vcodec=vcodec, ffmpeg_exec=ffmpeg_exec) + if (rv == 0): + successes += 1 + else: + failures += 1 + + if (rv == 0): + if exp_test == 2: + print(": * OK *") + unx_success.append(name) + else: + print(": ok") + elif exp_test == 2 and rv == 1: + print(": fail") + elif exp_test == 3 and rv == 2: + # Call an expected "crash" an abort + print(": abort") + else: + unx_failures.append(name) + if rv == 1: + print(": * FAIL *") + elif (rv == 2) : + print(": * CRASH *") + elif (rv == 3) : + print(": * MD5 MISSING *") + else : + print(": * BANG *") + + if unx_failures or unx_success: + print("Unexpected Failures:", unx_failures) + print("Unexpected Success: ", unx_success) + else: + print("All tests normal:", successes, "ok,", failures, "failed") + + +class ConfCSVDialect(csv.Dialect): + delimiter = ',' + doublequote = True + lineterminator = '\n' + quotechar='"' + quoting = csv.QUOTE_MINIMAL + skipinitialspace = True + strict = True + +if __name__ == '__main__': + + argp = argparse.ArgumentParser(description="FFmpeg h265 conformance tester") + argp.add_argument("tests", nargs='*') + argp.add_argument("--pi4", action='store_true', help="Force pi4 cmd line") + argp.add_argument("--drm", action='store_true', help="Force v4l2 drm cmd line") + argp.add_argument("--vaapi", action='store_true', help="Force vaapi cmd line") + argp.add_argument("--test_root", default="/opt/conform/h265.2016", help="Root dir for test") + argp.add_argument("--csvgen", action='store_true', help="Generate CSV file for dir") + argp.add_argument("--csv", default="pi-util/conf_h265.2016.csv", help="CSV filename") + argp.add_argument("--vcodec", default="hevc_rpi", help="vcodec name to use") + argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name") + args = argp.parse_args() + + if args.csvgen: + csv.writer(sys.stdout).writerows(scandir(args.test_root)) + exit(0) + + with open(args.csv, 'rt') as csvfile: + csva = [a for a in csv.reader(csvfile, ConfCSVDialect())] + + dectype = CODEC_HEVC_RPI + if os.path.exists("/dev/rpivid-hevcmem"): + dectype = HWACCEL_RPI + if args.drm or os.path.exists("/sys/module/rpivid_hevc"): + dectype = HWACCEL_DRM + + if args.pi4: + dectype = HWACCEL_RPI + elif args.drm: + dectype = HWACCEL_DRM + elif args.vaapi: + dectype = HWACCEL_VAAPI + + doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args.ffmpeg) + diff --git a/pi-util/ffperf.py b/pi-util/ffperf.py new file mode 100755 index 0000000000..65c5224cd8 --- /dev/null +++ b/pi-util/ffperf.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 + +import time +import string +import os +import tempfile +import subprocess +import re +import argparse +import sys +import csv +from stat import * + +class tstats: + close_threshold = 0.01 + + def __init__(self, stats_dict=None): + if stats_dict != None: + self.name = stats_dict["name"] + self.elapsed = float(stats_dict["elapsed"]) + self.user = float(stats_dict["user"]) + self.sys = float(stats_dict["sys"]) + + def times_str(self): + ctime = self.sys + self.user + return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed) + + def dict(self): + return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys} + + def is_close(self, other): + return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold + + def __lt__(self, other): + return self.elapsed < other.elapsed + def __gt__(self, other): + return self.elapsed > other.elapsed + + def time_file(name, prefix, ffmpeg="./ffmpeg"): + stats = tstats() + stats.name = name + start_time = time.clock_gettime(time.CLOCK_MONOTONIC); + cproc = subprocess.Popen([ffmpeg, "-no_cvt_hw", + "-vcodec", "hevc_rpi", + "-t", "30", "-i", prefix + name, + "-f", "vout_rpi", os.devnull], bufsize=-1, stdout=flog, stderr=flog); + pinfo = os.wait4(cproc.pid, 0) + end_time = time.clock_gettime(time.CLOCK_MONOTONIC); + stats.elapsed = end_time - start_time + stats.user = pinfo[2].ru_utime + stats.sys = pinfo[2].ru_stime + return stats + + +def common_prefix(s1, s2): + for i in range(min(len(s1),len(s2))): + if s1[i] != s2[i]: + return s1[:i] + return s1[:i+1] + +def main(): + global flog + + argp = argparse.ArgumentParser(description="FFmpeg performance tester", epilog=""" +To blank the screen before starting use "xdg-screensaver activate" +(For some reason this doesn't seem to work from within python). +""") + + argp.add_argument("streams", nargs='*') + argp.add_argument("--csv_out", default="ffperf_out.csv", help="CSV output filename") + argp.add_argument("--csv_in", help="CSV input filename") + argp.add_argument("--prefix", help="Filename prefix (include terminal '/' if a directory).") + argp.add_argument("--repeat", default=3, type=int, help="Run repeat count") + argp.add_argument("--ffmpeg", default="./ffmpeg", help="FFmpeg executable") + + args = argp.parse_args() + + csv_out = csv.DictWriter(open(args.csv_out, 'w', newline=''), ["name", "elapsed", "user", "sys"]) + csv_out.writeheader() + + stats_in = {} + if args.csv_in != None: + with open(args.csv_in, 'r', newline='') as f_in: + stats_in = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} + + flog = open(os.path.join(tempfile.gettempdir(), "ffperf.log"), "wt") + + streams = args.streams + if not streams: + if not stats_in: + print ("No source streams specified") + return 1 + prefix = "" if args.prefix == None else args.prefix + streams = [k for k in stats_in] + elif args.prefix != None: + prefix = args.prefix + else: + prefix = streams[0] + for f in streams[1:]: + prefix = common_prefix(prefix, f) + pp = prefix.rpartition(os.sep) + prefix = pp[0] + pp[1] + streams = [s[len(prefix):] for s in streams] + + for f in sorted(streams, key=lambda x : "~" * x.count(os.sep) + x.lower()): + print ("====", f) + + t0 = tstats({"name":f, "elapsed":999, "user":999, "sys":999}) + for i in range(args.repeat): + t = tstats.time_file(f, prefix, args.ffmpeg) + print ("...", t.times_str()) + if t0 > t: + t0 = t + + if t0.name in stats_in: + pstat = stats_in[t0.name] + print("---" if pstat.is_close(t0) else "<<<" if t0 < pstat else ">>>", pstat.times_str()) + + csv_out.writerow(t0.dict()) + + print () + + return 0 + + +if __name__ == '__main__': + exit(main()) + diff --git a/pi-util/genpatch.sh b/pi-util/genpatch.sh new file mode 100755 index 0000000000..0948a68a7a --- /dev/null +++ b/pi-util/genpatch.sh @@ -0,0 +1,35 @@ +set -e + +NOPATCH= +if [ "$1" == "--notag" ]; then + shift + NOPATCH=1 +fi + +if [ "$1" == "" ]; then + echo Usage: $0 [--notag] \ + echo e.g.: $0 mmal_4 + exit 1 +fi + +VERSION=`cat RELEASE` +if [ "$VERSION" == "" ]; then + echo Can\'t find version RELEASE + exit 1 +fi + +PATCHFILE=../ffmpeg-$VERSION-$1.patch + +if [ $NOPATCH ]; then + echo Not tagged +else + # Only continue if we are all comitted + git diff --name-status --exit-code + + PATCHTAG=pi/$VERSION/$1 + echo Tagging: $PATCHTAG + + git tag $PATCHTAG +fi +echo Generating patch: $PATCHFILE +git diff n$VERSION -- > $PATCHFILE diff --git a/pi-util/make_array.py b/pi-util/make_array.py new file mode 100755 index 0000000000..67b22d2d51 --- /dev/null +++ b/pi-util/make_array.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +# Usage +# make_array file.bin +# Produces file.h with array of bytes. +# +import sys +for file in sys.argv[1:]: + prefix,suffix = file.split('.') + assert suffix=='bin' + name=prefix.split('/')[-1] + print 'Converting',file + with open(prefix+'.h','wb') as out: + print >>out, 'static const unsigned char',name,'[] = {' + with open(file,'rb') as fd: + i = 0 + for byte in fd.read(): + print >>out, '0x%02x, ' % ord(byte), + i = i + 1 + if i % 8 == 0: + print >>out, ' // %04x' % (i - 8) + print >>out,'};' + diff --git a/pi-util/mkinst.sh b/pi-util/mkinst.sh new file mode 100755 index 0000000000..271a39e846 --- /dev/null +++ b/pi-util/mkinst.sh @@ -0,0 +1,5 @@ +set -e + +make install + +cp -r install/* ../vlc/sysroot/raspian_stretch_pi1-sysroot/usr diff --git a/pi-util/patkodi.sh b/pi-util/patkodi.sh new file mode 100644 index 0000000000..dcd05a606e --- /dev/null +++ b/pi-util/patkodi.sh @@ -0,0 +1,9 @@ +set -e +KODIBASE=/home/jc/rpi/kodi/xbmc +JOBS=-j20 +make $JOBS +git diff xbmc/release/4.3-kodi > $KODIBASE/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch +make -C $KODIBASE/tools/depends/target/ffmpeg $JOBS +make -C $KODIBASE/build install + + diff --git a/pi-util/perfcmp.py b/pi-util/perfcmp.py new file mode 100755 index 0000000000..e44cfa0c3c --- /dev/null +++ b/pi-util/perfcmp.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 + +import time +import string +import os +import tempfile +import subprocess +import re +import argparse +import sys +import csv +from stat import * + +class tstats: + close_threshold = 0.01 + + def __init__(self, stats_dict=None): + if stats_dict != None: + self.name = stats_dict["name"] + self.elapsed = float(stats_dict["elapsed"]) + self.user = float(stats_dict["user"]) + self.sys = float(stats_dict["sys"]) + + def times_str(self): + ctime = self.sys + self.user + return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed) + + def dict(self): + return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys} + + def is_close(self, other): + return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold + + def __lt__(self, other): + return self.elapsed < other.elapsed + def __gt__(self, other): + return self.elapsed > other.elapsed + + def time_file(name, prefix): + stats = tstats() + stats.name = name + start_time = time.clock_gettime(time.CLOCK_MONOTONIC); + cproc = subprocess.Popen(["./ffmpeg", "-t", "30", "-i", prefix + name, + "-f", "null", os.devnull], bufsize=-1, stdout=flog, stderr=flog); + pinfo = os.wait4(cproc.pid, 0) + end_time = time.clock_gettime(time.CLOCK_MONOTONIC); + stats.elapsed = end_time - start_time + stats.user = pinfo[2].ru_utime + stats.sys = pinfo[2].ru_stime + return stats + + +def common_prefix(s1, s2): + for i in range(min(len(s1),len(s2))): + if s1[i] != s2[i]: + return s1[:i] + return s1[:i+1] + +def main(): + argp = argparse.ArgumentParser(description="FFmpeg performance compare") + + argp.add_argument("stream0", help="CSV to compare") + argp.add_argument("stream1", nargs='?', default="ffperf_out.csv", help="CSV to compare") + + args = argp.parse_args() + + with open(args.stream0, 'r', newline='') as f_in: + stats0 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} + with open(args.stream1, 'r', newline='') as f_in: + stats1 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} + + print (args.stream0, "<<-->>", args.stream1) + print () + + for f in sorted(stats0.keys() | stats1.keys(), key=lambda x : "~" * x.count(os.sep) + x.lower()): + if not (f in stats0) : + print (" XX :", f) + continue + if not (f in stats1) : + print (" XX :", f) + continue + + s0 = stats0[f] + s1 = stats1[f] + + pcent = ((s0.elapsed - s1.elapsed) / s0.elapsed) * 100.0 + thresh = 0.3 + tc = 6 + + nchar = min(tc - 1, int(abs(pcent) / thresh)) + cc = " -- " if nchar == 0 else "<" * nchar + " " * (tc - nchar) if pcent < 0 else " " * (tc - nchar) + ">" * nchar + + print ("%6.2f %s%6.2f (%+5.2f) : %s" % + (s0.elapsed, cc, s1.elapsed, pcent, f)) + + return 0 + + +if __name__ == '__main__': + exit(main()) + diff --git a/pi-util/qem.sh b/pi-util/qem.sh new file mode 100755 index 0000000000..a4dbb6eacd --- /dev/null +++ b/pi-util/qem.sh @@ -0,0 +1,9 @@ +TARGET_DIR=../src/eupton_vc4dev_2012a/software/vc4/DEV/applications/tutorials/user_shader_example_tex +QASM=python\ ../local/bin/qasm.py +SRC_FILE=libavcodec/rpi_hevc_shader.qasm +DST_BASE=shader + +cp libavcodec/rpi_hevc_shader_cmd.h $TARGET_DIR +$QASM -mc_c:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.c +$QASM -mc_h:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.h + diff --git a/pi-util/testfilt.py b/pi-util/testfilt.py new file mode 100755 index 0000000000..b322dac0c2 --- /dev/null +++ b/pi-util/testfilt.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 + +import string +import os +import subprocess +import re +import argparse +import sys +import csv +from stat import * + +class validator: + def __init__(self): + self.ok = False + + def isok(self): + return self.ok + + def setok(self): + self.ok = True + +class valid_regex(validator): + def __init__(self, regex): + super().__init__() + self.regex = re.compile(regex) + + def scanline(self, line): + if self.isok() or self.regex.search(line): + self.setok() + + +def validate(validators, flog): + for line in flog: + for v in validators: + v.scanline(line) + + ok = True + for v in validators: + if not v.isok(): + ok = False + # complain + print("Test failed") + + if ok: + print("OK") + return ok + +def runtest(name, ffmpeg, args, suffix, validators): + log_root = os.path.join("/tmp", "testfilt", name) + ofilename = os.path.join(log_root, name + suffix) + + if not os.path.exists(log_root): + os.makedirs(log_root) + + try: + os.remove(ofilename) + except: + pass + + flog = open(os.path.join(log_root, name + ".log"), "wb") + ffargs = [ffmpeg] + args + [ofilename] + + subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT, text=False) + flog.close + + flog = open(os.path.join(log_root, name + ".log"), "rt") + return validate(validators, flog) + +def sayok(log_root, flog): + print("Woohoo") + return True + +if __name__ == '__main__': + + argp = argparse.ArgumentParser(description="FFmpeg filter tester") + argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name") + args = argp.parse_args() + + runtest("ATest", args.ffmpeg, ["-v", "verbose", "-no_cvt_hw", "-an", "-c:v", "h264_v4l2m2m", "-i", + "/home/johncox/server/TestMedia/Sony/jellyfish-10-mbps-hd-h264.mkv", +# "/home/jc/rpi/streams/jellyfish-3-mbps-hd-h264.mkv", + "-c:v", "h264_v4l2m2m", "-b:v", "2M"], ".mkv", + [valid_regex(r'Output stream #0:0 \(video\): 900 frames encoded; 900 packets muxed')]) diff --git a/pi-util/v3dusage.py b/pi-util/v3dusage.py new file mode 100755 index 0000000000..5935a11ca5 --- /dev/null +++ b/pi-util/v3dusage.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python + +import sys +import argparse +import re + +def do_logparse(logname): + + rmatch = re.compile(r'^([0-9]+\.[0-9]{3}): (done )?((vpu0)|(vpu1)|(qpu1)) ([A-Z_]+) cb:([0-9a-f]+) ') + rqcycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs doing vertex/coordinate shading +([0-9]+)$') + rqtscycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs stalled waiting for TMUs +([0-9]+)$') + rl2hits = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: L2C Total Level 2 cache ([a-z]+) +([0-9]+)$') + + ttotal = {'idle':0.0} + tstart = {} + qctotal = {} + qtstotal = {} + l2hits = {} + l2total = {} + time0 = None + idle_start = None + qpu_op_no = 0 + op_count = 0 + + with open(logname, "rt") as infile: + for line in infile: + match = rmatch.match(line) + if match: +# print match.group(1), ":", match.group(2), ":", match.group(3), ":", match.group(7), ":" + time = float(match.group(1)) + unit = match.group(3) + opstart = not match.group(2) + optype = match.group(7) + hascb = match.group(8) != "0" + + if unit == 'qpu1': + unit = unit + "." + str(qpu_op_no) + if not opstart: + if hascb or optype == 'EXECUTE_SYNC': + qpu_op_no = 0 + else: + qpu_op_no += 1 + + # Ignore sync type + if optype == 'EXECUTE_SYNC': + continue + + if not time0: + time0 = time + + if opstart: + tstart[unit] = time; + elif unit in tstart: + op_count += 1 + if not unit in ttotal: + ttotal[unit] = 0.0 + ttotal[unit] += time - tstart[unit] + del tstart[unit] + + if not idle_start and not tstart: + idle_start = time + elif idle_start and tstart: + ttotal['idle'] += time - idle_start + idle_start = None + + match = rqcycle.match(line) + if match: + unit = "qpu1." + str(qpu_op_no) + if not unit in qctotal: + qctotal[unit] = 0 + qctotal[unit] += int(match.group(2)) + + match = rqtscycle.match(line) + if match: + unit = "qpu1." + str(qpu_op_no) + if not unit in qtstotal: + qtstotal[unit] = 0 + qtstotal[unit] += int(match.group(2)) + + match = rl2hits.match(line) + if match: + unit = "qpu1." + str(qpu_op_no) + if not unit in l2total: + l2total[unit] = 0 + l2hits[unit] = 0 + l2total[unit] += int(match.group(3)) + if match.group(2) == "hits": + l2hits[unit] += int(match.group(3)) + + + if not time0: + print "No v3d profile records found" + else: + tlogged = time - time0 + + print "Logged time:", tlogged, " Op count:", op_count + for unit in sorted(ttotal): + print b'%6s: %10.3f %7.3f%%' % (unit, ttotal[unit], ttotal[unit] * 100.0 / tlogged) + print + for unit in sorted(qctotal): + if not unit in qtstotal: + qtstotal[unit] = 0; + print b'%6s: Qcycles: %10d, TMU stall: %10d (%7.3f%%)' % (unit, qctotal[unit], qtstotal[unit], (qtstotal[unit] * 100.0)/qctotal[unit]) + if unit in l2total: + print b' L2Total: %10d, hits: %10d (%7.3f%%)' % (l2total[unit], l2hits[unit], (l2hits[unit] * 100.0)/l2total[unit]) + + + +if __name__ == '__main__': + argp = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description="QPU/VPU perf summary from VC logging", + epilog = """ +Will also summarise TMU stalls if logging requests set in qpu noflush param +in the profiled code. + +Example use: + vcgencmd set_logging level=0xc0 + + sudo vcdbg log msg >& t.log + v3dusage.py t.log +""") + + argp.add_argument("logfile") + args = argp.parse_args() + + do_logparse(args.logfile) +