1 #ifndef CAFFE2_VIDEO_VIDEO_DECODER_H_ 2 #define CAFFE2_VIDEO_VIDEO_DECODER_H_ 4 #include <caffe2/core/logging.h> 11 #include <libavformat/avformat.h> 12 #include <libavformat/avio.h> 17 #define VIO_BUFFER_SZ 32768 18 #define MAX_DECODING_FRAMES 10000 26 SAMPLE_ALL_FRAMES = -1,
27 SAMPLE_TIMESTAMP_ONLY = -2,
39 USE_MINIMAL_WIDTH_HEIGHT = 1,
// Default constructor: initializes timestamp to -1 and fps to
// SpecialFps::SAMPLE_ALL_FRAMES.
63 SampleInterval() : timestamp(-1), fps(SpecialFps::SAMPLE_ALL_FRAMES) {}
66 return (timestamp < itvl.timestamp);
// Return all key-frames when set (assigned by the keyFrames() setter);
// defaults to false.
73 bool keyFrames_ =
false;
// Pixel format of the output buffer; defaults to RGB24.
76 AVPixelFormat pixelFormat_ = AVPixelFormat::AV_PIX_FMT_RGB24;
// Index of the video stream to process.
// NOTE(review): -1 presumably means "use the first video stream" — confirm
// against the decoder implementation.
80 int streamIndex_ = -1;
// Upper bound on the number of output frames.
// NOTE(review): -1 presumably means "no limit" — confirm.
84 int maximumOutputFrames_ = -1;
// Output resolution mode; defaults to VideoResType::USE_WIDTH_HEIGHT.
87 int video_res_type_ = VideoResType::USE_WIDTH_HEIGHT;
// Crop height; defaults to -1.
// NOTE(review): -1 presumably means "not set" — confirm.
88 int crop_height_ = -1;
// Decoding mode; defaults to DecodeType::DO_TMP_JITTER.
96 int decode_type_ = DecodeType::DO_TMP_JITTER;
// Defaults to -1.
// NOTE(review): the meaning of -1 is not visible in this part of the file — confirm.
97 int num_of_required_frame_ = -1;
// Sampling intervals; starts with a single interval
// {0, SpecialFps::SAMPLE_ALL_FRAMES}.
103 std::vector<SampleInterval> intervals_ = {{0, SpecialFps::SAMPLE_ALL_FRAMES}};
114 intervals_.emplace_back(0, v);
122 pixelFormat_ = pixelFormat;
130 keyFrames_ = keyFrames;
138 streamIndex_ = index;
146 maximumOutputFrames_ = count;
171 void operator()(
unsigned char* p)
const {
// unique_ptr over a uint8_t buffer that is released through avDeleter.
175 using AvDataPtr = std::unique_ptr<uint8_t, avDeleter>;
// Frame timestamp; initialized to 0.
// NOTE(review): units are not visible in this part of the file — confirm.
188 double timestamp_ = 0;
// Key-frame flag; initialized to false.
191 bool keyFrame_ =
false;
// Output frame index; initialized to -1.
197 int outputFrameIndex_ = -1;
203 : workBuffersize_(VIO_BUFFER_SZ),
204 workBuffer_((uint8_t*)av_malloc(workBuffersize_)),
206 inputBuffer_(
nullptr),
207 inputBufferSize_(0) {
208 inputFile_ = fopen(fname.c_str(),
"rb");
209 if (inputFile_ ==
nullptr) {
210 LOG(ERROR) <<
"Error opening video file " << fname;
212 ctx_ = avio_alloc_context(
213 static_cast<unsigned char*>(workBuffer_.get()),
217 &VideoIOContext::readFile,
219 &VideoIOContext::seekFile);
223 : workBuffersize_(VIO_BUFFER_SZ),
224 workBuffer_((uint8_t*)av_malloc(workBuffersize_)),
226 inputBuffer_(buffer),
227 inputBufferSize_(size) {
228 ctx_ = avio_alloc_context(
229 static_cast<unsigned char*>(workBuffer_.get()),
233 &VideoIOContext::readMemory,
235 &VideoIOContext::seekMemory);
245 int read(
unsigned char* buf,
int buf_size) {
247 return readMemory(
this, buf, buf_size);
248 }
else if (inputFile_) {
249 return readFile(
this, buf, buf_size);
255 int64_t seek(int64_t offset,
int whence) {
257 return seekMemory(
this, offset, whence);
258 }
else if (inputFile_) {
259 return seekFile(
this, offset, whence);
265 static int readFile(
void* opaque,
unsigned char* buf,
int buf_size) {
267 if (feof(h->inputFile_)) {
270 size_t ret = fread(buf, 1, buf_size, h->inputFile_);
271 if (ret < buf_size) {
272 if (ferror(h->inputFile_)) {
279 static int64_t seekFile(
void* opaque, int64_t offset,
int whence) {
285 return fseek(h->inputFile_, static_cast<long>(offset), whence);
288 int64_t cur = ftell(h->inputFile_);
289 fseek(h->inputFile_, 0L, SEEK_END);
290 int64_t size = ftell(h->inputFile_);
291 fseek(h->inputFile_, cur, SEEK_SET);
298 static int readMemory(
void* opaque,
unsigned char* buf,
int buf_size) {
304 int reminder = h->inputBufferSize_ - h->offset_;
305 int r = buf_size < reminder ? buf_size : reminder;
310 memcpy(buf, h->inputBuffer_ + h->offset_, r);
315 static int64_t seekMemory(
void* opaque, int64_t offset,
int whence) {
319 h->offset_ += offset;
322 h->offset_ = h->inputBufferSize_ + offset;
328 return h->inputBufferSize_;
333 AVIOContext* get_avio() {
339 DecodedFrame::AvDataPtr workBuffer_;
344 const char* inputBuffer_;
345 int inputBufferSize_;
355 enum AVMediaType codec_type;
356 AVPixelFormat pixFormat;
361 codec_type(AVMEDIA_TYPE_VIDEO),
362 pixFormat(AVPixelFormat::AV_PIX_FMT_RGB24) {}
370 const std::string& filename,
373 std::vector<std::unique_ptr<DecodedFrame>>& sampledFrames);
380 std::vector<std::unique_ptr<DecodedFrame>>& sampledFrames);
// Declaration only: takes an int result code and returns a std::string.
// NOTE(review): presumably turns an FFmpeg error code into readable text —
// confirm against the definition.
383 std::string ffmpegErrorStr(
int result);
385 void ResizeAndKeepAspectRatio(
386 const int origHeight,
394 const std::string& videoName,
398 std::vector<std::unique_ptr<DecodedFrame>>& sampledFrames);
402 #endif // CAFFE2_VIDEO_VIDEO_DECODER_H_ Params & outputHeight(int height)
Output frame height, default to video height.
Params & keyFrames(bool keyFrames)
Return all key-frames.
Params & outputWidth(int width)
Output frame width, default to video width.
Params & streamIndex(int index)
Index of video stream to process, defaults to the first video stream.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Params & fps(float v)
FPS of output frames. Setting it here will reset intervals_ and force decoding at the target FPS. This can be ...
Params & pixelFormat(AVPixelFormat pixelFormat)
Pixel format of output buffer, default PIX_FMT_RGB24.
Params & maxOutputFrames(int count)
Only output this many frames, default to no limit.