[FFmpeg-devel] [PATCH v3 01/18] avutil/frame: Subtitle Filtering - Add AVMediaType property to AVFrame

Sat Sep 11 09:02:37 EEST 2021

This is the root commit for adding subtitle filtering capabilities.
Adding the media type property to AVFrame replaces the previous
way of distinction which was based on checking width and height
to determine whether a frame is audio or video.

Signed-off-by: softworkz <softworkz at hotmail.com>
---
 libavutil/frame.c   | 78 +++++++++++++++++++++++++++++++++++----------
 libavutil/frame.h   | 39 +++++++++++++++++++++--
 libavutil/version.h |  2 +-
 3 files changed, 99 insertions(+), 20 deletions(-)

diff --git a/libavutil/frame.c b/libavutil/frame.c
index b0ceaf7145..09149721c0 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -244,22 +244,39 @@ static int get_audio_buffer(AVFrame *frame, int align)
 }
 
 int av_frame_get_buffer(AVFrame *frame, int align)
+{
+    if (frame->width > 0 && frame->height > 0)
+        return av_frame_get_buffer2(frame, AVMEDIA_TYPE_VIDEO, align);
+    else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0))
+        return av_frame_get_buffer2(frame, AVMEDIA_TYPE_AUDIO, align);
+
+    return AVERROR(EINVAL);
+}
+
+int av_frame_get_buffer2(AVFrame *frame, enum AVMediaType type, int align)
 {
     if (frame->format < 0)
         return AVERROR(EINVAL);
 
-    if (frame->width > 0 && frame->height > 0)
+    frame->type = type;
+
+    switch(frame->type) {
+    case AVMEDIA_TYPE_VIDEO:
         return get_video_buffer(frame, align);
-    else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0))
+    case AVMEDIA_TYPE_AUDIO:
         return get_audio_buffer(frame, align);
-
-    return AVERROR(EINVAL);
+    case AVMEDIA_TYPE_SUBTITLE:
+        return 0;
+    default:
+        return AVERROR(EINVAL);
+    }
 }
 
 static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy)
 {
     int ret, i;
 
+    dst->type                   = src->type;
     dst->key_frame              = src->key_frame;
     dst->pict_type              = src->pict_type;
     dst->sample_aspect_ratio    = src->sample_aspect_ratio;
@@ -331,6 +348,7 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src)
     av_assert1(dst->width == 0 && dst->height == 0);
     av_assert1(dst->channels == 0);
 
+    dst->type           = src->type;
     dst->format         = src->format;
     dst->width          = src->width;
     dst->height         = src->height;
@@ -344,7 +362,7 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src)
 
     /* duplicate the frame data if it's not refcounted */
     if (!src->buf[0]) {
-        ret = av_frame_get_buffer(dst, 0);
+        ret = av_frame_get_buffer2(dst, dst->type, 0);
         if (ret < 0)
             goto fail;
 
@@ -499,6 +517,7 @@ int av_frame_make_writable(AVFrame *frame)
         return 0;
 
     memset(&tmp, 0, sizeof(tmp));
+    tmp.type           = frame->type;
     tmp.format         = frame->format;
     tmp.width          = frame->width;
     tmp.height         = frame->height;
@@ -509,7 +528,7 @@ int av_frame_make_writable(AVFrame *frame)
     if (frame->hw_frames_ctx)
         ret = av_hwframe_get_buffer(frame->hw_frames_ctx, &tmp, 0);
     else
-        ret = av_frame_get_buffer(&tmp, 0);
+        ret = av_frame_get_buffer2(&tmp, tmp.type, 0);
     if (ret < 0)
         return ret;
 
@@ -544,14 +563,22 @@ AVBufferRef *av_frame_get_plane_buffer(AVFrame *frame, int plane)
     uint8_t *data;
     int planes, i;
 
-    if (frame->nb_samples) {
-        int channels = frame->channels;
-        if (!channels)
-            return NULL;
-        CHECK_CHANNELS_CONSISTENCY(frame);
-        planes = av_sample_fmt_is_planar(frame->format) ? channels : 1;
-    } else
+    switch(frame->type) {
+    case AVMEDIA_TYPE_VIDEO:
         planes = 4;
+        break;
+    case AVMEDIA_TYPE_AUDIO:
+        {
+            int channels = frame->channels;
+            if (!channels)
+                return NULL;
+            CHECK_CHANNELS_CONSISTENCY(frame);
+            planes = av_sample_fmt_is_planar(frame->format) ? channels : 1;
+            break;
+        }
+    default:
+        return NULL;
+    }
 
     if (plane < 0 || plane >= planes || !frame->extended_data[plane])
         return NULL;
@@ -675,17 +702,34 @@ static int frame_copy_audio(AVFrame *dst, const AVFrame *src)
     return 0;
 }
 
+static int frame_copy_subtitles(AVFrame *dst, const AVFrame *src)
+{
+    dst->type = AVMEDIA_TYPE_SUBTITLE;
+    dst->format = src->format;
+
+    if (src->buf[0]) {
+        dst->buf[0] = av_buffer_ref(src->buf[0]);
+        dst->data[0] = src->data[0];
+    }
+
+    return 0;
+}
+
 int av_frame_copy(AVFrame *dst, const AVFrame *src)
 {
     if (dst->format != src->format || dst->format < 0)
         return AVERROR(EINVAL);
 
-    if (dst->width > 0 && dst->height > 0)
+    switch(dst->type) {
+    case AVMEDIA_TYPE_VIDEO:
         return frame_copy_video(dst, src);
-    else if (dst->nb_samples > 0 && dst->channels > 0)
+    case AVMEDIA_TYPE_AUDIO:
         return frame_copy_audio(dst, src);
-
-    return AVERROR(EINVAL);
+    case AVMEDIA_TYPE_SUBTITLE:
+        return frame_copy_subtitles(dst, src);
+    default:
+        return AVERROR(EINVAL);
+    }
 }
 
 void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type)
diff --git a/libavutil/frame.h b/libavutil/frame.h
index ff2540a20f..c104815df9 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -271,7 +271,7 @@ typedef struct AVRegionOfInterest {
 } AVRegionOfInterest;
 
 /**
- * This structure describes decoded (raw) audio or video data.
+ * This structure describes decoded (raw) audio, video or subtitle data.
  *
  * AVFrame must be allocated using av_frame_alloc(). Note that this only
  * allocates the AVFrame itself, the buffers for the data must be managed
@@ -302,6 +302,13 @@ typedef struct AVRegionOfInterest {
  */
 typedef struct AVFrame {
 #define AV_NUM_DATA_POINTERS 8
+    /**
+     * Media type of the frame (audio, video, subtitles..)
+     *
+     * See AVMEDIA_TYPE_xxx
+     */
+    enum AVMediaType type;
+
     /**
      * pointer to the picture/channel planes.
      * This might be different from the first allocated byte
@@ -371,7 +378,7 @@ typedef struct AVFrame {
     /**
      * format of the frame, -1 if unknown or unset
      * Values correspond to enum AVPixelFormat for video frames,
-     * enum AVSampleFormat for audio)
+     * enum AVSampleFormat for audio, AVSubtitleType for subtitles)
      */
     int format;
 
@@ -721,6 +728,8 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src);
 /**
  * Allocate new buffer(s) for audio or video data.
  *
+ * Note: For subtitle data, use av_frame_get_buffer2
+ *
  * The following fields must be set on frame before calling this function:
  * - format (pixel format for video, sample format for audio)
  * - width and height for video
@@ -743,6 +752,32 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src);
  */
 int av_frame_get_buffer(AVFrame *frame, int align);
 
+/**
+ * Allocate new buffer(s) for audio, video or subtitle data.
+ *
+ * The following fields must be set on frame before calling this function:
+ * - format (pixel format for video, sample format for audio)
+ * - width and height for video
+ * - nb_samples and channel_layout for audio
+ *
+ * This function will fill AVFrame.data and AVFrame.buf arrays and, if
+ * necessary, allocate and fill AVFrame.extended_data and AVFrame.extended_buf.
+ * For planar formats, one buffer will be allocated for each plane.
+ *
+ * @warning: if frame already has been allocated, calling this function will
+ *           leak memory. In addition, undefined behavior can occur in certain
+ *           cases.
+ *
+ * @param frame frame in which to store the new buffers.
+ * @param type  media type to set for the frame.
+ * @param align Required buffer size alignment. If equal to 0, alignment will be
+ *              chosen automatically for the current CPU. It is highly
+ *              recommended to pass 0 here unless you know what you are doing.
+ *
+ * @return 0 on success, a negative AVERROR on error.
+ */
+int av_frame_get_buffer2(AVFrame *frame, enum AVMediaType type, int align);
+
 /**
  * Check if the frame data is writable.
  *
diff --git a/libavutil/version.h b/libavutil/version.h
index f220e192c8..2b5ac540a4 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,7 +79,7 @@
  */
 
 #define LIBAVUTIL_VERSION_MAJOR  57
-#define LIBAVUTIL_VERSION_MINOR   5
+#define LIBAVUTIL_VERSION_MINOR   6
 #define LIBAVUTIL_VERSION_MICRO 100
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
-- 
2.30.2.windows.1