[FFmpeg-devel] [PATCH 04/12] Initialize ass library only once!

Tue May 3 19:13:20 EEST 2022

---
 fftools/ffmpeg.c            |  50 ++++-------
 libavcodec/Makefile         |   1 +
 libavcodec/avcodec.h        |   9 ++
 libavcodec/text_to_bitmap.c | 170 ++++++++++++++++++++++++++++++++++++
 libavcodec/text_to_bitmap.h |  18 ++++
 libavfilter/vf_subtitles.c  |  97 --------------------
 6 files changed, 217 insertions(+), 128 deletions(-)
 create mode 100644 libavcodec/text_to_bitmap.c
 create mode 100644 libavcodec/text_to_bitmap.h

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index a74800bb68..31acf08a6a 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -71,6 +71,10 @@
 # include "libavfilter/buffersrc.h"
 # include "libavfilter/buffersink.h"
 
+#if CONFIG_LIBASS
+#include "libavcodec/text_to_bitmap.h"
+#endif
+
 #if HAVE_SYS_RESOURCE_H
 #include <sys/time.h>
 #include <sys/types.h>
@@ -2325,8 +2329,6 @@ fail:
     return err < 0 ? err : ret;
 }
 
-void render_avsub_ass(InputStream *, AVSubtitle *, int, int);
-
 static void print_subtitle(AVSubtitle sub)
 {
     printf("sub.format: %u\n", sub.format);
@@ -2434,30 +2436,8 @@ static int transcode_subtitles(InputStream *ist, AVPacket *pkt, int *got_output,
         // Support text to bitmap
         if (avcodec_descriptor_get(ost->enc_ctx->codec_id)->props & AV_CODEC_PROP_BITMAP_SUB)
             if (avcodec_descriptor_get(ist->dec_ctx->codec_id)->props & AV_CODEC_PROP_TEXT_SUB)
-                if (!rendered) {
-                    // Try to get a height and width from a video
-                    int width = 0, height = 0;
-                    // Try output streams
-                    for (int j = 0; j < nb_output_streams; j++)
-                        if (output_streams[j]->enc_ctx->codec_type == AVMEDIA_TYPE_VIDEO)
-                        {
-                            width = output_streams[j]->enc_ctx->width;
-                            height = output_streams[j]->enc_ctx->height;
-                            break;
-                        }
-                    if (width == 0)
-                        // Try input streams
-                        for (int j = 0; j < nb_input_streams; j++)
-                            if (input_streams[j]->dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO)
-                            {
-                                width = input_streams[j]->dec_ctx->width;
-                                height = input_streams[j]->dec_ctx->height;
-                                break;
-                            }
-                    if (width == 0) {
-                        // TODO Error: Cannot render without a video stream
-                    }
-                    render_avsub_ass(ist, &subtitle, width, height);
+                if (!rendered) {        // Make sure not to render twice
+                    render_avsub_ass(ist->dec_ctx->ass_context, &subtitle);
                     for (int r = 0; r < subtitle.num_rects; r++)
                         subtitle.rects[r]->type = SUBTITLE_BITMAP;
                     rendered = 1;
@@ -2757,6 +2737,9 @@ static int init_input_stream(int ist_index, char *error, int error_len)
          * audio, and video decoders such as cuvid or mediacodec */
         ist->dec_ctx->pkt_timebase = ist->st->time_base;
 
+        // For text to bitmap rendering
+        ist->dec_ctx->ass_context = NULL;
+
         if (!av_dict_get(ist->decoder_opts, "threads", NULL, 0))
             av_dict_set(&ist->decoder_opts, "threads", "auto", 0);
         /* Attached pics are sparse, therefore we would not want to delay their decoding till EOF. */
@@ -3285,16 +3268,21 @@ static int init_output_stream(OutputStream *ost, AVFrame *frame,
                 input_props = input_descriptor->props & (AV_CODEC_PROP_TEXT_SUB | AV_CODEC_PROP_BITMAP_SUB);
             if (output_descriptor)
                 output_props = output_descriptor->props & (AV_CODEC_PROP_TEXT_SUB | AV_CODEC_PROP_BITMAP_SUB);
-            /*if (input_props && output_props && input_props != output_props) {
+#if CONFIG_LIBASS
+            if (input_props == AV_CODEC_PROP_BITMAP_SUB && output_props == AV_CODEC_PROP_TEXT_SUB) {
+                snprintf(error, error_len, "Subtitle encoding from bitmap to text currently not possible");
+                return AVERROR_INVALIDDATA;
+            }
+            if (input_props == AV_CODEC_PROP_TEXT_SUB && output_props == AV_CODEC_PROP_BITMAP_SUB)
+                init_ass_context(ist, ost);
+#else
+            if (input_props && output_props && input_props != output_props) {
                 snprintf(error, error_len,
                          "Subtitle encoding currently only possible from text to text "
                          "or bitmap to bitmap");
                 return AVERROR_INVALIDDATA;
-            }*/
-            if (input_props == AV_CODEC_PROP_BITMAP_SUB && output_props == AV_CODEC_PROP_TEXT_SUB) {
-                snprintf(error, error_len, "Subtitle encoding from bitmap to text currently not possible");
-                return AVERROR_INVALIDDATA;
             }
+#endif
         }
 
         if ((ret = avcodec_open2(ost->enc_ctx, codec, &ost->encoder_opts)) < 0) {
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index cfaa6f196a..6d28513129 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -151,6 +151,7 @@ OBJS-$(CONFIG_RV34DSP)                 += rv34dsp.o
 OBJS-$(CONFIG_SINEWIN)                 += sinewin.o
 OBJS-$(CONFIG_SNAPPY)                  += snappy.o
 OBJS-$(CONFIG_STARTCODE)               += startcode.o
+OBJS-$(CONFIG_LIBASS)                  += text_to_bitmap.o
 OBJS-$(CONFIG_TEXTUREDSP)              += texturedsp.o
 OBJS-$(CONFIG_TEXTUREDSPENC)           += texturedspenc.o
 OBJS-$(CONFIG_TPELDSP)                 += tpeldsp.o
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 4dae23d06e..530c01f193 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2055,6 +2055,15 @@ typedef struct AVCodecContext {
      *             The decoder can then override during decoding as needed.
      */
     AVChannelLayout ch_layout;
+
+    /**
+     * Pointer to ASS_Context instance (cast to void *)
+     * - encoding: unused
+     * - decoding: is set to NULL initially in init_input_stream, and if
+     *             there must be a text to bitmap conversion, is set to a new
+     *             ASS_Context by init_ass_context, called in init_output_stream
+     */
+     void *ass_context;
 } AVCodecContext;
 
 /**
diff --git a/libavcodec/text_to_bitmap.c b/libavcodec/text_to_bitmap.c
new file mode 100644
index 0000000000..87c46985d9
--- /dev/null
+++ b/libavcodec/text_to_bitmap.c
@@ -0,0 +1,170 @@
+//
+// Created by traian on 2022-05-02.
+//
+
+#include "text_to_bitmap.h"
+
+#include "fftools/ffmpeg.h"
+#include "avcodec.h"
+#include "ass_split.h"
+
+struct ASS_Context {                    // state stored in a decoder's ass_context for text to bitmap rendering
+    ASS_Library *library;               // from ass_library_init()
+    ASS_Renderer *renderer;             // from ass_renderer_init(library)
+    ASS_Track *track;                   // from ass_read_memory() on the decoder's subtitle_header
+    ASSSplitContext *ass_split_context; // from ff_ass_split() on the same subtitle_header
+};
+
+/* Build the libass state for decoder ist (no-op if it already has one) and store
+ * it in ist->dec_ctx->ass_context; that field is left NULL if any step fails. */
+void init_ass_context(InputStream *ist, OutputStream *ost)
+{
+    int width = 0, height = 0;
+    ASS_Context *context;
+
+    if (ist->dec_ctx->ass_context) return;
+    context = calloc(1, sizeof(*context));  // zeroed so the fail path can test each member
+    if (!context) return;
+    context->library = ass_library_init();
+    if (!context->library) goto fail;
+    ass_set_extract_fonts(context->library, 1);
+    // TODO: ass_add_font(context->library, );
+
+    // Try to get a height and width from somewhere
+    if (ost->enc_ctx->width != 0 && ost->enc_ctx->height != 0) {         // this output stream
+        width = ost->enc_ctx->width;
+        height = ost->enc_ctx->height;
+    } else if (ist->dec_ctx->width != 0 && ist->dec_ctx->height != 0) {  // this input stream
+        width = ist->dec_ctx->width;
+        height = ist->dec_ctx->height;
+    } else {
+        // Try output streams: first video stream only
+        for (int j = 0; j < nb_output_streams; j++)
+            if (output_streams[j]->enc_ctx->codec_type == AVMEDIA_TYPE_VIDEO) {
+                width = output_streams[j]->enc_ctx->width;
+                height = output_streams[j]->enc_ctx->height;
+                break;
+            }
+        // Try input streams, unless the output video stream already gave a full size
+        for (int j = 0; !(width && height) && j < nb_input_streams; j++)
+            if (input_streams[j]->dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO) {
+                width = input_streams[j]->dec_ctx->width;
+                height = input_streams[j]->dec_ctx->height;
+                break;
+            }
+        // TODO Error: Cannot render without a width and height
+    }
+
+    context->renderer = ass_renderer_init(context->library);
+    if (!context->renderer) goto fail;
+    ass_set_frame_size(context->renderer, width, height);
+    ass_set_pixel_aspect(context->renderer, 1);
+    ass_set_storage_size(context->renderer, width, height);
+    ass_set_shaper(context->renderer, 0);
+    ass_set_fonts(context->renderer, NULL, NULL, 1, NULL, 1);
+
+    context->track = ass_read_memory(context->library, (char *)ist->dec_ctx->subtitle_header, ist->dec_ctx->subtitle_header_size, NULL);
+    context->ass_split_context = ff_ass_split((char *)ist->dec_ctx->subtitle_header);
+    if (!context->track || !context->ass_split_context) goto fail;
+    ist->dec_ctx->ass_context = context;
+    return;
+fail:   // release whatever was created, dependents before the library
+    if (context->ass_split_context) ff_ass_split_free(context->ass_split_context);
+    if (context->track) ass_free_track(context->track);
+    if (context->renderer) ass_renderer_done(context->renderer);
+    if (context->library) ass_library_done(context->library);
+    free(context);
+}
+
+void free_ass_context(ASS_Context *context) {   // release everything init_ass_context() created
+    ass_free_track(context->track);             // track and renderer were created from the
+    ass_renderer_done(context->renderer);       // library, so they go first and it goes last
+    ass_library_done(context->library);
+    ff_ass_split_free(context->ass_split_context);
+    free(context);
+}
+
+/* libass packs a color as 0xRRGGBBTT, TT being the transparency; AA() inverts TT into alpha */
+#define AR(c)  ((c)>>24)
+#define AG(c)  (((c)>>16)&0xFF)
+#define AB(c)  (((c)>>8) &0xFF)
+#define AA(c)  ((0xFF-(c)) &0xFF)
+
+#define ALPHA_THRESHOLD 0x80    /* same value as 0b10000000, which is not a C11 literal */
+
+/* Render each text rect of sub that has no bitmap yet (data[0] == NULL) through libass
+ * into a palettized bitmap: data[0] = w*h palette indices (0 is transparent), data[1] =
+ * 256-entry native-endian 0xAARRGGBB palette. Rects are skipped on parse/alloc failure. */
+void render_avsub_ass(ASS_Context *context, AVSubtitle *sub)
+{
+    if (!context) return;       // no renderer state, nothing can be drawn
+    for (int r = 0; r < sub->num_rects; r++)
+    {
+        AVSubtitleRect *rect = sub->rects[r];
+        if (rect->data[0]) continue;
+
+        ASSDialog *dialog = ff_ass_split_dialog(context->ass_split_context, rect->ass);
+        if (!dialog) continue;  // event line could not be parsed
+        ASS_Track *track = context->track;
+        if (track->n_events > 0)
+            ass_free_event(track, 0);
+        track->n_events = 0;
+        ass_alloc_event(track);
+        track->n_events = track->max_events = 1;
+        track->events[0].Start = sub->start_display_time + sub->pts / (AV_TIME_BASE / 1000);
+        track->events[0].Duration = sub->end_display_time - sub->start_display_time;
+        track->events[0].Effect = strdup(dialog->effect);
+        track->events[0].Layer = dialog->layer;
+        track->events[0].MarginL = dialog->margin_l;
+        track->events[0].MarginR = dialog->margin_r;
+        track->events[0].MarginV = dialog->margin_v;
+        track->events[0].Name = strdup(dialog->name);
+        track->events[0].Text = strdup(dialog->text);
+        track->events[0].ReadOrder = dialog->readorder;
+        track->events[0].Style = 0;
+        for (int style = 0; style < track->n_styles; style++)
+            if (!strcmp(track->styles[style].Name, dialog->style))
+                track->events[0].Style = style;
+        track->events[0].render_priv = NULL;
+        ff_ass_free_dialog(&dialog);
+
+        ASS_Image *image = ass_render_frame(context->renderer, track,
+            track->events[0].Start + track->events[0].Duration / 2, NULL);
+        if (image == NULL) fprintf(stderr, "WARNING: failed to render ass\n");
+        // Bounding box of all images. The far edges are tracked as absolute coordinates:
+        // deriving them from x + w while x was still shrinking made the box too small.
+        int x0 = image ? image->dst_x : 0, y0 = image ? image->dst_y : 0, x1 = x0, y1 = y0;
+        for (ASS_Image *img = image; img != NULL; img = img->next) {
+            if (img->dst_x < x0) x0 = img->dst_x;
+            if (img->dst_y < y0) y0 = img->dst_y;
+            if (img->dst_x + img->w > x1) x1 = img->dst_x + img->w;
+            if (img->dst_y + img->h > y1) y1 = img->dst_y + img->h;
+        }
+        rect->x = x0; rect->w = x1 - x0;
+        rect->y = y0; rect->h = y1 - y0;
+        rect->linesize[0] = rect->w;
+        memset(&rect->linesize[1], 0, 3 * sizeof(int));
+        rect->data[2] = rect->data[3] = NULL;
+        rect->data[0] = calloc(1, (size_t)rect->w * rect->h + 1);   // zeroed: all pixels transparent; +1 avoids a 0-byte request
+        rect->data[1] = calloc(256, sizeof(uint32_t));              // zeroed: entry 0 is the transparent color
+        if (!rect->data[0] || !rect->data[1]) {
+            free(rect->data[0]); free(rect->data[1]);
+            rect->data[0] = rect->data[1] = NULL;
+            rect->w = rect->h = rect->linesize[0] = 0;
+            continue;
+        }
+        uint32_t *palette = (uint32_t *)rect->data[1];
+        rect->nb_colors = 1;    // Transparent background counts as a color
+        for (; image != NULL; image = image->next) {
+            uint32_t argb = ((uint32_t)AA(image->color) << 24) | (AR(image->color) << 16) | (AG(image->color) << 8) | AB(image->color);
+            int color = 1;      // reuse the palette entry if this color was already added
+            while (color < rect->nb_colors && palette[color] != argb) color++;
+            if (color == 256) color = 255;      // palette full: fall back to the last entry
+            else if (color == rect->nb_colors) palette[rect->nb_colors++] = argb;
+            for (int y = 0; y < image->h; y++)
+                for (int x = 0; x < image->w; x++)
+                    if (image->bitmap[y * image->stride + x] >= ALPHA_THRESHOLD)
+                        rect->data[0][(image->dst_y + y - rect->y) * rect->w + image->dst_x + x - rect->x] = color;
+        }
+    }
+}
\ No newline at end of file
diff --git a/libavcodec/text_to_bitmap.h b/libavcodec/text_to_bitmap.h
new file mode 100644
index 0000000000..4cba5889f4
--- /dev/null
+++ b/libavcodec/text_to_bitmap.h
@@ -0,0 +1,18 @@
+//
+// Created by traian on 2022-05-02.
+//
+
+#ifndef FFMPEG_TEXT_TO_BITMAP_H
+#define FFMPEG_TEXT_TO_BITMAP_H
+
+#include <ass/ass.h>
+#include "fftools/ffmpeg.h"
+
+struct ASS_Context;             // opaque here; the members are defined in text_to_bitmap.c
+typedef struct ASS_Context ASS_Context;
+
+void init_ass_context(InputStream *ist, OutputStream *ost);    // creates ist->dec_ctx->ass_context if it is not set yet
+void render_avsub_ass(ASS_Context *, AVSubtitle *);            // renders the subtitle's text rects into bitmap data
+void free_ass_context(ASS_Context *context);                   // destroys the context and the libass objects it holds
+
+#endif //FFMPEG_TEXT_TO_BITMAP_H
diff --git a/libavfilter/vf_subtitles.c b/libavfilter/vf_subtitles.c
index 7226911f6c..703dbec37d 100644
--- a/libavfilter/vf_subtitles.c
+++ b/libavfilter/vf_subtitles.c
@@ -217,103 +217,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
     return ff_filter_frame(outlink, picref);
 }
 
-#include "../fftools/ffmpeg.h"
-#include "libavcodec/avcodec.h"
-#include "../libavcodec/ass_split.h"
-
-#define ALPHA_THRESHOLD 0b10000000
-
-void render_avsub_ass(InputStream *, AVSubtitle *, int, int);
-void render_avsub_ass(InputStream *ist, AVSubtitle *sub, int frame_width, int frame_height)
-{
-    ASS_Library *library = ass_library_init();
-    ass_set_extract_fonts(library, 1);
-    ASS_Renderer *renderer = ass_renderer_init(library);
-    ass_set_frame_size(renderer, frame_width, frame_height);
-    ass_set_pixel_aspect(renderer, 1);
-    ass_set_storage_size(renderer, frame_width, frame_height);
-    ass_set_shaper(renderer, 0);
-    ass_set_fonts(renderer, NULL, NULL, 1, NULL, 1);
-
-    for (int r = 0; r < sub->num_rects; r++)
-    {
-        AVSubtitleRect *rect = sub->rects[r];
-        if (rect->data[0]) continue;
-
-        ASSSplitContext *ass_context = ff_ass_split((char *)ist->dec_ctx->subtitle_header);
-        ASSDialog *dialog = ff_ass_split_dialog(ass_context, rect->ass);
-
-        ASS_Track *track = ass_read_memory(library, (char *)ist->dec_ctx->subtitle_header, ist->dec_ctx->subtitle_header_size, NULL);
-        track->n_events = track->max_events = 1;
-        track->events = (ASS_Event *)malloc(sizeof(ASS_Event));
-        track->events[0].Start = sub->start_display_time + sub->pts / (AV_TIME_BASE / 1000);
-        track->events[0].Duration = sub->end_display_time - sub->start_display_time;
-        track->events[0].Effect = strdup(dialog->effect);
-        track->events[0].Layer = dialog->layer;
-        track->events[0].MarginL = dialog->margin_l;
-        track->events[0].MarginR = dialog->margin_r;
-        track->events[0].MarginV = dialog->margin_v;
-        track->events[0].Name = strdup(dialog->name);
-        track->events[0].Text = strdup(dialog->text);
-        track->events[0].ReadOrder = dialog->readorder;
-        track->events[0].Style = 0;
-        for (int style = 0; style < track->n_styles; style++)
-            if (!strcmp(track->styles[style].Name, dialog->style))
-                track->events[0].Style = style;
-        track->events[0].render_priv = NULL;
-        ff_ass_free_dialog(&dialog);
-        ff_ass_split_free(ass_context);
-
-        int change;
-        ASS_Image *image = ass_render_frame(renderer, track, track->events[0].Start + 1, &change);   // Don't have to free it for some reason
-        printf("image: %p\n", image);
-
-        rect->x = image->dst_x; rect->w = 0;
-        rect->y = image->dst_y; rect->h = 0;
-        rect->nb_colors = 1;    // Transparent background counts as a color
-        for (ASS_Image *img = image; img != NULL; img = img->next)
-        {
-            // Set image bounds to encompass all images
-            if (img->dst_x < rect->x) rect->x = img->dst_x;
-            if (img->dst_y < rect->y) rect->y = img->dst_y;
-            if (img->dst_x + img->w > rect->x + rect->w)
-                rect->w = img->dst_x + img->w - rect->x;
-            if (img->dst_y + img->h > rect->y + rect->h)
-                rect->h = img->dst_y + img->h - rect->y;
-            rect->nb_colors++;
-        }
-        rect->linesize[0] = rect->w;
-        rect->data[0] = (uint8_t *)malloc(rect->w * rect->h * sizeof(uint8_t));
-        rect->data[1] = (uint8_t *)malloc(4 * rect->nb_colors * sizeof(uint8_t));
-        memset(rect->data[0], 0, rect->w * rect->h);        // Set all to transparent
-        memset(rect->data[1], 0, 4);                        // Set transparent color
-        memset(&rect->linesize[1], 0, 3 * sizeof(int));
-        rect->data[2] = rect->data[3] = NULL;
-        for (int color = 1; image != NULL; image = image->next, color++)
-        {
-            // Set color
-            rect->data[1][4 * color + 0] = AR(image->color);
-            rect->data[1][4 * color + 1] = AG(image->color);
-            rect->data[1][4 * color + 2] = AB(image->color);
-            rect->data[1][4 * color + 3] = AA(image->color);
-            // Set pixels
-            for (int y = 0; y < image->h; y++)
-                for (int x = 0; x < image->w; x++)
-                    if (image->bitmap[y * image->stride + x] >= ALPHA_THRESHOLD)
-                    {
-                        int x_rect = image->dst_x + x - rect->x;
-                        int y_rect = image->dst_y + y - rect->y;
-                        rect->data[0][y_rect * rect->w + x_rect] = color;
-                    }
-        }
-
-        ass_free_track(track);
-    }
-
-    ass_renderer_done(renderer);
-    ass_library_done(library);
-}
-
 static const AVFilterPad ass_inputs[] = {
     {
         .name             = "default",
-- 
2.34.1