[FFmpeg-devel] [PATCH 04/12] Initialize ass library only once!
Traian Coza
traian.coza at gmail.com
Tue May 3 19:13:20 EEST 2022
---
fftools/ffmpeg.c | 50 ++++-------
libavcodec/Makefile | 1 +
libavcodec/avcodec.h | 9 ++
libavcodec/text_to_bitmap.c | 170 ++++++++++++++++++++++++++++++++++++
libavcodec/text_to_bitmap.h | 18 ++++
libavfilter/vf_subtitles.c | 97 --------------------
6 files changed, 217 insertions(+), 128 deletions(-)
create mode 100644 libavcodec/text_to_bitmap.c
create mode 100644 libavcodec/text_to_bitmap.h
diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index a74800bb68..31acf08a6a 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -71,6 +71,10 @@
# include "libavfilter/buffersrc.h"
# include "libavfilter/buffersink.h"
+#if CONFIG_LIBASS
+#include "libavcodec/text_to_bitmap.h"
+#endif
+
#if HAVE_SYS_RESOURCE_H
#include <sys/time.h>
#include <sys/types.h>
@@ -2325,8 +2329,6 @@ fail:
return err < 0 ? err : ret;
}
-void render_avsub_ass(InputStream *, AVSubtitle *, int, int);
-
static void print_subtitle(AVSubtitle sub)
{
printf("sub.format: %u\n", sub.format);
@@ -2434,30 +2436,8 @@ static int transcode_subtitles(InputStream *ist, AVPacket *pkt, int *got_output,
// Support text to bitmap
if (avcodec_descriptor_get(ost->enc_ctx->codec_id)->props & AV_CODEC_PROP_BITMAP_SUB)
if (avcodec_descriptor_get(ist->dec_ctx->codec_id)->props & AV_CODEC_PROP_TEXT_SUB)
- if (!rendered) {
- // Try to get a height and width from a video
- int width = 0, height = 0;
- // Try output streams
- for (int j = 0; j < nb_output_streams; j++)
- if (output_streams[j]->enc_ctx->codec_type == AVMEDIA_TYPE_VIDEO)
- {
- width = output_streams[j]->enc_ctx->width;
- height = output_streams[j]->enc_ctx->height;
- break;
- }
- if (width == 0)
- // Try input streams
- for (int j = 0; j < nb_input_streams; j++)
- if (input_streams[j]->dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO)
- {
- width = input_streams[j]->dec_ctx->width;
- height = input_streams[j]->dec_ctx->height;
- break;
- }
- if (width == 0) {
- // TODO Error: Cannot render without a video stream
- }
- render_avsub_ass(ist, &subtitle, width, height);
+ if (!rendered) { // Make sure not to render twice
+ render_avsub_ass(ist->dec_ctx->ass_context, &subtitle);
for (int r = 0; r < subtitle.num_rects; r++)
subtitle.rects[r]->type = SUBTITLE_BITMAP;
rendered = 1;
@@ -2757,6 +2737,9 @@ static int init_input_stream(int ist_index, char *error, int error_len)
* audio, and video decoders such as cuvid or mediacodec */
ist->dec_ctx->pkt_timebase = ist->st->time_base;
+ // For text to bitmap rendering
+ ist->dec_ctx->ass_context = NULL;
+
if (!av_dict_get(ist->decoder_opts, "threads", NULL, 0))
av_dict_set(&ist->decoder_opts, "threads", "auto", 0);
/* Attached pics are sparse, therefore we would not want to delay their decoding till EOF. */
@@ -3285,16 +3268,21 @@ static int init_output_stream(OutputStream *ost, AVFrame *frame,
input_props = input_descriptor->props & (AV_CODEC_PROP_TEXT_SUB | AV_CODEC_PROP_BITMAP_SUB);
if (output_descriptor)
output_props = output_descriptor->props & (AV_CODEC_PROP_TEXT_SUB | AV_CODEC_PROP_BITMAP_SUB);
- /*if (input_props && output_props && input_props != output_props) {
+#if CONFIG_LIBASS
+ if (input_props == AV_CODEC_PROP_BITMAP_SUB && output_props == AV_CODEC_PROP_TEXT_SUB) {
+ snprintf(error, error_len, "Subtitle encoding from bitmap to text currently not possible");
+ return AVERROR_INVALIDDATA;
+ }
+ if (input_props == AV_CODEC_PROP_TEXT_SUB && output_props == AV_CODEC_PROP_BITMAP_SUB)
+ init_ass_context(ist, ost);
+#else
+ if (input_props && output_props && input_props != output_props) {
snprintf(error, error_len,
"Subtitle encoding currently only possible from text to text "
"or bitmap to bitmap");
return AVERROR_INVALIDDATA;
- }*/
- if (input_props == AV_CODEC_PROP_BITMAP_SUB && output_props == AV_CODEC_PROP_TEXT_SUB) {
- snprintf(error, error_len, "Subtitle encoding from bitmap to text currently not possible");
- return AVERROR_INVALIDDATA;
}
+#endif
}
if ((ret = avcodec_open2(ost->enc_ctx, codec, &ost->encoder_opts)) < 0) {
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index cfaa6f196a..6d28513129 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -151,6 +151,7 @@ OBJS-$(CONFIG_RV34DSP) += rv34dsp.o
OBJS-$(CONFIG_SINEWIN) += sinewin.o
OBJS-$(CONFIG_SNAPPY) += snappy.o
OBJS-$(CONFIG_STARTCODE) += startcode.o
+OBJS-$(CONFIG_LIBASS) += text_to_bitmap.o
OBJS-$(CONFIG_TEXTUREDSP) += texturedsp.o
OBJS-$(CONFIG_TEXTUREDSPENC) += texturedspenc.o
OBJS-$(CONFIG_TPELDSP) += tpeldsp.o
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 4dae23d06e..530c01f193 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2055,6 +2055,15 @@ typedef struct AVCodecContext {
* The decoder can then override during decoding as needed.
*/
AVChannelLayout ch_layout;
+
+ /**
+ * Pointer to an ASS_Context (declared in text_to_bitmap.h), stored as void *
+ * - encoding: unused
+ * - decoding: is set to NULL initially in init_input_stream, and if
+ *             there must be a text to bitmap conversion, is set to a
+ *             new ASS_Context by init_ass_context() in init_output_stream
+ */
+ void *ass_context;
} AVCodecContext;
/**
diff --git a/libavcodec/text_to_bitmap.c b/libavcodec/text_to_bitmap.c
new file mode 100644
index 0000000000..87c46985d9
--- /dev/null
+++ b/libavcodec/text_to_bitmap.c
@@ -0,0 +1,170 @@
+//
+// Created by traian on 2022-05-02.
+//
+
+#include "text_to_bitmap.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/log.h"
+#include "libavutil/mem.h"
+#include "libavutil/pixfmt.h"
+
+#include "fftools/ffmpeg.h"
+#include "avcodec.h"
+#include "ass_split.h"
+
+/* State used to render the text subtitles of one decoder as bitmaps.
+ * Filled in by init_ass_context() and released by free_ass_context(). */
+struct ASS_Context {
+ ASS_Library *library; // obtained from ass_library_init()
+ ASS_Renderer *renderer; // obtained from ass_renderer_init(library)
+ ASS_Track *track; // read from the decoder's subtitle_header; render_avsub_ass() rewrites its event 0 for every rect
+ ASSSplitContext *ass_split_context; // ff_ass_split() of the same subtitle_header, used to split dialog lines
+};
+
+/**
+ * Create the libass state used to render the text subtitles decoded by ist as
+ * bitmaps, and store it in ist->dec_ctx->ass_context.
+ *
+ * Does nothing if a context is already stored there. If any step of the setup
+ * fails, everything created so far is released again and ass_context is left
+ * NULL.
+ *
+ * @param ist input stream whose decoder provides the ASS subtitle_header
+ * @param ost output stream the subtitles are encoded to; its encoder size is
+ *            the first candidate for the render size
+ */
+void init_ass_context(InputStream *ist, OutputStream *ost)
+{
+    AVCodecContext *dec = ist->dec_ctx;
+    ASS_Context *context;
+    int width = 0, height = 0;
+
+    if (dec->ass_context)
+        return;
+
+    // Try to get a width and height from somewhere
+    if (ost->enc_ctx->width != 0 && ost->enc_ctx->height != 0) {
+        // Try this output stream
+        width  = ost->enc_ctx->width;
+        height = ost->enc_ctx->height;
+    } else if (dec->width != 0 && dec->height != 0) {
+        // Try this input stream
+        width  = dec->width;
+        height = dec->height;
+    }
+    if (!(width && height)) {
+        // Try the first video output stream
+        for (int j = 0; j < nb_output_streams; j++)
+            if (output_streams[j]->enc_ctx->codec_type == AVMEDIA_TYPE_VIDEO) {
+                width  = output_streams[j]->enc_ctx->width;
+                height = output_streams[j]->enc_ctx->height;
+                break;
+            }
+    }
+    if (!(width && height)) {
+        // Try the first video input stream
+        for (int j = 0; j < nb_input_streams; j++)
+            if (input_streams[j]->dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO) {
+                width  = input_streams[j]->dec_ctx->width;
+                height = input_streams[j]->dec_ctx->height;
+                break;
+            }
+    }
+    if (!(width && height))
+        av_log(NULL, AV_LOG_WARNING,
+               "No width and height found for rendering text subtitles\n");
+
+    // Zeroed so that the failure path can tell which members were created
+    context = calloc(1, sizeof(*context));
+    if (!context)
+        goto fail;
+
+    context->library = ass_library_init();
+    if (!context->library)
+        goto fail;
+    ass_set_extract_fonts(context->library, 1);
+    // TODO: ass_add_font(context->library, );
+
+    context->renderer = ass_renderer_init(context->library);
+    if (!context->renderer)
+        goto fail;
+    ass_set_frame_size(context->renderer, width, height);
+    ass_set_pixel_aspect(context->renderer, 1);
+    ass_set_storage_size(context->renderer, width, height);
+    ass_set_shaper(context->renderer, 0);
+    ass_set_fonts(context->renderer, NULL, NULL, 1, NULL, 1);
+
+    context->track = ass_read_memory(context->library, (char *)dec->subtitle_header,
+                                     dec->subtitle_header_size, NULL);
+    if (!context->track)
+        goto fail;
+    context->ass_split_context = ff_ass_split((char *)dec->subtitle_header);
+    if (!context->ass_split_context)
+        goto fail;
+
+    dec->ass_context = context;
+    return;
+
+fail:
+    av_log(NULL, AV_LOG_ERROR,
+           "Could not set up libass for text to bitmap subtitle rendering\n");
+    if (context) {
+        // Track and renderer were created from the library, so they go first
+        if (context->track)
+            ass_free_track(context->track);
+        if (context->renderer)
+            ass_renderer_done(context->renderer);
+        if (context->library)
+            ass_library_done(context->library);
+        free(context);
+    }
+}
+
+/**
+ * Release the libass objects held by an ASS_Context and the context itself.
+ *
+ * @param context context created by init_ass_context(); NULL is accepted and
+ *                ignored, since decoders that never needed text to bitmap
+ *                rendering have no context
+ */
+void free_ass_context(ASS_Context *context) {
+    if (!context)
+        return;
+    // The track and the renderer were created from the library, so they are
+    // released before the library is finalized
+    ass_free_track(context->track);
+    ass_renderer_done(context->renderer);
+    ass_library_done(context->library);
+    ff_ass_split_free(context->ass_split_context);
+    free(context);
+}
+
+/* libass stores an RGBA color in the format RRGGBBTT, where TT is the transparency level */
+#define AR(c) (((c)>>24)&0xFF)
+#define AG(c) (((c)>>16)&0xFF)
+#define AB(c) (((c)>>8) &0xFF)
+#define AA(c) ((0xFF-(c)) &0xFF) /* alpha = 255 - transparency */
+
+/* Bitmap values at or above this count as an opaque pixel of the layer.
+ * Written in hex because 0b binary literals are not ISO C before C23. */
+#define ALPHA_THRESHOLD 0x80
+
+/* Copy an ASS dialog field for libass, which releases event strings with
+ * free(); a missing field becomes an empty string. */
+static char *dup_event_field(const char *field)
+{
+    return strdup(field ? field : "");
+}
+
+/**
+ * Render the text rects of a subtitle into paletted bitmaps.
+ *
+ * Every rect of sub that has no bitmap yet (data[0] == NULL) and carries an
+ * ASS dialog line is rendered with libass at the middle of its display
+ * interval. The rect then receives its position and size (the bounding box of
+ * all rendered layers), an 8-bit index bitmap in data[0] and a palette of
+ * AVPALETTE_SIZE bytes in data[1]. Palette entry 0 is transparent and each
+ * libass layer gets the next entry; a pixel takes the index of the last layer
+ * whose bitmap value there is at least ALPHA_THRESHOLD. Layers beyond the 255
+ * that fit in the palette are dropped. rect->type is not changed here.
+ *
+ * @param context state created by init_ass_context(); NULL makes this a no-op
+ * @param sub     subtitle whose rects are rendered in place
+ */
+void render_avsub_ass(ASS_Context *context, AVSubtitle *sub)
+{
+    if (!context)
+        return;
+
+    for (int r = 0; r < sub->num_rects; r++) {
+        AVSubtitleRect *rect = sub->rects[r];
+        ASS_Track *track = context->track;
+        ASS_Event *event;
+        ASSDialog *dialog;
+        ASS_Image *image;
+        uint32_t *palette;
+        int x0 = 0, y0 = 0, x1 = 0, y1 = 0, nb_layers = 0;
+
+        // Skip rects that already have a bitmap or have no text to render
+        if (rect->data[0] || !rect->ass)
+            continue;
+
+        dialog = ff_ass_split_dialog(context->ass_split_context, rect->ass);
+        if (!dialog) {
+            av_log(NULL, AV_LOG_WARNING, "WARNING: failed to parse ass dialog\n");
+            continue;
+        }
+
+        // The track only ever holds the single event being rendered
+        if (track->n_events > 0)
+            ass_free_event(track, 0);
+        track->n_events = 0;
+        if (ass_alloc_event(track) < 0) {
+            ff_ass_free_dialog(&dialog);
+            continue;
+        }
+        event = &track->events[0];
+        event->Start     = sub->start_display_time + sub->pts / (AV_TIME_BASE / 1000);
+        event->Duration  = sub->end_display_time - sub->start_display_time;
+        event->Layer     = dialog->layer;
+        event->MarginL   = dialog->margin_l;
+        event->MarginR   = dialog->margin_r;
+        event->MarginV   = dialog->margin_v;
+        event->ReadOrder = dialog->readorder;
+        event->Effect    = dup_event_field(dialog->effect);
+        event->Name      = dup_event_field(dialog->name);
+        event->Text      = dup_event_field(dialog->text);
+        event->Style     = 0;
+        for (int style = 0; dialog->style && style < track->n_styles; style++)
+            if (track->styles[style].Name &&
+                !strcmp(track->styles[style].Name, dialog->style))
+                event->Style = style;
+        event->render_priv = NULL;
+        ff_ass_free_dialog(&dialog);
+
+        // Render in the middle of the display interval; the returned image
+        // list is not freed here
+        image = ass_render_frame(context->renderer, track,
+                                 event->Start + event->Duration / 2, NULL);
+        if (!image)
+            av_log(NULL, AV_LOG_WARNING, "WARNING: failed to render ass\n");
+
+        // Bounding box of all layers, tracked as min/max corners so that a
+        // layer left of or above an earlier one also widens the box
+        if (image) {
+            x0 = x1 = image->dst_x;
+            y0 = y1 = image->dst_y;
+        }
+        for (ASS_Image *img = image; img && nb_layers < AVPALETTE_COUNT - 1;
+             img = img->next, nb_layers++) {
+            x0 = FFMIN(x0, img->dst_x);
+            y0 = FFMIN(y0, img->dst_y);
+            x1 = FFMAX(x1, img->dst_x + img->w);
+            y1 = FFMAX(y1, img->dst_y + img->h);
+        }
+        rect->x = x0;
+        rect->y = y0;
+        rect->w = x1 - x0;
+        rect->h = y1 - y0;
+        rect->nb_colors = nb_layers + 1; // Transparent background counts as a color
+        rect->linesize[0] = rect->w;
+        rect->linesize[1] = rect->linesize[2] = rect->linesize[3] = 0;
+        rect->data[2] = rect->data[3] = NULL;
+
+        // av_mallocz() so that the buffers can be released with av_free(),
+        // start out fully transparent, and the palette has all 256 entries
+        rect->data[0] = av_mallocz((size_t)rect->w * rect->h);
+        rect->data[1] = av_mallocz(AVPALETTE_SIZE);
+        if (!rect->data[0] || !rect->data[1]) {
+            av_log(NULL, AV_LOG_ERROR, "Could not allocate subtitle bitmap\n");
+            av_freep(&rect->data[0]);
+            av_freep(&rect->data[1]);
+            rect->w = rect->h = rect->nb_colors = rect->linesize[0] = 0;
+            continue;
+        }
+
+        palette = (uint32_t *)rect->data[1];
+        for (int color = 1; color <= nb_layers; color++, image = image->next) {
+            int col0 = image->dst_x - x0;
+            int row0 = image->dst_y - y0;
+
+            // Palette entries are native-endian ARGB words
+            palette[color] = ((uint32_t)AA(image->color) << 24) |
+                             ((uint32_t)AR(image->color) << 16) |
+                             ((uint32_t)AG(image->color) <<  8) |
+                              (uint32_t)AB(image->color);
+            // Set pixels
+            for (int y = 0; y < image->h; y++)
+                for (int x = 0; x < image->w; x++)
+                    if (image->bitmap[y * image->stride + x] >= ALPHA_THRESHOLD)
+                        rect->data[0][(row0 + y) * rect->w + col0 + x] = color;
+        }
+    }
+}
\ No newline at end of file
diff --git a/libavcodec/text_to_bitmap.h b/libavcodec/text_to_bitmap.h
new file mode 100644
index 0000000000..4cba5889f4
--- /dev/null
+++ b/libavcodec/text_to_bitmap.h
@@ -0,0 +1,18 @@
+//
+// Created by traian on 2022-05-02.
+//
+
+#ifndef FFMPEG_TEXT_TO_BITMAP_H
+#define FFMPEG_TEXT_TO_BITMAP_H
+
+#include <ass/ass.h>
+#include "fftools/ffmpeg.h"
+
+// Opaque state for rendering text subtitles as bitmaps; the struct itself is
+// defined in text_to_bitmap.c
+struct ASS_Context;
+typedef struct ASS_Context ASS_Context;
+
+/**
+ * Set up an ASS_Context for ist and store it in ist->dec_ctx->ass_context.
+ * Returns immediately if a context is already stored there. The render size
+ * is taken from ost's encoder, then ist's decoder, then the first video
+ * output stream, then the first video input stream.
+ */
+void init_ass_context(InputStream *ist, OutputStream *ost);
+/**
+ * For each rect of the subtitle whose data[0] is NULL, render rect->ass with
+ * libass and fill in the rect's position, size, color count, palette
+ * (data[1]) and index bitmap (data[0]).
+ */
+void render_avsub_ass(ASS_Context *, AVSubtitle *);
+/** Free the libass objects held by context and the context itself. */
+void free_ass_context(ASS_Context *context);
+
+#endif //FFMPEG_TEXT_TO_BITMAP_H
diff --git a/libavfilter/vf_subtitles.c b/libavfilter/vf_subtitles.c
index 7226911f6c..703dbec37d 100644
--- a/libavfilter/vf_subtitles.c
+++ b/libavfilter/vf_subtitles.c
@@ -217,103 +217,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
return ff_filter_frame(outlink, picref);
}
-#include "../fftools/ffmpeg.h"
-#include "libavcodec/avcodec.h"
-#include "../libavcodec/ass_split.h"
-
-#define ALPHA_THRESHOLD 0b10000000
-
-void render_avsub_ass(InputStream *, AVSubtitle *, int, int);
-void render_avsub_ass(InputStream *ist, AVSubtitle *sub, int frame_width, int frame_height)
-{
- ASS_Library *library = ass_library_init();
- ass_set_extract_fonts(library, 1);
- ASS_Renderer *renderer = ass_renderer_init(library);
- ass_set_frame_size(renderer, frame_width, frame_height);
- ass_set_pixel_aspect(renderer, 1);
- ass_set_storage_size(renderer, frame_width, frame_height);
- ass_set_shaper(renderer, 0);
- ass_set_fonts(renderer, NULL, NULL, 1, NULL, 1);
-
- for (int r = 0; r < sub->num_rects; r++)
- {
- AVSubtitleRect *rect = sub->rects[r];
- if (rect->data[0]) continue;
-
- ASSSplitContext *ass_context = ff_ass_split((char *)ist->dec_ctx->subtitle_header);
- ASSDialog *dialog = ff_ass_split_dialog(ass_context, rect->ass);
-
- ASS_Track *track = ass_read_memory(library, (char *)ist->dec_ctx->subtitle_header, ist->dec_ctx->subtitle_header_size, NULL);
- track->n_events = track->max_events = 1;
- track->events = (ASS_Event *)malloc(sizeof(ASS_Event));
- track->events[0].Start = sub->start_display_time + sub->pts / (AV_TIME_BASE / 1000);
- track->events[0].Duration = sub->end_display_time - sub->start_display_time;
- track->events[0].Effect = strdup(dialog->effect);
- track->events[0].Layer = dialog->layer;
- track->events[0].MarginL = dialog->margin_l;
- track->events[0].MarginR = dialog->margin_r;
- track->events[0].MarginV = dialog->margin_v;
- track->events[0].Name = strdup(dialog->name);
- track->events[0].Text = strdup(dialog->text);
- track->events[0].ReadOrder = dialog->readorder;
- track->events[0].Style = 0;
- for (int style = 0; style < track->n_styles; style++)
- if (!strcmp(track->styles[style].Name, dialog->style))
- track->events[0].Style = style;
- track->events[0].render_priv = NULL;
- ff_ass_free_dialog(&dialog);
- ff_ass_split_free(ass_context);
-
- int change;
- ASS_Image *image = ass_render_frame(renderer, track, track->events[0].Start + 1, &change); // Don't have to free it for some reason
- printf("image: %p\n", image);
-
- rect->x = image->dst_x; rect->w = 0;
- rect->y = image->dst_y; rect->h = 0;
- rect->nb_colors = 1; // Transparent background counts as a color
- for (ASS_Image *img = image; img != NULL; img = img->next)
- {
- // Set image bounds to encompass all images
- if (img->dst_x < rect->x) rect->x = img->dst_x;
- if (img->dst_y < rect->y) rect->y = img->dst_y;
- if (img->dst_x + img->w > rect->x + rect->w)
- rect->w = img->dst_x + img->w - rect->x;
- if (img->dst_y + img->h > rect->y + rect->h)
- rect->h = img->dst_y + img->h - rect->y;
- rect->nb_colors++;
- }
- rect->linesize[0] = rect->w;
- rect->data[0] = (uint8_t *)malloc(rect->w * rect->h * sizeof(uint8_t));
- rect->data[1] = (uint8_t *)malloc(4 * rect->nb_colors * sizeof(uint8_t));
- memset(rect->data[0], 0, rect->w * rect->h); // Set all to transparent
- memset(rect->data[1], 0, 4); // Set transparent color
- memset(&rect->linesize[1], 0, 3 * sizeof(int));
- rect->data[2] = rect->data[3] = NULL;
- for (int color = 1; image != NULL; image = image->next, color++)
- {
- // Set color
- rect->data[1][4 * color + 0] = AR(image->color);
- rect->data[1][4 * color + 1] = AG(image->color);
- rect->data[1][4 * color + 2] = AB(image->color);
- rect->data[1][4 * color + 3] = AA(image->color);
- // Set pixels
- for (int y = 0; y < image->h; y++)
- for (int x = 0; x < image->w; x++)
- if (image->bitmap[y * image->stride + x] >= ALPHA_THRESHOLD)
- {
- int x_rect = image->dst_x + x - rect->x;
- int y_rect = image->dst_y + y - rect->y;
- rect->data[0][y_rect * rect->w + x_rect] = color;
- }
- }
-
- ass_free_track(track);
- }
-
- ass_renderer_done(renderer);
- ass_library_done(library);
-}
-
static const AVFilterPad ass_inputs[] = {
{
.name = "default",
--
2.34.1
More information about the ffmpeg-devel
mailing list