[FFmpeg-devel] ogg kate text subtitles support (patch)

u-9iep at aetey.se u-9iep at aetey.se
Mon Oct 24 10:31:25 EEST 2016


Hello,

Given practical constraints, I cannot fulfill all the requirements for
submitting a patch. I hope it can at least make it to the list archive,
for possible future perusal by someone.

The patch addresses the missing Kate subtitles support, reflected
in trac as
           https://trac.ffmpeg.org/ticket/3039

It is based on the patch present in the kate libraries as of 2011-10-12
(https://git.xiph.org/?p=users/oggk/kate.git)

The attached version applies cleanly to ffmpeg-3.1.3 and does not introduce
any dependencies.

It makes ffmpeg and the programs using ffmpeg, like mplayer,
properly recognize and decode ogg kate text subtitles.

Thanks for the excellent ffmpeg software.

This is my attempt at a small contribution: a minor and inexpensive
feature that has regrettably been missing.

Regards,
Rune
-------------- next part --------------
--- a/libavformat/Makefile	2016-10-07 15:15:21.486265907 +0200
+++ b/libavformat/Makefile	2016-10-07 15:17:01.003589996 +0200
@@ -326,6 +326,7 @@
                                             oggparsedaala.o  \
                                             oggparsedirac.o  \
                                             oggparseflac.o   \
+                                            oggparsekate.o   \
                                             oggparseogm.o    \
                                             oggparseopus.o   \
                                             oggparseskeleton.o \
--- a/libavformat/oggdec.c	2016-10-07 19:41:38.423750182 +0200
+++ b/libavformat/oggdec.c	2016-10-07 20:25:41.479354827 +0200
@@ -50,6 +50,7 @@
     &ff_celt_codec,
     &ff_opus_codec,
     &ff_vp8_codec,
+    &ff_kate_codec,
     &ff_old_dirac_codec,
     &ff_old_flac_codec,
     &ff_ogm_video_codec,
@@ -146,6 +147,7 @@
         os->bufpos     = 0;
         os->pstart     = 0;
         os->psize      = 0;
+        os->skip       = 0;
         os->granule    = -1;
         os->lastpts    = AV_NOPTS_VALUE;
         os->lastdts    = AV_NOPTS_VALUE;
@@ -568,7 +570,7 @@
             *dsize = os->psize;
         if (fpos)
             *fpos = os->sync_pos;
-        os->pstart  += os->psize;
+        os->pstart += os->psize+os->skip;
         os->psize    = 0;
         if(os->pstart == os->bufpos)
             os->bufpos = os->pstart = 0;
@@ -607,7 +609,7 @@
     size = avio_size(s->pb);
     if (size < 0)
         return 0;
-    end = size > MAX_PAGE_SIZE ? size - MAX_PAGE_SIZE : 0;
+    end = size > MAX_PAGE_SIZE ? size - MAX_PAGE_SIZE : size;
 
     ret = ogg_save(s);
     if (ret < 0)
@@ -797,6 +799,9 @@
             return ret;
     } while (idx < 0 || !s->streams[idx]);
 
+    if (psize == 0)
+        return 0;
+
     ogg = s->priv_data;
     os  = ogg->streams + idx;
 
--- a/libavformat/oggdec.h	2016-10-07 19:55:24.486255449 +0200
+++ b/libavformat/oggdec.h	2016-10-07 20:32:56.265793913 +0200
@@ -65,7 +65,8 @@
     unsigned int pstart;
     unsigned int psize;
     unsigned int pflags;
-    unsigned int pduration;
+    uint64_t     pduration;
+    unsigned int skip;
     uint32_t serial;
     uint64_t granule;
     uint64_t start_granule;
@@ -129,6 +130,7 @@
 extern const struct ogg_codec ff_theora_codec;
 extern const struct ogg_codec ff_vorbis_codec;
 extern const struct ogg_codec ff_vp8_codec;
+extern const struct ogg_codec ff_kate_codec;
 
 int ff_vorbis_comment(AVFormatContext *ms, AVDictionary **m,
                       const uint8_t *buf, int size, int parse_picture);
--- /dev/null	2015-11-25 12:33:48.239947234 +0100
+++ b/libavformat/oggparsekate.c	2016-10-08 14:18:28.251729834 +0200
@@ -0,0 +1,252 @@
+/*
+ *    Copyright (C) 2009 ogg.k.ogg.k at googlemail.com
+ *    adjustments   2016 by Rl @ Aetey Global Technologies AB
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <ctype.h>
+#include <stdlib.h>
+#include "libavutil/intreadwrite.h"
+#include "libavcodec/bytestream.h"
+#include "avlanguage.h"
+#include "avformat.h"
+#include "internal.h"
+#include "oggdec.h"
+
+/* Values read from the Kate BOS (0x80) header packet by parse_kate_header(). */
+struct kate_params {
+    int major;            /* major bitstream version */
+    int minor;            /* minor bitstream version */
+    uint32_t gps_num;     /* granule rate numerator */
+    uint32_t gps_den;     /* granule rate denominator */
+    int granule_shift;    /* bit count used to split a granule position */
+
+    int canvas_width;     /* 12-bit value shifted left by the 4-bit exponent */
+    int canvas_height;    /* same encoding as canvas_width */
+    int num_headers;      /* header count announced by the stream */
+    int text_encoding;    /* only 0 is accepted by the header parser */
+    int directionality;   /* stored as read, not interpreted here */
+    char language[16];    /* must be NUL terminated within 16 bytes */
+    char category[16];    /* must be NUL terminated within 16 bytes */
+};
+
+static const char * const kate_magic = "kate\0\0\0"; /* 7 bytes expected right after the packet type byte of the BOS header */
+
+/* Handle a Kate header packet: parse the 0x80 BOS and 0x83 comment packets and
+ * store every header in extradata behind a 16-bit big-endian length. Returns 1
+ * for a header, 0 if the type byte lacks bit 0x80, <0 (AVERROR) on failure. */
+static int
+parse_kate_header (AVFormatContext * s, int idx)
+{
+    struct ogg *ogg = s->priv_data;
+    struct ogg_stream *os = ogg->streams + idx;
+    AVStream *st = s->streams[idx];
+    struct kate_params *thp = os->private;
+    int cds = st->codecpar->extradata_size + os->psize + 2, err;
+    uint8_t *cdp;
+    const uint8_t *ptr;
+    uint16_t v16;
+
+    if (!(os->buf[os->pstart] & 0x80))
+        return 0;
+
+    if (!thp) {
+        thp = os->private = av_mallocz(sizeof(*thp));
+        if (!thp) // the fields below are written through thp, so bail out early
+            return AVERROR(ENOMEM);
+    }
+
+    if (os->buf[os->pstart] == 0x80) {
+        if (os->psize < 64) {
+            av_log(s, AV_LOG_ERROR, "BOS header is too small\n");
+            return AVERROR_INVALIDDATA;
+        }
+        ptr = os->buf + os->pstart+1; // skip the packet type now
+        if (memcmp(ptr, kate_magic, 7)) {
+            av_log(s, AV_LOG_ERROR, "Header has wrong magic\n");
+            return AVERROR_INVALIDDATA;
+        }
+        ptr += 7 + 1; /* magic, then a reserved zero byte (may be non zero in future stream revs) */
+        thp->major = bytestream_get_byte(&ptr);
+        thp->minor = bytestream_get_byte(&ptr);
+        thp->num_headers = bytestream_get_byte(&ptr);
+        thp->text_encoding = bytestream_get_byte(&ptr);
+        thp->directionality = bytestream_get_byte(&ptr);
+        (void)bytestream_get_byte(&ptr); /* reserved */
+        thp->granule_shift = bytestream_get_byte(&ptr);
+        v16 = bytestream_get_le16(&ptr);
+        thp->canvas_width = (v16 & ((1<<12)-1)) << (v16 >> 12);
+        v16 = bytestream_get_le16(&ptr);
+        thp->canvas_height = (v16 & ((1<<12)-1)) << (v16 >> 12);
+        (void)bytestream_get_le32(&ptr); /* reserved */
+        thp->gps_num = bytestream_get_le32(&ptr);
+        thp->gps_den = bytestream_get_le32(&ptr);
+        bytestream_get_buffer(&ptr, thp->language, 16);
+        bytestream_get_buffer(&ptr, thp->category, 16);
+
+        if (thp->major > 0) {
+            av_log(s, AV_LOG_ERROR, "Major bitstream version %d not supported\n", thp->major);
+            return AVERROR_INVALIDDATA;
+        }
+        if (thp->num_headers <= 0) {
+            av_log(s, AV_LOG_ERROR, "Invalid number of headers (%d)\n", thp->num_headers);
+            return AVERROR_INVALIDDATA;
+        }
+        if (thp->text_encoding != 0) {
+            av_log(s, AV_LOG_ERROR, "Unsupported text encoding (%d)\n", thp->text_encoding);
+            return AVERROR_INVALIDDATA;
+        }
+        if (thp->granule_shift >= 64) {
+            av_log(s, AV_LOG_ERROR, "Invalid granule shift (%d)\n", thp->granule_shift);
+            return AVERROR_INVALIDDATA;
+        }
+        if (thp->gps_num == 0 || thp->gps_den == 0) {
+            av_log(s, AV_LOG_ERROR, "Invalid granule rate (%u/%u)\n", thp->gps_num, thp->gps_den);
+            return AVERROR_INVALIDDATA;
+        }
+        if (thp->language[15]) {
+            av_log(s, AV_LOG_ERROR, "Invalid language\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if (thp->category[15]) {
+            av_log(s, AV_LOG_ERROR, "Invalid category\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        st->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE;
+        st->codecpar->codec_id   = AV_CODEC_ID_TEXT;
+        avpriv_set_pts_info(st, 64, thp->gps_den, thp->gps_num); // time_base = 1 / granule_rate
+        if (thp->language[0]) {
+            // nicked off asfdec.c; country code (if any) is ignored; casts keep tolower() defined for bytes >= 0x80
+            const char primary_tag[3] = { tolower((unsigned char)thp->language[0]), tolower((unsigned char)thp->language[1]), '\0' };
+            const char *iso6392 = av_convert_lang_to(primary_tag, AV_LANG_ISO639_2_BIBL);
+            if (iso6392)
+                av_dict_set(&st->metadata, "language", iso6392, 0);
+        }
+    } else if (os->buf[os->pstart] == 0x83 && os->psize >= 10) { // psize - 10 must not wrap
+        ff_vorbis_stream_comment(s, st, os->buf + os->pstart + 9, os->psize - 10);
+    }
+
+    if ((err = av_reallocp(&st->codecpar->extradata, cds + AV_INPUT_BUFFER_PADDING_SIZE)) < 0) {
+        st->codecpar->extradata_size = 0; // av_reallocp() released the old buffer
+        return err;
+    }
+    memset(st->codecpar->extradata + cds, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+    cdp = st->codecpar->extradata + st->codecpar->extradata_size;
+    *cdp++ = os->psize >> 8;
+    *cdp++ = os->psize & 0xff;
+    memcpy (cdp, os->buf + os->pstart, os->psize);
+    st->codecpar->extradata_size = cds;
+
+    return 1;
+}
+
+/* Reduce a Kate text packet (type 0) to its text: pstart/psize are moved past
+ * the 29-byte prefix, skip covers what follows the text and pduration is set.
+ * Other packet types are dropped. Returns 0, or <0 (AVERROR) on bad data. */
+static int
+parse_kate_packet (AVFormatContext * s, int idx)
+{
+    struct ogg *ogg = s->priv_data;
+    struct ogg_stream *os = ogg->streams + idx;
+    const uint8_t *ptr = os->buf + os->pstart;
+    uint8_t packtype;
+    int64_t start, duration, backlink;
+    int32_t len;
+    static const int text_offset = 1+3*8+4;
+
+    os->skip = 0; // the demuxer adds skip to pstart, so a stale value must not survive
+    if (os->psize < 1) {
+        av_log(s, AV_LOG_ERROR, "Data packet should be at least 1 byte long\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    packtype = bytestream_get_byte(&ptr);
+    // we ignore any non text data packet for now (repeats, etc)
+    if (packtype != 0) {
+        av_log(s, AV_LOG_DEBUG, "Packet type %02x ignored\n", packtype);
+        os->skip = os->psize; // emit nothing, but still step over the whole packet
+        os->psize = 0;
+        return 0;
+    }
+
+    if (os->psize < text_offset) {
+        av_log(s, AV_LOG_ERROR, "Text data packet should be at least %d bytes long\n",text_offset);
+        return AVERROR_INVALIDDATA;
+    }
+
+    start = bytestream_get_le64(&ptr);
+    duration = bytestream_get_le64(&ptr);
+    backlink = bytestream_get_le64(&ptr);
+
+    if (start < 0) {
+        av_log(s, AV_LOG_ERROR, "Invalid start time (%lld)\n", (long long)start);
+        return AVERROR_INVALIDDATA;
+    }
+    if (duration < 0) {
+        av_log(s, AV_LOG_ERROR, "Invalid duration (%lld)\n", (long long)duration);
+        return AVERROR_INVALIDDATA;
+    }
+    if (backlink < 0 || backlink > start) {
+        av_log(s, AV_LOG_ERROR, "Invalid backlink (%lld)\n", (long long)backlink);
+        return AVERROR_INVALIDDATA;
+    }
+
+    len = bytestream_get_le32(&ptr);
+    if (len < 0) {
+        av_log(s, AV_LOG_ERROR, "Invalid text length (%d)\n", len);
+        return AVERROR_INVALIDDATA;
+    }
+
+    os->pstart += text_offset;
+    os->psize -= text_offset;
+    if (len > os->psize) {
+        av_log(s, AV_LOG_WARNING, "Text length greater than packet size - clipped\n");
+    } else {
+        os->skip = os->psize - len; // bytes after the text that must not be output
+        os->psize = len;
+    }
+    os->pduration = duration;
+
+    return 0;
+}
+
+/* Map a Kate granule position to a time in granules: the bits above granule_shift plus the bits below it. */
+static uint64_t
+parse_kate_granpos (AVFormatContext * s, int i, uint64_t gp, int64_t *dts)
+{
+    struct ogg *ogg = s->priv_data;
+    struct ogg_stream *os = ogg->streams + i;
+    struct kate_params *thp = os->private;
+
+    if (!thp)
+        return 0;
+    /* Build the mask in 64 bits: an int "1 << shift" is undefined for shifts
+     * of 31 or more, and shifts up to 63 are accepted. *dts is left untouched. */
+    return (gp >> thp->granule_shift) +
+           (gp & ((UINT64_C(1) << thp->granule_shift) - 1));
+}
+
+/* Ogg codec descriptor that wires the Kate parsing callbacks defined in this file. */
+const struct ogg_codec ff_kate_codec = {
+    .magic            = "\200kate\0\0\0", .magicsize = 8, /* 0x80 type byte + 7-byte id */
+    .granule_is_start = 1,
+    .nb_header        = 0,
+    .header           = parse_kate_header,
+    .packet           = parse_kate_packet,
+    .gptopts          = parse_kate_granpos,
+};


More information about the ffmpeg-devel mailing list