[FFmpeg-devel] [PATCH 1/7] avcodec: add s337m parser and decoder

Wed Dec 4 16:14:03 EET 2024

From: Nicolas Gaullier <nicolas.gaullier at cji.paris>

Signed-off-by: Nicolas Gaullier <nicolas.gaullier at cji.paris>
---
 configure                                |   2 +
 libavcodec/Makefile                      |   4 +
 libavcodec/allcodecs.c                   |   2 +
 libavcodec/codec_desc.c                  |  14 +
 libavcodec/codec_id.h                    |   2 +
 libavcodec/dolby_e_parse.c               |   3 +
 libavcodec/parsers.c                     |   2 +
 libavcodec/s337m.c                       | 327 +++++++++++++++++++++++
 libavcodec/s337m_parser.c                | 133 +++++++++
 libavcodec/spdif_s337m_parse.c           | 142 ++++++++++
 libavcodec/spdif_s337m_parser_internal.h |  92 +++++++
 libavcodec/utils.c                       |   2 +
 libavcodec/version.c                     |   2 +-
 13 files changed, 726 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/s337m.c
 create mode 100644 libavcodec/s337m_parser.c
 create mode 100644 libavcodec/spdif_s337m_parse.c
 create mode 100644 libavcodec/spdif_s337m_parser_internal.h

diff --git a/configure b/configure
index d7b7b49f92..eb4eea97a9 100755
--- a/configure
+++ b/configure
@@ -3072,6 +3072,8 @@ rv20_encoder_select="h263_encoder"
 rv30_decoder_select="golomb h264pred h264qpel mpegvideodec rv34dsp"
 rv40_decoder_select="golomb h264pred h264qpel mpegvideodec rv34dsp"
 rv60_decoder_select="videodsp golomb"
+s337m_16_decoder_select="dolby_e_decoder"
+s337m_24_decoder_select="dolby_e_decoder"
 screenpresso_decoder_deps="zlib"
 shorten_decoder_select="bswapdsp"
 sipr_decoder_select="lsp"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index a6e0e0b55e..deff288312 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -672,6 +672,8 @@ OBJS-$(CONFIG_RV60_DECODER)            += rv60dec.o rv60dsp.o
 OBJS-$(CONFIG_SAMI_DECODER)            += samidec.o ass.o htmlsubtitles.o
 OBJS-$(CONFIG_S302M_DECODER)           += s302m.o
 OBJS-$(CONFIG_S302M_ENCODER)           += s302menc.o
+OBJS-$(CONFIG_S337M_16_DECODER)        += spdif_s337m_parse.o s337m.o
+OBJS-$(CONFIG_S337M_24_DECODER)        += spdif_s337m_parse.o s337m.o
 OBJS-$(CONFIG_SANM_DECODER)            += sanm.o
 OBJS-$(CONFIG_SCPR_DECODER)            += scpr.o
 OBJS-$(CONFIG_SCREENPRESSO_DECODER)    += screenpresso.o
@@ -1233,6 +1235,8 @@ OBJS-$(CONFIG_PNG_PARSER)              += png_parser.o
 OBJS-$(CONFIG_PNM_PARSER)              += pnm_parser.o pnm.o
 OBJS-$(CONFIG_QOI_PARSER)              += qoi_parser.o
 OBJS-$(CONFIG_RV34_PARSER)             += rv34_parser.o
+OBJS-$(CONFIG_S337M_16_PARSER)         += spdif_s337m_parse.o s337m_parser.o
+OBJS-$(CONFIG_S337M_24_PARSER)         += spdif_s337m_parse.o s337m_parser.o
 OBJS-$(CONFIG_SBC_PARSER)              += sbc_parser.o
 OBJS-$(CONFIG_SIPR_PARSER)             += sipr_parser.o
 OBJS-$(CONFIG_TAK_PARSER)              += tak_parser.o tak.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 0b559dfc58..04e294734d 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -522,6 +522,8 @@ extern const FFCodec ff_ra_144_encoder;
 extern const FFCodec ff_ra_144_decoder;
 extern const FFCodec ff_ra_288_decoder;
 extern const FFCodec ff_ralf_decoder;
+extern const FFCodec ff_s337m_16_decoder;
+extern const FFCodec ff_s337m_24_decoder;
 extern const FFCodec ff_sbc_encoder;
 extern const FFCodec ff_sbc_decoder;
 extern const FFCodec ff_shorten_decoder;
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index bc9163bf98..927d19a8f9 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -3451,6 +3451,20 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("LC3 (Low Complexity Communication Codec)"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_S337M_16,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "s337m_16",
+        .long_name = NULL_IF_CONFIG_SMALL("S337M within 16-bit pcm"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_S337M_24,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "s337m_24",
+        .long_name = NULL_IF_CONFIG_SMALL("S337M within 24-bit pcm"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* subtitle codecs */
     {
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index 6bfaa02601..60cb33eec2 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -550,6 +550,8 @@ enum AVCodecID {
     AV_CODEC_ID_OSQ,
     AV_CODEC_ID_QOA,
     AV_CODEC_ID_LC3,
+    AV_CODEC_ID_S337M_16,
+    AV_CODEC_ID_S337M_24,
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
diff --git a/libavcodec/dolby_e_parse.c b/libavcodec/dolby_e_parse.c
index ffedcd99a4..3f6e1abc02 100644
--- a/libavcodec/dolby_e_parse.c
+++ b/libavcodec/dolby_e_parse.c
@@ -30,6 +30,9 @@ static const uint8_t nb_channels_tab[MAX_PROG_CONF + 1] = {
     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8
 };
 
+/* 42965 and 53706 are approximate values (rounded down):
+ * accurate video sync requires muxing into s337m/aes
+ */
 static const uint16_t sample_rate_tab[16] = {
     0, 42965, 43008, 44800, 53706, 53760
 };
diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
index 8bfd2dbce0..5d78cc7969 100644
--- a/libavcodec/parsers.c
+++ b/libavcodec/parsers.c
@@ -70,6 +70,8 @@ extern const AVCodecParser ff_qoi_parser;
 extern const AVCodecParser ff_rv34_parser;
 extern const AVCodecParser ff_sbc_parser;
 extern const AVCodecParser ff_sipr_parser;
+extern const AVCodecParser ff_s337m_16_parser;
+extern const AVCodecParser ff_s337m_24_parser;
 extern const AVCodecParser ff_tak_parser;
 extern const AVCodecParser ff_vc1_parser;
 extern const AVCodecParser ff_vorbis_parser;
diff --git a/libavcodec/s337m.c b/libavcodec/s337m.c
new file mode 100644
index 0000000000..d5ac28b118
--- /dev/null
+++ b/libavcodec/s337m.c
@@ -0,0 +1,327 @@
+/*
+ * S337M decoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "libswresample/swresample.h"
+
+#include "avcodec.h"
+#include "codec_internal.h"
+#include "decode.h"
+#include "spdif_s337m_parser_internal.h"
+
+/**
+ * Private context of the s337m_16 / s337m_24 decoders.
+ */
+typedef struct S337MDecodeContext {
+    const AVClass   *class;     /* referenced by the private options table */
+    AVCodecContext  *avctx;     /* owning codec context, set in s337m_init() */
+    SPDIFS337MContext dectx;    /* filled by avpriv_s337m_parse_header(): codec and frame_size */
+
+    int             passthrough; /* "passthrough" option: output the input words as PCM, no decoding */
+
+    int             inited;     /* set once the output parameters have been configured */
+    int             flushed;    /* set after the resampler has been drained; further calls output nothing */
+
+    int             aes_start_position; /* bytes of the packets received before the first syncword packet */
+    AVCodecContext  *codec_avctx;       /* nested decoder context opened by set_codec() */
+    AVFrame         *codec_frame;       /* receives the output of the nested decoder */
+    int             codec_initial_sample_rate; /* nested decoder sample rate at init: resampler input rate */
+    SwrContext      *swr;               /* resampler created by init_resample() */
+    int64_t         next_pts;           /* running value handed to swr_next_pts() */
+    int             prev_aes_samples;   /* avpkt->size / (aes_word_bits >> 2) of the previous packet */
+} S337MDecodeContext;
+
+/**
+ * Decoder init callback.
+ *
+ * Stores the owning codec context both in the decoder context and in the
+ * embedded header-parsing context.
+ *
+ * @return always 0
+ */
+static av_cold int s337m_init(AVCodecContext *avctx)
+{
+    S337MDecodeContext *ctx = avctx->priv_data;
+
+    ctx->avctx       = avctx;
+    ctx->dectx.avctx = avctx;
+
+    return 0;
+}
+
+/**
+ * Open the nested decoder matching the codec found in the s337m header.
+ *
+ * On the first call the decoder context and its output frame are allocated.
+ * On later calls the function only checks that the codec has not changed.
+ *
+ * @param s decoder context; s->dectx.codec must have been set by the
+ *          header parser
+ * @return 0 on success, AVERROR_INPUT_CHANGED if the codec differs from the
+ *         one already opened, another negative error code on failure
+ */
+static int set_codec(S337MDecodeContext *s)
+{
+    SPDIFS337MContext *dectx = &s->dectx;
+    const AVCodec *codec;
+    int ret;
+
+    if (s->codec_avctx) {
+        if (s->codec_avctx->codec_id != dectx->codec)
+            return AVERROR_INPUT_CHANGED;
+        return 0;
+    }
+
+    codec = avcodec_find_decoder(dectx->codec);
+    if (!codec)
+        return AVERROR_BUG;
+
+    s->codec_avctx = avcodec_alloc_context3(codec);
+    if (!s->codec_avctx)
+        return AVERROR(ENOMEM);
+
+    ret = avcodec_open2(s->codec_avctx, codec, NULL);
+    if (ret < 0)
+        goto fail;
+
+    s->codec_frame = av_frame_alloc();
+    if (!s->codec_frame) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    return 0;
+
+fail:
+    /* Do not leave a half-initialized context behind: the early-return path
+     * above treats a non-NULL codec_avctx as "fully set up". */
+    avcodec_free_context(&s->codec_avctx);
+    return ret;
+}
+
+/**
+ * Allocate and initialize the resampler that converts the nested decoder
+ * output to the sample rate of avctx, keeping layout and format unchanged.
+ *
+ * @param avctx          provides the channel layout, sample format and
+ *                       output sample rate
+ * @param swrp           receives the new context; reset to NULL on failure
+ * @param in_sample_rate sample rate of the nested decoder output
+ * @return 0 on success, a negative error code on failure
+ */
+static int init_resample(AVCodecContext *avctx, SwrContext **swrp, int in_sample_rate)
+{
+    SwrContext *swr = *swrp = swr_alloc();
+    int ret;
+
+    if (!swr)
+        return AVERROR(ENOMEM);
+
+    av_opt_set_chlayout(swr, "in_chlayout",  &avctx->ch_layout, 0);
+    av_opt_set_chlayout(swr, "out_chlayout", &avctx->ch_layout, 0);
+    av_opt_set_int(swr, "in_sample_fmt",     avctx->sample_fmt, 0);
+    av_opt_set_int(swr, "out_sample_fmt",    avctx->sample_fmt, 0);
+    av_opt_set_int(swr, "in_sample_rate",    in_sample_rate, 0);
+    av_opt_set_int(swr, "out_sample_rate",   avctx->sample_rate, 0);
+
+    /* There are two main cases that require sync to timestamps:
+     * - dolby_e sample rate value is not accurate for drop frame video:
+     * use soft comp responsively to handle these regular single-sample drifts
+     * - in case of loss of s337m sync:
+     * use hard comp to insert whole frames of silence
+     *
+     * And one use case in between that requires NO sync:
+     * The guardband phase has to be silently ignored as the video
+     * frame is assumed to be synced with sample 0 of the aes stream.
+     */
+    av_opt_set_int(swr,    "async",          1,        0);
+    av_opt_set_double(swr, "min_comp",       1./48000, 0);
+    av_opt_set_double(swr, "max_soft_comp",  0.0001,   0);
+    av_opt_set_double(swr, "min_hard_comp",  0.02,     0);
+
+    ret = swr_init(swr);
+    if (ret < 0) {
+        /* The caller retries on the next packet and would overwrite *swrp:
+         * release the context now so that it is not leaked. */
+        swr_free(swrp);
+        return ret;
+    }
+
+    return 0;
+}
+
+extern const FFCodec ff_s337m_24_decoder;
+
+/* The first input packet is usually empty: it is guard band bytes.
+ * Following packets always start with a syncword.
+ * The 1-frame (2 frames if including the usual first null packet) delay
+ * accommodates the resampler, but the number of samples per frame is always preserved
+ * (ex: alternation of 1601/1602 audio samples per frame for Dolby E at 29.97).
+ *
+ * In passthrough mode the input words are copied out as PCM (S16, or S32 with
+ * the 24-bit words left-aligned) and nothing is decoded.
+ *
+ * Returns the number of bytes consumed, 0 when nothing is output while
+ * draining, or a negative error code.
+ */
+static int s337m_decode_frame(AVCodecContext *avctx, AVFrame *frame,
+                            int *got_frame, AVPacket *avpkt)
+{
+    S337MDecodeContext *s1 = avctx->priv_data;
+    SPDIFS337MContext *dectx = &s1->dectx;
+    int aes_word_bits = avctx->codec == (AVCodec *)&ff_s337m_24_decoder ? 24 : 16;
+    int aes_samples =  avpkt->size / (aes_word_bits >> 2);
+    int prev_aes_samples = s1->prev_aes_samples;
+    int ret, next;
+
+    if (s1->flushed || !avpkt->size && s1->passthrough)
+            return 0;
+
+    if (s1->passthrough) {
+        if (!s1->inited) {
+            av_channel_layout_uninit(&avctx->ch_layout);
+            avctx->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO;
+            avctx->sample_fmt = aes_word_bits == 24 ? AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_S16;
+            s1->inited = 1;
+        }
+        frame->nb_samples = aes_samples;
+        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+            return ret;
+        if (aes_word_bits == 16)
+            memcpy(frame->extended_data[0], avpkt->data, aes_samples * (aes_word_bits >> 2));
+        else {
+            uint8_t *buf_in = avpkt->data;
+            uint8_t *buf_out = frame->extended_data[0];
+            /* Two 24-bit words in, two 32-bit samples out. */
+            for (; buf_in + 5 < avpkt->data + avpkt->size; buf_in+=6, buf_out+=8)
+                AV_WL64(buf_out,
+                     (uint64_t)AV_RL24(buf_in)   <<  8 |
+                     (uint64_t)AV_RL24(buf_in+3) << 40 );
+        }
+        *got_frame = 1;
+        return avpkt->size;
+    }
+
+    s1->prev_aes_samples = aes_samples;
+    if (s1->inited) {
+        /* The output is one packet late: size it after the previous packet. */
+        frame->nb_samples = prev_aes_samples;
+        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+            return ret;
+    }
+    if (!avpkt->size) {
+        /* Draining before any frame was decoded: there is neither a
+         * resampler nor an output buffer, so there is nothing to output. */
+        if (!s1->inited) {
+            s1->flushed = 1;
+            return 0;
+        }
+        ret = swr_convert(s1->swr,
+                frame->extended_data, frame->nb_samples,
+                NULL, 0);
+        if (ret < 0)
+            return ret;
+        av_assert0(ret == frame->nb_samples);
+        *got_frame = 1;
+        s1->flushed = 1;
+        return 0;
+    }
+
+    next = avpriv_s337m_parse_header(dectx, avpkt->data, avpkt->size, aes_word_bits);
+    if (next < 0)
+        return next;
+    else if (!next) {
+        /* A packet with a null header is only expected ahead of the first
+         * frame; this depends on input content, so report it as an error
+         * instead of asserting. */
+        if (s1->inited)
+            return AVERROR_INVALIDDATA;
+        s1->aes_start_position += avpkt->size;
+        return avpkt->size;
+    }
+    ret = set_codec(s1);
+    if (ret < 0)
+        return ret;
+
+    /* frame_size is read from the bitstream: it must fit in what is left of
+     * the packet, otherwise the byte swap below would run out of bounds. */
+    if (dectx->frame_size > avpkt->size - next) {
+        av_log(avctx, AV_LOG_ERROR, "Frame size exceeds packet size\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    ret = av_packet_make_writable(avpkt);
+    if (ret < 0)
+        return ret;
+    avpkt->data += next;
+    avpkt->size = dectx->frame_size;
+
+    if (aes_word_bits == 16)
+        avpriv_spdif_s337m_bswap_buf16((uint16_t *)avpkt->data, (uint16_t *)avpkt->data, avpkt->size >> 1);
+    else
+        avpriv_spdif_s337m_bswap_buf24(avpkt->data, avpkt->size);
+
+    ret = avcodec_send_packet(s1->codec_avctx, avpkt);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Error submitting a packet for decoding\n");
+        return ret;
+    }
+
+    ret = avcodec_receive_frame(s1->codec_avctx, s1->codec_frame);
+    if (ret < 0)
+        return ret;
+
+    if (!s1->inited) {
+        ret = av_channel_layout_copy(&avctx->ch_layout, &s1->codec_avctx->ch_layout);
+        if (ret < 0)
+            return ret;
+        avctx->sample_fmt = s1->codec_avctx->sample_fmt;
+        s1->codec_initial_sample_rate = s1->codec_avctx->sample_rate;
+        ret = init_resample(avctx, &s1->swr, s1->codec_initial_sample_rate);
+        if (ret < 0)
+            return ret;
+        swr_next_pts(s1->swr, 0);
+        s1->inited = 1;
+        av_log(avctx, AV_LOG_VERBOSE,
+                "s337m phase: %.6fs\n", s1->aes_start_position / (aes_word_bits >> 2) / (float)avctx->sample_rate);
+        /* The small initial guard band must not be taken into account for syncing
+         * but completely missing frames must be (in that case, the guard band length is unknown, so also included).
+         */
+        if (s1->aes_start_position >= dectx->frame_size) {
+            /* 64-bit product: rate * byte count overflows int after a few frames. */
+            s1->next_pts += (int64_t)s1->codec_initial_sample_rate * s1->aes_start_position / (aes_word_bits >> 2);
+            swr_next_pts(s1->swr, s1->next_pts);
+        }
+        /* First frame: only feed the resampler, nothing is output yet. */
+        ret = swr_convert(s1->swr,
+                NULL, 0,
+                (const uint8_t**)s1->codec_frame->extended_data, s1->codec_frame->nb_samples);
+        if (ret < 0)
+            return ret;
+
+        return avpkt->size;
+    } else {
+        if (av_channel_layout_compare(&avctx->ch_layout, &s1->codec_avctx->ch_layout)
+            || avctx->sample_fmt != s1->codec_avctx->sample_fmt
+            || s1->codec_initial_sample_rate != s1->codec_avctx->sample_rate)
+            return AVERROR_INPUT_CHANGED;
+
+        s1->next_pts += (int64_t)s1->codec_initial_sample_rate * prev_aes_samples;
+        swr_next_pts(s1->swr, s1->next_pts);
+        ret = swr_convert(s1->swr,
+                frame->extended_data, frame->nb_samples,
+                (const uint8_t**)s1->codec_frame->extended_data, s1->codec_frame->nb_samples);
+        if (ret < 0)
+            return ret;
+        av_assert0(ret == frame->nb_samples);
+    }
+
+    *got_frame = 1;
+    return avpkt->size;
+}
+
+/**
+ * Decoder flush callback: flush the nested decoder, if one has been opened.
+ *
+ * The nested decoder is only created when the first s337m frame is decoded,
+ * so a flush issued before that point has nothing to do.
+ */
+static void s337m_flush(AVCodecContext *avctx)
+{
+    S337MDecodeContext *s = avctx->priv_data;
+
+    if (s->codec_avctx)
+        avcodec_flush_buffers(s->codec_avctx);
+}
+
+/**
+ * Decoder close callback: release the nested decoder, its output frame
+ * and the resampler.
+ *
+ * @return always 0
+ */
+static av_cold int s337m_close(AVCodecContext *avctx)
+{
+    S337MDecodeContext *ctx = avctx->priv_data;
+
+    av_frame_free(&ctx->codec_frame);
+    swr_free(&ctx->swr);
+    avcodec_free_context(&ctx->codec_avctx);
+
+    return 0;
+}
+
+/* Flags shared by the options below: decoding parameter, audio. */
+#define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
+/* Private options; "passthrough" is a boolean stored in
+ * S337MDecodeContext.passthrough and defaults to 0. */
+static const AVOption options[] = {
+    {"passthrough", "Pass NON-PCM through unchanged", offsetof(S337MDecodeContext, passthrough), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, PAR },
+    {NULL}
+};
+
+/* AVClass shared by the 16-bit and 24-bit decoders to expose the options. */
+static const AVClass s337m_decoder_class = {
+    .class_name = "s337m decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+/* Decoder for s337m carried in 16-bit words. It shares all its callbacks,
+ * private context and options with the 24-bit variant below;
+ * s337m_decode_frame() tells them apart by comparing avctx->codec with
+ * ff_s337m_24_decoder. */
+const FFCodec ff_s337m_16_decoder = {
+    .p.name         = "s337m_16",
+    .p.long_name    = NULL_IF_CONFIG_SMALL("S337M 16-bit transport"),
+    .p.type         = AVMEDIA_TYPE_AUDIO,
+    .p.id           = AV_CODEC_ID_S337M_16,
+    .init           = s337m_init,
+    FF_CODEC_DECODE_CB(s337m_decode_frame),
+    .close          = s337m_close,
+    .flush          = s337m_flush,
+    .priv_data_size = sizeof(S337MDecodeContext),
+    .p.priv_class   = &s337m_decoder_class,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DELAY,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+};
+/* Decoder for s337m carried in 24-bit words. */
+const FFCodec ff_s337m_24_decoder = {
+    .p.name         = "s337m_24",
+    .p.long_name    = NULL_IF_CONFIG_SMALL("S337M 24-bit transport"),
+    .p.type         = AVMEDIA_TYPE_AUDIO,
+    .p.id           = AV_CODEC_ID_S337M_24,
+    .init           = s337m_init,
+    FF_CODEC_DECODE_CB(s337m_decode_frame),
+    .close          = s337m_close,
+    .flush          = s337m_flush,
+    .priv_data_size = sizeof(S337MDecodeContext),
+    .p.priv_class   = &s337m_decoder_class,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DELAY,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+};
diff --git a/libavcodec/s337m_parser.c b/libavcodec/s337m_parser.c
new file mode 100644
index 0000000000..cb7ef7c739
--- /dev/null
+++ b/libavcodec/s337m_parser.c
@@ -0,0 +1,133 @@
+/*
+ * S337M parser
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "parser.h"
+#include "spdif_s337m_parser_internal.h"
+
+/**
+ * Scan the first size bytes of buf for a non-zero byte.
+ *
+ * Note that the result is the opposite of what the name suggests.
+ *
+ * @param buf  buffer to scan
+ * @param size number of bytes to scan; nothing is scanned if it is <= 0
+ * @return 1 if at least one byte is non-zero, 0 if they are all zero
+ */
+static int all_zero(const uint8_t *buf, int size)
+{
+    int pos = 0;
+#if HAVE_FAST_UNALIGNED
+    /* Skip null words quickly. The test is pos < size rather than
+     * pos + 3 / 7 < size because it is simpler and there must be
+     * AV_INPUT_BUFFER_PADDING_SIZE bytes at the end.
+     */
+#if HAVE_FAST_64BIT
+    for (; pos < size; pos += 8)
+        if (AV_RN64(buf + pos))
+            break;
+#else
+    for (; pos < size; pos += 4)
+        if (AV_RN32(buf + pos))
+            break;
+#endif
+#endif
+    /* Byte-wise check of whatever the word scan did not clear. */
+    while (pos < size) {
+        if (buf[pos++])
+            return 1;
+    }
+    return 0;
+}
+
+extern const AVCodecParser ff_s337m_24_parser;
+
+/**
+ * Parser callback: split the input at s337m syncwords.
+ *
+ * Unless PARSER_FLAG_COMPLETE_FRAMES is set, each output packet runs from
+ * one syncword to the next. Data preceding the first syncword (the guard
+ * band) is output as a packet of its own, with its bytes replaced by zeros
+ * if any of them is non-zero. s->duration is set from the number of bytes
+ * accumulated for the packet.
+ *
+ * @return number of bytes consumed (the value of next), buf_size while a
+ *         packet is still being assembled, or AVERROR(ENOMEM)
+ */
+static int s337m_parse(AVCodecParserContext *s,
+                           AVCodecContext *avctx,
+                           const uint8_t **poutbuf, int *poutbuf_size,
+                           const uint8_t *buf, int buf_size)
+{
+    struct SPDIFS337MParseContext *pc1 = s->priv_data;
+    ParseContext *pc = &pc1->pc;
+    int aes_word_bits = s->parser == &ff_s337m_24_parser ? 24 : 16;
+    int eof = !buf_size;
+    int next;
+
+    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
+        next = buf_size;
+    } else {
+        next = avpriv_spdif_s337m_find_syncword(pc1, buf, buf_size, aes_word_bits);
+
+        if (!pc1->inited) {
+            /* bytes preceding the first syncword will be zeroed */
+            /* all_zero() returns non-zero when a non-null byte is found */
+            if (all_zero(buf, next != END_NOT_FOUND ? next : buf_size)) {
+                if (!pc1->warned_corrupted_guardband) {
+                    av_log(avctx, AV_LOG_VERBOSE,
+                            "Guard band has unexpected non-null bytes - they will be ignored.\n");
+                    pc1->warned_corrupted_guardband = 1;
+                }
+                if (buf_size > pc1->null_buf_size) {
+                    int old_buf_size = pc1->null_buf_size;
+                    /* same element type as pc1->null_buf */
+                    uint8_t *new_buf = av_realloc(pc1->null_buf, buf_size);
+                    if (!new_buf)
+                        return AVERROR(ENOMEM);
+                    pc1->null_buf = new_buf;
+                    /* only the newly added tail needs clearing */
+                    memset(&pc1->null_buf[old_buf_size], 0, buf_size - old_buf_size);
+                    pc1->null_buf_size = buf_size;
+                }
+                buf = pc1->null_buf;
+            }
+            if (next != END_NOT_FOUND) {
+                pc1->inited = 1;
+                pc1->aes_initial_offset = pc1->aes_offset + next;
+            }
+        }
+        if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
+            pc1->aes_offset += buf_size;
+            *poutbuf = NULL;
+            *poutbuf_size = 0;
+            return buf_size;
+        }
+    }
+    /* Contrary to the exact frame duration computed by the decoder, the packet duration
+     * computed here will reflect s337m jitter or phase change (if any),
+     * but there will not be any drift.
+     * The content/duration of the initial guard band (zeroed first packet) will be
+     * ignored by the decoder if it is less than a full audio frame, so that the overall
+     * duration may differ between the parser and the decoder.
+     */
+    pc1->aes_offset += eof ? pc1->aes_initial_offset : next;
+    s->duration = (pc1->aes_offset << 2) / aes_word_bits;
+    pc1->aes_offset = 0;
+
+    *poutbuf = buf;
+    *poutbuf_size = buf_size;
+    return next;
+}
+
+/**
+ * Parser close callback: free the zero-filled substitution buffer, then
+ * release the generic parse context.
+ */
+static void s337m_close(AVCodecParserContext *s)
+{
+    struct SPDIFS337MParseContext *ctx = s->priv_data;
+
+    av_freep(&ctx->null_buf);
+
+    ff_parse_close(s);
+}
+
+/* Parser for s337m carried in 16-bit words. Both parsers share the same
+ * callbacks; s337m_parse() tells them apart by comparing s->parser with
+ * ff_s337m_24_parser. */
+const AVCodecParser ff_s337m_16_parser = {
+    .codec_ids      = { AV_CODEC_ID_S337M_16 },
+    .priv_data_size = sizeof(SPDIFS337MParseContext),
+    .parser_parse   = s337m_parse,
+    .parser_close   = s337m_close,
+};
+/* Parser for s337m carried in 24-bit words. */
+const AVCodecParser ff_s337m_24_parser = {
+    .codec_ids      = { AV_CODEC_ID_S337M_24 },
+    .priv_data_size = sizeof(SPDIFS337MParseContext),
+    .parser_parse   = s337m_parse,
+    .parser_close   = s337m_close,
+};
diff --git a/libavcodec/spdif_s337m_parse.c b/libavcodec/spdif_s337m_parse.c
new file mode 100644
index 0000000000..5e43e8b2b7
--- /dev/null
+++ b/libavcodec/spdif_s337m_parse.c
@@ -0,0 +1,142 @@
+/*
+ * SPDIF/S337M common code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/bswap.h"
+#include "codec_id.h"
+#include "bytestream.h"
+
+#include "spdif_s337m_parser_internal.h"
+
+//TODO move to DSP
+/**
+ * Byte-swap w 16-bit words from src into dst, in increasing index order.
+ *
+ * @param dst destination words; s337m_decode_frame() passes dst == src
+ * @param src source words
+ * @param w   number of 16-bit words; nothing is done if it is <= 0
+ */
+void avpriv_spdif_s337m_bswap_buf16(uint16_t *dst, const uint16_t *src, int w)
+{
+    int n = 0;
+
+    while (n < w) {
+        dst[n] = av_bswap16(src[n]);
+        n++;
+    }
+}
+
+/**
+ * Reverse, in place, the byte order of each complete 3-byte word of data.
+ *
+ * @param data buffer to modify
+ * @param size buffer size in bytes; a trailing partial word is left untouched
+ */
+void avpriv_spdif_s337m_bswap_buf24(uint8_t *data, int size)
+{
+    int words = size / 3;
+
+    while (words-- > 0) {
+        uint8_t first = data[0];
+
+        /* the middle byte stays where it is */
+        data[0] = data[2];
+        data[2] = first;
+        data += 3;
+    }
+}
+
+/*
+ * Scan buf byte by byte for a syncword preceded by null bytes.
+ *
+ * The scan keeps a 128-bit history of the input: state holds the last 8
+ * bytes read and state_ext the 8 bytes read before those. A match requires
+ * state to be exactly equal to a marker (so the bytes of state above the
+ * marker are null) and state_ext to be null.
+ *
+ * Returns the position of the first byte of the syncword relative to buf
+ * (negative if the syncword started in a previous buffer), or END_NOT_FOUND.
+ */
+int avpriv_spdif_s337m_find_syncword(SPDIFS337MParseContext *pc1, const uint8_t *buf, int buf_size, int word_bits)
+{
+    ParseContext *pc = &pc1->pc;
+    uint64_t state = pc->state64;
+    uint64_t state_ext = pc1->state_ext;
+    int i = 0;
+
+    for (; i < buf_size; i++) {
+        /* the byte about to leave state enters state_ext from the top */
+        state_ext = (state_ext >> 8) | state & 0xFF00000000000000;
+        state = (state << 8) | buf[i];
+        if (state_ext
+            || word_bits == 16 && state != MARKER_16LE
+            || word_bits == 24 && state != MARKER_20LE && state != MARKER_24LE)
+            continue;
+
+        /* Fill the history with ones so that the syncword just found cannot
+         * match again when the caller resumes at its position.
+         * pc1->state_ext is not written back on this path. */
+        pc->state64 = -1;
+        /* i is the last byte of the marker: 4 bytes for 16-bit, 6 otherwise */
+        return state == MARKER_16LE ? i - 3 : i - 5;
+    }
+
+    /* no match: save the history for the next call */
+    pc->state64 = state;
+    pc1->state_ext = state_ext;
+    return END_NOT_FOUND;
+}
+
+/**
+ * Identify the payload from the s337m preamble words.
+ *
+ * @param dectx           if not NULL, receives codec and frame_size; its
+ *                        avctx, when set, is used for logging
+ * @param state           syncword as read by the caller
+ * @param data_type       raw data type word
+ * @param data_size       raw data size word
+ * @param s337m_word_bits container word size: 16 or 24
+ * @return 0 on success, AVERROR_INVALIDDATA on unknown syncword or on a
+ *         payload/container mismatch; for an unsupported data type,
+ *         AVERROR_PATCHWELCOME if dectx is set, AVERROR_INVALIDDATA otherwise
+ */
+static int s337m_get_codec(SPDIFS337MContext *dectx, uint64_t state, int data_type, int data_size, int s337m_word_bits)
+{
+    int payload_bits;
+    int container_ok;
+
+    /* The 20/24-bit fields are shifted down to the position of the
+     * 16-bit ones. */
+    if (IS_16LE_MARKER(state)) {
+        payload_bits = 16;
+    } else if (IS_20LE_MARKER(state)) {
+        payload_bits = 20;
+        data_type >>= 8;
+        data_size >>= 4;
+    } else if (IS_24LE_MARKER(state)) {
+        payload_bits = 24;
+        data_type >>= 8;
+    } else {
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* 16-bit payloads need a 16-bit container, 20/24-bit ones a 24-bit one. */
+    container_ok = (s337m_word_bits == 16 && payload_bits == 16) ||
+                   (s337m_word_bits == 24 && (payload_bits == 20 || payload_bits == 24));
+    if (!container_ok) {
+        if (dectx && dectx->avctx)
+            av_log(dectx->avctx, AV_LOG_ERROR, "s337m: unexpected %d-bit payload in %d-bit container\n", payload_bits, s337m_word_bits);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if ((data_type & 0x1F) != 0x1C) {
+        /* When probing 16-bit streams, spdif codecs can be encountered. */
+        if (dectx && dectx->avctx)
+            avpriv_report_missing_feature(dectx->avctx, "Data type %#x in SMPTE 337M", data_type & 0x1F);
+        return dectx ? AVERROR_PATCHWELCOME : AVERROR_INVALIDDATA;
+    }
+
+    /* Data type 0x1C is Dolby E */
+    if (dectx) {
+        dectx->frame_size = ((payload_bits + 7) >> 3) * data_size / payload_bits;
+        dectx->codec = AV_CODEC_ID_DOLBY_E;
+    }
+
+    return 0;
+}
+
+/*
+ * Read the syncword, data type and data size at the start of buf and
+ * hand them to s337m_get_codec().
+ *
+ * Returns the header size (s337m_word_bits / 2 bytes) on success, 0 if the
+ * syncword bytes are all null, AVERROR_BUFFER_TOO_SMALL if buf is shorter
+ * than the header, or the error returned by s337m_get_codec().
+ */
+int avpriv_s337m_parse_header(SPDIFS337MContext *dectx, const uint8_t *buf, int buf_size, int s337m_word_bits)
+{
+    const int header_size = s337m_word_bits >> 1;
+    uint64_t sync;
+    int type, size, err;
+
+    if (buf_size < header_size)
+        return AVERROR_BUFFER_TOO_SMALL;
+
+    if (s337m_word_bits == 16) {
+        sync = AV_RB32(buf);
+        type = AV_RL16(buf + 4);
+        size = AV_RL16(buf + 6);
+    } else {
+        sync = AV_RB48(buf);
+        type = AV_RL24(buf + 6);
+        size = AV_RL24(buf + 9);
+    }
+
+    if (!sync)
+        return 0;
+
+    err = s337m_get_codec(dectx, sync, type, size, s337m_word_bits);
+    if (err)
+        return err;
+
+    return header_size;
+}
diff --git a/libavcodec/spdif_s337m_parser_internal.h b/libavcodec/spdif_s337m_parser_internal.h
new file mode 100644
index 0000000000..da9407ebaf
--- /dev/null
+++ b/libavcodec/spdif_s337m_parser_internal.h
@@ -0,0 +1,92 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_SPDIF_S337M_PARSER_INTERNAL_H
+#define AVCODEC_SPDIF_S337M_PARSER_INTERNAL_H
+
+#include "parser.h"
+
+/* Syncwords as they appear when the stream bytes are read big-endian. */
+#define MARKER_16LE         0x72F81F4E
+#define MARKER_20LE         0x20876FF0E154
+#define MARKER_24LE         0x72F8961F4EA5
+
+/* Test the low bits of state against a marker; for 20-bit, the low nibble
+ * of each 24-bit word is ignored. The argument is parenthesized so that
+ * expressions can be passed safely. */
+#define IS_16LE_MARKER(state)   (((state) & 0xFFFFFFFF) == MARKER_16LE)
+#define IS_20LE_MARKER(state)   (((state) & 0xF0FFFFF0FFFF) == MARKER_20LE)
+#define IS_24LE_MARKER(state)   (((state) & 0xFFFFFFFFFFFF) == MARKER_24LE)
+
+/**
+ * @struct SPDIFS337MContext
+ * Context for use by decoder and parser.
+ */
+typedef struct SPDIFS337MContext {
+    void        *avctx;         /* logging context; may be NULL, in which case nothing is logged */
+
+    int         frame_size;     /* payload size in bytes, set by the header parser */
+    enum AVCodecID codec;       /* codec of the payload, set by the header parser */
+} SPDIFS337MContext;
+
+/**
+ * @struct SPDIFS337MParseContext
+ * Context for use by parser to split packets.
+ */
+typedef struct SPDIFS337MParseContext {
+    ParseContext pc;            /* generic parse context; pc.state64 holds the last 8 bytes scanned */
+    uint64_t     state_ext;     /* the 8 bytes scanned before those held in pc.state64 */
+
+    int          inited;        /* set once the first syncword has been found */
+    int          aes_initial_offset; /* byte offset of the first syncword */
+    int          aes_offset;    /* bytes accumulated for the packet being assembled */
+
+    uint8_t      *null_buf;     /* zero-filled buffer substituted for non-null guard band bytes */
+    int          null_buf_size; /* allocated size of null_buf in bytes */
+    int          warned_corrupted_guardband; /* set once the guard band warning has been logged */
+} SPDIFS337MParseContext;
+
+void avpriv_spdif_s337m_bswap_buf16(uint16_t *dst, const uint16_t *src, int w);
+void avpriv_spdif_s337m_bswap_buf24(uint8_t *data, int size);
+
+/**
+ * Find an 'extended sync code' as suggested by SMPTE 337M Annex A.
+ * Its length is set to 128 bits for optimization, while Annex A
+ * suggests 6 words (96 bits for 16-bit or 144 bits for 20/24-bit).
+ *
+ * Do not require sync byte alignment on container word bytes,
+ * but still, 16-bit payloads require 16-bit container
+ * and 20/24-bit payloads require 24-bit container.
+ * Note: SPDIF is always 16-bit.
+ *
+ * @param  pc1              To persist states between calls
+ * @param  buf              Buffer for reading
+ * @param  buf_size         Max available bytes to read
+ * @param  word_bits        Buffer word size: 16 or 24
+ * @return syncword byte position on success, or END_NOT_FOUND
+ **/
+int avpriv_spdif_s337m_find_syncword(SPDIFS337MParseContext *pc1, const uint8_t *buf, int buf_size, int word_bits);
+
+/**
+ * Parse s337m header: get codec and frame_size.
+ *
+ * @param  avc              If not null, codec and frame_size will be set
+ * @param  buf              Buffer for reading with s337m syncword at byte position 0
+ * @param  buf_size         Max available bytes to read
+ * @param  s337m_word_bits  Buffer word size: 16 or 24
+ * @return header size > 0 on success, 0 if the first two words are null, or < 0 on error
+ */
+int avpriv_s337m_parse_header(SPDIFS337MContext *avc, const uint8_t *buf, int buf_size, int s337m_word_bits);
+
+#endif /* AVCODEC_SPDIF_S337M_PARSER_INTERNAL_H */
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 28023a4a4d..922c6e2417 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -497,6 +497,7 @@ int av_get_exact_bits_per_sample(enum AVCodecID codec_id)
     case AV_CODEC_ID_PCM_S16LE_PLANAR:
     case AV_CODEC_ID_PCM_U16BE:
     case AV_CODEC_ID_PCM_U16LE:
+    case AV_CODEC_ID_S337M_16:
         return 16;
     case AV_CODEC_ID_PCM_S24DAUD:
     case AV_CODEC_ID_PCM_S24BE:
@@ -504,6 +505,7 @@ int av_get_exact_bits_per_sample(enum AVCodecID codec_id)
     case AV_CODEC_ID_PCM_S24LE_PLANAR:
     case AV_CODEC_ID_PCM_U24BE:
     case AV_CODEC_ID_PCM_U24LE:
+    case AV_CODEC_ID_S337M_24:
         return 24;
     case AV_CODEC_ID_PCM_S32BE:
     case AV_CODEC_ID_PCM_S32LE:
diff --git a/libavcodec/version.c b/libavcodec/version.c
index 03dd95e5ba..b4917b4e29 100644
--- a/libavcodec/version.c
+++ b/libavcodec/version.c
@@ -35,7 +35,7 @@ unsigned avcodec_version(void)
                   AV_CODEC_ID_PCM_SGA      == 65572 &&
                   AV_CODEC_ID_ADPCM_XMD    == 69683 &&
                   AV_CODEC_ID_CBD2_DPCM    == 81928 &&
-                  AV_CODEC_ID_QOA          == 86121 &&
+                  AV_CODEC_ID_S337M_24     == 86124 &&
                   AV_CODEC_ID_ARIB_CAPTION == 94233 &&
                   AV_CODEC_ID_SMPTE_2038   == 98315,
                   "Don't insert new codec ids in the middle of a list");
-- 
2.30.2