[FFmpeg-devel] [PATCH 16/21] libavcodec/avcodec, libavformat/movenc: support embedding channel layout to stream side data

Tue Aug 23 12:03:34 EEST 2016

From: Erkki Seppälä <erkki.seppala.ext at nokia.com>

Added support for passing complex channel layout configuration as side
packet data (AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT,
AV_PKT_DATA_AUDIO_CHANNEL_PREDEFINED_LAYOUT,
AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT_OBJECT_STRUCTURED) to ISO media files
as specified by ISO/IEC 14496-12.

This information isn't integrated into the existing channel layout
system though, which is much more restricted compared to what the
standard permits. However, the side packet data is structured so that
it does not require too much ISO base media file format knowledge in
client code.

This information ends up in an (optional) chnl box of an isom track
in the written file.

Signed-off-by: Erkki Seppälä <erkki.seppala.ext at nokia.com>
Signed-off-by: OZOPlayer <OZOPL at nokia.com>
---
 libavcodec/avcodec.h | 51 ++++++++++++++++++++++++++++++++-
 libavformat/movenc.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 130 insertions(+), 2 deletions(-)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 36c85e9..6c64e6a 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -1365,6 +1365,35 @@ typedef struct AVTrackReferences {
     /** followed by: int tracks[nb_tracks]; -- tracks this track refers to */
 } AVTrackReferences;
 
+/** Describes the speaker position of a single audio channel of a single track */
+typedef struct AVAudioTrackChannelPosition {
+    int speaker_position; /**< an OutputChannelPosition from ISO/IEC 23001-8 */

+    /** The following are used only if speaker_position == 126 */
+    int azimuth;   /**< written to the chnl box as a 16-bit value */
+    int elevation; /**< written to the chnl box as an 8-bit value */
+} AVAudioTrackChannelPosition;
+
+/** Describes the channel layout (i.e. speaker positions) of a single audio track */
+typedef struct AVAudioTrackChannelLayout {
+    int nb_positions; /**< number of entries in the positions array that follows */
+    /** followed by: AVAudioTrackChannelPosition positions[nb_positions]; */
+} AVAudioTrackChannelLayout;
+
+/** Describes the channel layout of a single track based on a predefined layout,
+    by providing that layout and the list of channels that are omitted from it */
+typedef struct AVAudioTrackChannelPredefinedLayout {
+    int layout;              /**< ChannelConfiguration from ISO/IEC 23001-8 */
+    int nb_omitted_channels; /**< number of entries in omitted_channels; the mov muxer reads at most 64 */
+    /** followed by: char omitted_channels[nb_omitted_channels]; - non-zero indicates the channel is omitted */
+} AVAudioTrackChannelPredefinedLayout;
+
+/** Describes the channel layout as object-structured with the given
+    number of objects */
+typedef struct AVAudioTrackChannelLayoutObjectStructured {
+    int object_count; /**< number of objects; written to the chnl box as an 8-bit value */
+} AVAudioTrackChannelLayoutObjectStructured;
+
 enum AVPacketSideDataType {
     AV_PKT_DATA_PALETTE,
 
@@ -1556,7 +1585,27 @@ enum AVPacketSideDataType {
      * Configured the timed metadata parameters, such as the uri and
      * meta data configuration. The key is of type AVTimedMetadata.
      */
-    AV_PKT_DATA_TIMED_METADATA_INFO
+    AV_PKT_DATA_TIMED_METADATA_INFO,
+
+    /**
+     * Channel layout, describing the position of speakers for the
+     * channels of a track, following the structure
+     * AVAudioTrackChannelLayout.
+     */
+    AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT,
+
+    /**
+     * Predefined channel layout, describing the position of speakers
+     * for the channels of a track, following the structure
+     * AVAudioTrackChannelPredefinedLayout.
+     */
+    AV_PKT_DATA_AUDIO_CHANNEL_PREDEFINED_LAYOUT,
+
+    /**
+     * The channel layout is object structured with the number of objects in
+     * AVAudioTrackChannelLayoutObjectStructured
+     */
+    AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT_OBJECT_STRUCTURED
 };
 
 #define AV_PKT_DATA_QUALITY_FACTOR AV_PKT_DATA_QUALITY_STATS //DEPRECATED
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index ff4bf85..9606918 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -557,6 +557,83 @@ static unsigned compute_avg_bitrate(MOVTrack *track)
     return size * 8 * track->timescale / track->track_duration;
 }
 
+/**
+ * Write the ISO/IEC 14496-12 ChannelLayout ('chnl') box for an audio track.
+ *
+ * The box is built from the stream side data AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT,
+ * AV_PKT_DATA_AUDIO_CHANNEL_PREDEFINED_LAYOUT and
+ * AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT_OBJECT_STRUCTURED; nothing is written when
+ * none of them is attached to the stream.
+ *
+ * @param pb    output I/O context
+ * @param track track whose stream side data describes the layout
+ * @return 0 if no box was written, otherwise the result of update_size()
+ */
+static int mov_write_chnl_tag(AVIOContext *pb, MOVTrack *track)
+{
+    AVAudioTrackChannelLayout *layout =
+        (void*) av_stream_get_side_data(track->st, AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT,
+                                        NULL);
+    AVAudioTrackChannelPredefinedLayout *predef_layout =
+        (void*) av_stream_get_side_data(track->st, AV_PKT_DATA_AUDIO_CHANNEL_PREDEFINED_LAYOUT,
+                                        NULL);
+    AVAudioTrackChannelLayoutObjectStructured *object_structured =
+        (void*) av_stream_get_side_data(track->st, AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT_OBJECT_STRUCTURED,
+                                        NULL);
+    int channel_structured = layout || predef_layout;
+    // ChannelConfiguration from ISO/IEC 23001-8; 0 selects explicit positions
+    int defined_layout     = predef_layout ? predef_layout->layout : 0;
+    int64_t box_start;
+    int i;
+
+    if (!channel_structured && !object_structured)
+        return 0;
+
+    box_start = avio_tell(pb);
+    avio_wb32(pb, 0); // size, patched by update_size() below
+    ffio_wfourcc(pb, "chnl");
+    avio_wb32(pb, 0); // version and flags
+
+    // stream_structure: bit 0 = channel structured, bit 1 = object structured
+    avio_w8(pb, (channel_structured << 0) | ((!!object_structured) << 1));
+
+    if (channel_structured) {
+        avio_w8(pb, defined_layout);
+        if (defined_layout == 0) {
+            AVAudioTrackChannelPosition *positions;
+            int nb_channels = track->par->channels;
+
+            av_assert0(layout);
+            av_assert0(layout->nb_positions >= nb_channels);
+            // the position array directly follows the header in the side data
+            positions = (void*) (layout + 1);
+            for (i = 0; i < nb_channels; ++i) {
+                avio_w8(pb, positions[i].speaker_position);
+                if (positions[i].speaker_position == 126) {
+                    // explicit position: azimuth and elevation follow
+                    avio_wb16(pb, positions[i].azimuth);
+                    avio_w8(pb, positions[i].elevation);
+                }
+            }
+        } else {
+            // omittedChannelsMap is one 64-bit big-endian field whose least
+            // significant bit describes the first channel, so assemble it as
+            // a single integer rather than emitting the low-order byte first
+            uint64_t omitted_channels_map = 0;
+            char *omitted_channels = (void*) (predef_layout + 1);
+
+            for (i = 0; i < FFMIN(64, predef_layout->nb_omitted_channels); ++i)
+                if (omitted_channels[i])
+                    omitted_channels_map |= UINT64_C(1) << i;
+            avio_wb64(pb, omitted_channels_map);
+        }
+    }
+    if (object_structured)
+        avio_w8(pb, object_structured->object_count);
+
+    return update_size(pb, box_start);
+}
+
 static int mov_write_esds_tag(AVIOContext *pb, MOVTrack *track) // Basic
 {
     AVCPBProperties *props;
@@ -996,8 +1073,10 @@ static int mov_write_audio_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
          (mov_pcm_le_gt16(track->par->codec_id) && version==1) ||
          (mov_pcm_be_gt16(track->par->codec_id) && version==1)))
         mov_write_wave_tag(s, pb, track);
-    else if (track->tag == MKTAG('m','p','4','a'))
+    else if (track->tag == MKTAG('m','p','4','a')) {
+        mov_write_chnl_tag(pb, track);
         mov_write_esds_tag(pb, track);
+    }
     else if (track->par->codec_id == AV_CODEC_ID_AMR_NB)
         mov_write_amr_tag(pb, track);
     else if (track->par->codec_id == AV_CODEC_ID_AC3)
-- 
2.7.4