[Ffmpeg-devel] [PATCH] THP PCM decoder (GSoC Qualification)

Tue Apr 3 15:21:56 CEST 2007

Michael Niedermayer <michaelni at gmx.at> writes:

Hi,

> Hi
>
> On Mon, Apr 02, 2007 at 07:24:49PM +0200, Marco Gerards wrote:
>> Baptiste Coudurier <baptiste.coudurier at smartjog.com> writes:
> [...]
>> >> [...]
>> >> +    }
>> >> +    else {
>> >> +       ret = av_get_packet(pb, pkt, thp->audiosize);
>> >> +       if (ret != thp->audiosize) {
>> >> +          av_free_packet(pkt);
>> >> +          return AVERROR_IO;
>> >> +       }
>> >> +      pkt->stream_index = thp->audio_stream_index;
>> >> +      thp->audiosize = 0;
>> >> +      thp->frame++;
>> >
>> > Can't seek be avoided now ? Does audio follow video in data stream ? If
>> > so Im wondering if reading video + audio in the same time, using buffer
>> > then output audio after, would not be cleaner and simpler.
>> 
>> It's convenient that I can make a video and audio packet and read
>> simultaneously.  I think the code otherwise will get more complex.  Do
>> you think it is worth the complexity and reading everything at once?
>> 
>> I do not think seeking can be avoided.  Some data follows the frame,
>> it is not documented.  So there is a gap between the total framesize
>> and the image+audio size.
>
> how does that extra data look ? (iam just curious ...)
>
>
> [...]
>> Index: libavcodec/adpcm.c
>> ===================================================================
>> --- libavcodec/adpcm.c	(revision 8597)
>> +++ libavcodec/adpcm.c	(working copy)
>> @@ -442,6 +442,8 @@
>>      switch(avctx->codec->id) {
>>      case CODEC_ID_ADPCM_IMA_QT: /* XXX: can't test until we get .mov writer */
>>          break;
>> +    case CODEC_ID_ADPCM_THP:
>> +        break;
>>      case CODEC_ID_ADPCM_IMA_WAV:
>>          n = avctx->frame_size / 8;
>
> what is this good for?

Removed.

>>              c->status[0].prev_sample = (signed short)samples[0]; /* XXX */
>> @@ -1308,6 +1310,69 @@
>>              src++;
>>          }
>>          break;
>> +    case CODEC_ID_ADPCM_THP:
>> +      {
>> +        GetBitContext gb;
>> +        float table[16][2];
>> +        int samplecnt;
>> +        int prev1[2], prev2[2];
>> +        int ch;
>> +
>> +        if (buf_size < 80) {
>> +          av_log(avctx, AV_LOG_ERROR, "frame too small\n");
>> +          return -1;
>> +        }
>
> indentation in ffmpeg is 4 spaces

Fixed.

>
>> +
>> +        init_get_bits(&gb, src, buf_size);
>
> size is in bits ...

Fixed.

>> +        src += buf_size;
>> +
>> +                    get_bits(&gb, 32); /* Channel size */
>> +        samplecnt = get_bits(&gb, 32);
>> +
>> +        for (ch = 0; ch < 2; ch++)
>> +          for (i = 0; i < 16; i++) {
>> +              /* Read the fixed point entry and store as floating
>> +                 point.  */
>> +              int entry = get_sbits(&gb, 16);
>> +              table[i][ch] = (float) entry  / (1 << 11);
>
> please remove all floating point code from the decoder; there's no need
> for it, and removing it makes the output binary identical (easily
> testable) across different architectures

Done.

>> +          }
>> +
>> +        /* Initialize the previous sample.  */
>> +        for (ch = 0; ch < 2; ch++) {
>> +            prev1[ch] = get_sbits(&gb, 16);
>> +            prev2[ch] = get_sbits(&gb, 16);
>> +        }
>> +
>> +        for (ch = 0; ch <= st; ch++) {
>> +            int sample = samplecnt;
>> +
>> +            /* Read in every sample for this channel.  */
>> +            while (sample > 0) {
>
> a for() would be simpler here

Yes, I have done this now.

>> +                uint8_t index = get_bits (&gb, 4) & 7;
>> +                int exp = get_bits (&gb, 4);
>> +                float factor1 = table[index * 2][ch];
>> +                float factor2 = table[index * 2 + 1][ch];
>> +        
>
> trailing whitespace

Removed.

>> +                /* Decode 14 samples.  */
>> +                for (n = 0; n < 14; n++) {
>> +                    int sampledat = get_sbits (&gb, 4);
>> +                    *samples = prev1[ch]*factor1 
>> +                               + prev2[ch]*factor2 + (sampledat << exp);
>> +                    prev2[ch] = prev1[ch];
>> +                    prev1[ch] = *samples++;
>> +
>> +                    if (samples >= samples_end) {
>> +                       av_log(avctx, AV_LOG_ERROR, "allocated output buffer is too small\n");
>> +                       return -1;
>> +                    }
>
> this check can be moved out of the loop

Done.

There are still two issues in this new patch.  First, for some reason
the sound quality was not that good.  It was not as bad as someone
reported, though: he said, for example, that there were issues with
mono sound, and I cannot reproduce that.

The problem is in stereo sound.  I have disabled stereo in this patch
and the sound is just fine now.  To be honest, I am not sure what the
problem is.  I have tried all kinds of things without much success.
Perhaps it is better to commit this first.  After that I can fix this
with another patch.

The second issue is the seek.  Is this a big problem?  I am only
seeking forwards, so I assumed that ffmpeg just skips the data if it
cannot seek.  Or is this something I should look at?  It is not
documented how much padding there is.

--
Marco


Index: libavcodec/allcodecs.c
===================================================================

--- libavcodec/allcodecs.c	(revision 8605)
+++ libavcodec/allcodecs.c	(working copy)
@@ -242,6 +242,7 @@
     REGISTER_ENCDEC (ADPCM_SBPRO_3, adpcm_sbpro_3);
     REGISTER_ENCDEC (ADPCM_SBPRO_4, adpcm_sbpro_4);
     REGISTER_ENCDEC (ADPCM_SWF, adpcm_swf);
+    REGISTER_ENCDEC (ADPCM_THP, adpcm_thp);
     REGISTER_ENCDEC (ADPCM_XA, adpcm_xa);
     REGISTER_ENCDEC (ADPCM_YAMAHA, adpcm_yamaha);
 
Index: libavcodec/Makefile
===================================================================
--- libavcodec/Makefile	(revision 8605)
+++ libavcodec/Makefile	(working copy)
@@ -246,6 +246,8 @@
 OBJS-$(CONFIG_ADPCM_SBPRO_4_ENCODER)   += adpcm.o
 OBJS-$(CONFIG_ADPCM_SWF_DECODER)       += adpcm.o
 OBJS-$(CONFIG_ADPCM_SWF_ENCODER)       += adpcm.o
+OBJS-$(CONFIG_ADPCM_THP_DECODER)       += adpcm.o
+OBJS-$(CONFIG_ADPCM_THP_ENCODER)       += adpcm.o
 OBJS-$(CONFIG_ADPCM_XA_DECODER)        += adpcm.o
 OBJS-$(CONFIG_ADPCM_XA_ENCODER)        += adpcm.o
 OBJS-$(CONFIG_ADPCM_YAMAHA_DECODER)    += adpcm.o
Index: libavcodec/avcodec.h
===================================================================
--- libavcodec/avcodec.h	(revision 8605)
+++ libavcodec/avcodec.h	(working copy)
@@ -198,6 +198,7 @@
     CODEC_ID_ADPCM_SBPRO_4,
     CODEC_ID_ADPCM_SBPRO_3,
     CODEC_ID_ADPCM_SBPRO_2,
+    CODEC_ID_ADPCM_THP,
 
     /* AMR */
     CODEC_ID_AMR_NB= 0x12000,
@@ -2405,6 +2406,7 @@
 PCM_CODEC(CODEC_ID_ADPCM_SBPRO_4, adpcm_sbpro_4);
 PCM_CODEC(CODEC_ID_ADPCM_SMJPEG,  adpcm_ima_smjpeg);
 PCM_CODEC(CODEC_ID_ADPCM_SWF,     adpcm_swf);
+PCM_CODEC(CODEC_ID_ADPCM_THP,     adpcm_thp);
 PCM_CODEC(CODEC_ID_ADPCM_XA,      adpcm_xa);
 PCM_CODEC(CODEC_ID_ADPCM_YAMAHA,  adpcm_yamaha);
 
Index: libavcodec/adpcm.c
===================================================================
--- libavcodec/adpcm.c	(revision 8605)
+++ libavcodec/adpcm.c	(working copy)
@@ -1308,6 +1308,62 @@
             src++;
         }
         break;
+    case CODEC_ID_ADPCM_THP:
+      {
+        GetBitContext gb;
+        long table[16][2];
+        int samplecnt;
+        int prev1[2], prev2[2];
+        int ch;
+
+        if (buf_size < 80) {
+            av_log(avctx, AV_LOG_ERROR, "frame too small\n");
+            return -1;
+        }
+
+        init_get_bits(&gb, src, buf_size * 8);
+        src += buf_size;
+
+                    get_bits(&gb, 32); /* Channel size */
+        samplecnt = get_bits(&gb, 32);
+
+        for (ch = 0; ch < 2; ch++)
+            for (i = 0; i < 16; i++)
+                table[i][ch] = get_sbits(&gb, 16);
+
+        /* Initialize the previous sample.  */
+        for (ch = 0; ch < 2; ch++) {
+            prev1[ch] = get_sbits(&gb, 16);
+            prev2[ch] = get_sbits(&gb, 16);
+        }
+
+        if (samples + samplecnt >= samples_end) {
+            av_log(avctx, AV_LOG_ERROR, "allocated output buffer is too small\n");
+            return -1;
+        }
+
+        for (ch = 0; ch <= st; ch++) {
+            /* Read in every sample for this channel.  */
+            for (i = 0; i < samplecnt / 14; i++) {
+                uint8_t index = get_bits (&gb, 4) & 7;
+                int exp = get_bits (&gb, 4);
+                long factor1 = table[index * 2][ch];
+                long factor2 = table[index * 2 + 1][ch];
+
+                /* Decode 14 samples.  */
+                for (n = 0; n < 14; n++) {
+                    int sampledat = get_sbits (&gb, 4);
+                    *samples = ((prev1[ch]*factor1 
+                                + prev2[ch]*factor2) >> 11) + (sampledat << exp);
+                    prev2[ch] = prev1[ch];
+                    prev1[ch] = *samples++;
+                }
+            }
+        }
+
+        break;
+      }
+
     default:
         return -1;
     }
@@ -1368,5 +1424,6 @@
 ADPCM_CODEC(CODEC_ID_ADPCM_SBPRO_4, adpcm_sbpro_4);
 ADPCM_CODEC(CODEC_ID_ADPCM_SBPRO_3, adpcm_sbpro_3);
 ADPCM_CODEC(CODEC_ID_ADPCM_SBPRO_2, adpcm_sbpro_2);
+ADPCM_CODEC(CODEC_ID_ADPCM_THP, adpcm_thp);
 
 #undef ADPCM_CODEC
Index: doc/ffmpeg-doc.texi
===================================================================
--- doc/ffmpeg-doc.texi	(revision 8605)
+++ doc/ffmpeg-doc.texi	(working copy)
@@ -902,7 +902,7 @@
 @tab This format is used in non-Windows version of Feeble Files game and
 different game cutscenes repacked for use with ScummVM.
 @item THP @tab    @tab X
-@tab Used on the Nintendo GameCube (video only)
+@tab Used on the Nintendo GameCube
 @end multitable
 
 @code{X} means that encoding (resp. decoding) is supported.
Index: libavformat/thp.c
===================================================================
--- libavformat/thp.c	(revision 8605)
+++ libavformat/thp.c	(working copy)
@@ -35,10 +35,12 @@
     int              next_frame;
     int              next_framesz;
     int              video_stream_index;
+    int              audio_stream_index;
     int              compcount;
     unsigned char    components[16];
     AVStream*        vst;
     int              has_audio;
+    int              audiosize;
 } ThpDemuxContext;
 
 
@@ -116,7 +118,26 @@
              get_be32(pb); /* Unknown.  */
         }
       else if (thp->components[i] == 1) {
-          /* XXX: Required for audio playback.  */
+          if (thp->has_audio != 0)
+             break;
+
+          /* Audio component.  */
+          st = av_new_stream(s, 0);
+          if (!st)
+              return AVERROR_NOMEM;
+
+          st->codec->codec_type = CODEC_TYPE_AUDIO;
+          st->codec->codec_id = CODEC_ID_ADPCM_THP;
+          st->codec->codec_tag = 0;  /* no fourcc */
+          st->codec->channels    = get_be32(pb); /* numChannels.  */
+          st->codec->sample_rate = get_be32(pb); /* Frequency.  */
+
+          /* XXX: For now, force to mono sound.  */
+          st->codec->channels = 1;
+
+          av_set_pts_info(st, 64, 1, st->codec->sample_rate);
+
+          thp->audio_stream_index = st->index;
           thp->has_audio = 1;
       }
     }
@@ -132,6 +153,8 @@
     int size;
     int ret;
 
+    if (thp->audiosize == 0) {
+
     /* Terminate when last frame is reached.  */
     if (thp->frame >= thp->framecnt)
        return AVERROR_IO;
@@ -145,8 +168,12 @@
                         get_be32(pb); /* Previous total size.  */
     size              = get_be32(pb); /* Total size of this frame.  */
 
+    /* Store the audiosize so the next time this function is called,
+       the audio can be read.  */
     if (thp->has_audio)
-                        get_be32(pb); /* Audio size.  */
+       thp->audiosize = get_be32(pb); /* Audio size.  */
+    else
+       thp->frame++;
 
     ret = av_get_packet(pb, pkt, size);
     if (ret != size) {
@@ -155,8 +182,19 @@
     }
 
     pkt->stream_index = thp->video_stream_index;
-    thp->frame++;
+    }
+    else {
+       ret = av_get_packet(pb, pkt, thp->audiosize);
+       if (ret != thp->audiosize) {
+          av_free_packet(pkt);
+          return AVERROR_IO;
+       }
 
+      pkt->stream_index = thp->audio_stream_index;
+      thp->audiosize = 0;
+      thp->frame++;
+    }
+
     return 0;
 }