FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
svq1enc.c
Go to the documentation of this file.
1 /*
2  * SVQ1 Encoder
3  * Copyright (C) 2004 Mike Melanson <melanson@pcisys.net>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Sorenson Vector Quantizer #1 (SVQ1) video codec.
25  * For more information on the SVQ1 algorithm, visit:
26  * http://www.pcisys.net/~melanson/codecs/
27  */
28 
29 #include "avcodec.h"
30 #include "dsputil.h"
31 #include "mpegvideo.h"
32 #include "h263.h"
33 #include "internal.h"
34 #include "libavutil/avassert.h"
35 #include "svq1.h"
36 #include "svq1enc_cb.h"
37 
38 
/*
 * Private state of the SVQ1 encoder.
 *
 * NOTE(review): this listing is an extract. The gaps in its line numbering
 * (43-50, 54, 56-57, 60-61, 64-65, 74) show that most member declarations
 * are not visible here; only the members that are visible are annotated.
 */
39 typedef struct SVQ1Context {
40  /* FIXME: Needed for motion estimation, should not be used for anything
41  * else, the idea is to make the motion estimation eventually independent
42  * of MpegEncContext, so this will be removed then. */
51 
52  /* why ooh why this sick breadth first order,
53  * everything is slower and more complex */
55 
58 
59  /* Y plane block dimensions */
62 
63  /* U & V plane (C planes) block dimensions */
66 
 /* Per-macroblock type flags; allocated at init with
  * (y_block_width + 1) * y_block_height entries and handed to the motion
  * estimator as s->m.mb_type. */
67  uint16_t *mb_type;
 /* Placeholder buffer that svq1_encode_plane() wires into several
  * MpegEncContext picture fields ("dummies, to avoid segfaults"). */
68  uint32_t *dummy;
 /* Motion vector storage, one buffer per plane; allocated on first use in
  * svq1_encode_plane() and released when the encoder is closed. */
69  int16_t (*motion_val8[3])[2];
70  int16_t (*motion_val16[3])[2];
71 
 /* Running sum of the winning per-block score, logged at close. */
72  int64_t rd_total;
73 
75 } SVQ1Context;
76 
/*
 * Write the SVQ1 frame header into s->pb.
 *
 * Emits the 22-bit frame code, an 8-bit temporal reference of zero and a
 * 2-bit frame type (frame_type - 1). For AV_PICTURE_TYPE_I frames it also
 * writes five fixed bits, a 3-bit frame size code and, when that code is 7,
 * explicit 12-bit width and height. Two trailing zero bits close the header.
 *
 * NOTE(review): listing lines 96-97 are missing from this extract; they hold
 * the start of the statement that assigns the frame size code to `i` (only
 * its trailing arguments are visible on listing line 98).
 */
77 static void svq1_write_header(SVQ1Context *s, int frame_type)
78 {
79  int i;
80 
81  /* frame code */
82  put_bits(&s->pb, 22, 0x20);
83 
84  /* temporal reference (sure hope this is a "don't care") */
85  put_bits(&s->pb, 8, 0x00);
86 
87  /* frame type */
88  put_bits(&s->pb, 2, frame_type - 1);
89 
90  if (frame_type == AV_PICTURE_TYPE_I) {
91  /* no checksum since frame code is 0x20 */
92  /* no embedded string either */
93  /* output 5 unknown bits (2 + 2 + 1) */
94  put_bits(&s->pb, 5, 2); /* 2 needed by quicktime decoder */
95 
98  s->frame_width, s->frame_height);
99  put_bits(&s->pb, 3, i);
100 
 /* size code 7: the dimensions are written explicitly */
101  if (i == 7) {
102  put_bits(&s->pb, 12, s->frame_width);
103  put_bits(&s->pb, 12, s->frame_height);
104  }
105  }
106 
107  /* no checksum or extra data (next 2 bits get 0) */
108  put_bits(&s->pb, 2, 0);
109 }
110 
/*
 * Seed value and per-level scale factor for the threshold[] table that
 * svq1_encode_plane() fills in: threshold[5] = QUALITY_THRESHOLD and each
 * lower level is the level above multiplied by THRESHOLD_MULTIPLIER.
 * NOTE(review): in the visible code that table is never passed on;
 * encode_block() is called with a literal threshold of 64.
 */
111 #define QUALITY_THRESHOLD 100
112 #define THRESHOLD_MULTIPLIER 0.6
113 
114 static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref,
115  uint8_t *decoded, int stride, int level,
116  int threshold, int lambda, int intra)
117 {
118  int count, y, x, i, j, split, best_mean, best_score, best_count;
119  int best_vector[6];
120  int block_sum[7] = { 0, 0, 0, 0, 0, 0 };
121  int w = 2 << (level + 2 >> 1);
122  int h = 2 << (level + 1 >> 1);
123  int size = w * h;
124  int16_t block[7][256];
125  const int8_t *codebook_sum, *codebook;
126  const uint16_t(*mean_vlc)[2];
127  const uint8_t(*multistage_vlc)[2];
128 
129  best_score = 0;
130  // FIXME: Optimize, this does not need to be done multiple times.
131  if (intra) {
132  codebook_sum = svq1_intra_codebook_sum[level];
133  codebook = ff_svq1_intra_codebooks[level];
134  mean_vlc = ff_svq1_intra_mean_vlc;
135  multistage_vlc = ff_svq1_intra_multistage_vlc[level];
136  for (y = 0; y < h; y++) {
137  for (x = 0; x < w; x++) {
138  int v = src[x + y * stride];
139  block[0][x + w * y] = v;
140  best_score += v * v;
141  block_sum[0] += v;
142  }
143  }
144  } else {
145  codebook_sum = svq1_inter_codebook_sum[level];
146  codebook = ff_svq1_inter_codebooks[level];
147  mean_vlc = ff_svq1_inter_mean_vlc + 256;
148  multistage_vlc = ff_svq1_inter_multistage_vlc[level];
149  for (y = 0; y < h; y++) {
150  for (x = 0; x < w; x++) {
151  int v = src[x + y * stride] - ref[x + y * stride];
152  block[0][x + w * y] = v;
153  best_score += v * v;
154  block_sum[0] += v;
155  }
156  }
157  }
158 
159  best_count = 0;
160  best_score -= (int)((unsigned)block_sum[0] * block_sum[0] >> (level + 3));
161  best_mean = block_sum[0] + (size >> 1) >> (level + 3);
162 
163  if (level < 4) {
164  for (count = 1; count < 7; count++) {
165  int best_vector_score = INT_MAX;
166  int best_vector_sum = -999, best_vector_mean = -999;
167  const int stage = count - 1;
168  const int8_t *vector;
169 
170  for (i = 0; i < 16; i++) {
171  int sum = codebook_sum[stage * 16 + i];
172  int sqr, diff, score;
173 
174  vector = codebook + stage * size * 16 + i * size;
175  sqr = s->dsp.ssd_int8_vs_int16(vector, block[stage], size);
176  diff = block_sum[stage] - sum;
177  score = sqr - (diff * (int64_t)diff >> (level + 3)); // FIXME: 64bit slooow
178  if (score < best_vector_score) {
179  int mean = diff + (size >> 1) >> (level + 3);
180  av_assert2(mean > -300 && mean < 300);
181  mean = av_clip(mean, intra ? 0 : -256, 255);
182  best_vector_score = score;
183  best_vector[stage] = i;
184  best_vector_sum = sum;
185  best_vector_mean = mean;
186  }
187  }
188  av_assert0(best_vector_mean != -999);
189  vector = codebook + stage * size * 16 + best_vector[stage] * size;
190  for (j = 0; j < size; j++)
191  block[stage + 1][j] = block[stage][j] - vector[j];
192  block_sum[stage + 1] = block_sum[stage] - best_vector_sum;
193  best_vector_score += lambda *
194  (+1 + 4 * count +
195  multistage_vlc[1 + count][1]
196  + mean_vlc[best_vector_mean][1]);
197 
198  if (best_vector_score < best_score) {
199  best_score = best_vector_score;
200  best_count = count;
201  best_mean = best_vector_mean;
202  }
203  }
204  }
205 
206  split = 0;
207  if (best_score > threshold && level) {
208  int score = 0;
209  int offset = level & 1 ? stride * h / 2 : w / 2;
210  PutBitContext backup[6];
211 
212  for (i = level - 1; i >= 0; i--)
213  backup[i] = s->reorder_pb[i];
214  score += encode_block(s, src, ref, decoded, stride, level - 1,
215  threshold >> 1, lambda, intra);
216  score += encode_block(s, src + offset, ref + offset, decoded + offset,
217  stride, level - 1, threshold >> 1, lambda, intra);
218  score += lambda;
219 
220  if (score < best_score) {
221  best_score = score;
222  split = 1;
223  } else {
224  for (i = level - 1; i >= 0; i--)
225  s->reorder_pb[i] = backup[i];
226  }
227  }
228  if (level > 0)
229  put_bits(&s->reorder_pb[level], 1, split);
230 
231  if (!split) {
232  av_assert1(best_mean >= 0 && best_mean < 256 || !intra);
233  av_assert1(best_mean >= -256 && best_mean < 256);
234  av_assert1(best_count >= 0 && best_count < 7);
235  av_assert1(level < 4 || best_count == 0);
236 
237  /* output the encoding */
238  put_bits(&s->reorder_pb[level],
239  multistage_vlc[1 + best_count][1],
240  multistage_vlc[1 + best_count][0]);
241  put_bits(&s->reorder_pb[level], mean_vlc[best_mean][1],
242  mean_vlc[best_mean][0]);
243 
244  for (i = 0; i < best_count; i++) {
245  av_assert2(best_vector[i] >= 0 && best_vector[i] < 16);
246  put_bits(&s->reorder_pb[level], 4, best_vector[i]);
247  }
248 
249  for (y = 0; y < h; y++)
250  for (x = 0; x < w; x++)
251  decoded[x + y * stride] = src[x + y * stride] -
252  block[best_count][x + w * y] +
253  best_mean;
254  }
255 
256  return best_score;
257 }
258 
/*
 * Encode one plane of the current picture into s->pb, 16x16 block by block.
 *
 * For P pictures a first pass sets up the embedded MpegEncContext and runs
 * motion estimation over every block. The second pass (all picture types)
 * builds up to three candidates per block - intra, motion compensated inter
 * and "copy the reference block" - keeps the one with the lowest score, adds
 * that score to s->rd_total and appends the candidate's bits to s->pb.
 *
 * src_plane/src_stride describe the input plane, ref_plane the previously
 * reconstructed plane and decoded_plane the reconstruction being produced;
 * the latter two are addressed with `stride`.
 *
 * Returns 0 on success, or -1 when fewer than 3000 bytes are left in the
 * output buffer before a block is started.
 *
 * NOTE(review): this listing is an extract; lines 284, 290, 310, 324, 354,
 * 363, 395, 399, 404, 420, 426 and 456 of the listing are missing, so a few
 * statements below are visibly truncated (for instance the declaration of
 * `vlc` and the tails of some conditions).
 */
259 static int svq1_encode_plane(SVQ1Context *s, int plane,
260  unsigned char *src_plane,
261  unsigned char *ref_plane,
262  unsigned char *decoded_plane,
263  int width, int height, int src_stride, int stride)
264 {
265  int x, y;
266  int i;
267  int block_width, block_height;
268  int level;
269  int threshold[6];
 /* `src` is the second 16-row band of the scratch buffer; the first band
  * is used further down as `temp`. */
270  uint8_t *src = s->scratchbuf + stride * 16;
271  const int lambda = (s->picture.quality * s->picture.quality) >>
272  (2 * FF_LAMBDA_SHIFT);
273 
274  /* figure out the acceptable level thresholds in advance */
275  threshold[5] = QUALITY_THRESHOLD;
276  for (level = 4; level >= 0; level--)
277  threshold[level] = threshold[level + 1] * THRESHOLD_MULTIPLIER;
278 
 /* plane size in 16x16 blocks, rounded up */
279  block_width = (width + 15) / 16;
280  block_height = (height + 15) / 16;
281 
 /* Pass 1 (P pictures only): motion estimation for the whole plane. */
282  if (s->picture.pict_type == AV_PICTURE_TYPE_P) {
283  s->m.avctx = s->avctx;
285  s->m.last_picture_ptr = &s->m.last_picture;
286  s->m.last_picture.f.data[0] = ref_plane;
287  s->m.linesize =
288  s->m.last_picture.f.linesize[0] =
289  s->m.new_picture.f.linesize[0] =
291  s->m.width = width;
292  s->m.height = height;
293  s->m.mb_width = block_width;
294  s->m.mb_height = block_height;
295  s->m.mb_stride = s->m.mb_width + 1;
296  s->m.b8_stride = 2 * s->m.mb_width + 1;
297  s->m.f_code = 1;
298  s->m.pict_type = s->picture.pict_type;
299  s->m.me_method = s->avctx->me_method;
300  s->m.me.scene_change_score = 0;
301  s->m.flags = s->avctx->flags;
302  // s->m.out_format = FMT_H263;
303  // s->m.unrestricted_mv = 1;
304  s->m.lambda = s->picture.quality;
305  s->m.qscale = s->m.lambda * 139 +
306  FF_LAMBDA_SCALE * 64 >>
307  FF_LAMBDA_SHIFT + 7;
308  s->m.lambda2 = s->m.lambda * s->m.lambda +
309  FF_LAMBDA_SCALE / 2 >>
311 
 /* Motion vector buffers are allocated once per plane, on first use.
  * NOTE(review): the allocation results are not checked here. */
312  if (!s->motion_val8[plane]) {
313  s->motion_val8[plane] = av_mallocz((s->m.b8_stride *
314  block_height * 2 + 2) *
315  2 * sizeof(int16_t));
316  s->motion_val16[plane] = av_mallocz((s->m.mb_stride *
317  (block_height + 2) + 1) *
318  2 * sizeof(int16_t));
319  }
320 
321  s->m.mb_type = s->mb_type;
322 
323  // dummies, to avoid segfaults
325  s->m.current_picture.mb_var = (uint16_t *)s->dummy;
326  s->m.current_picture.mc_mb_var = (uint16_t *)s->dummy;
327  s->m.current_picture.f.mb_type = s->dummy;
328 
329  s->m.current_picture.f.motion_val[0] = s->motion_val8[plane] + 2;
330  s->m.p_mv_table = s->motion_val16[plane] +
331  s->m.mb_stride + 1;
332  s->m.dsp = s->dsp; // move
333  ff_init_me(&s->m);
334 
335  s->m.me.dia_size = s->avctx->dia_size;
336  s->m.first_slice_line = 1;
337  for (y = 0; y < block_height; y++) {
 /* Only one 16-row band is held in `src`; the data pointer is biased
  * so that row 16 * y of the "picture" lands on that band. */
338  s->m.new_picture.f.data[0] = src - y * 16 * stride; // ugly
339  s->m.mb_y = y;
340 
 /* Copy the band from the input and pad it to a multiple of 16: to
  * the right by repeating the last pixel of each row, at the bottom
  * by repeating the previous row. */
341  for (i = 0; i < 16 && i + 16 * y < height; i++) {
342  memcpy(&src[i * stride], &src_plane[(i + 16 * y) * src_stride],
343  width);
344  for (x = width; x < 16 * block_width; x++)
345  src[i * stride + x] = src[i * stride + x - 1];
346  }
347  for (; i < 16 && i + 16 * y < 16 * block_height; i++)
348  memcpy(&src[i * stride], &src[(i - 1) * stride],
349  16 * block_width);
350 
351  for (x = 0; x < block_width; x++) {
352  s->m.mb_x = x;
353  ff_init_block_index(&s->m);
355 
356  ff_estimate_p_frame_motion(&s->m, x, y);
357  }
358  s->m.first_slice_line = 0;
359  }
360 
361  ff_fix_long_p_mvs(&s->m);
362  ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code,
364  }
365 
 /* Pass 2: encode every block of the plane. */
366  s->m.first_slice_line = 1;
367  for (y = 0; y < block_height; y++) {
 /* same band copy and padding as in the motion estimation pass */
368  for (i = 0; i < 16 && i + 16 * y < height; i++) {
369  memcpy(&src[i * stride], &src_plane[(i + 16 * y) * src_stride],
370  width);
371  for (x = width; x < 16 * block_width; x++)
372  src[i * stride + x] = src[i * stride + x - 1];
373  }
374  for (; i < 16 && i + 16 * y < 16 * block_height; i++)
375  memcpy(&src[i * stride], &src[(i - 1) * stride], 16 * block_width);
376 
377  s->m.mb_y = y;
378  for (x = 0; x < block_width; x++) {
 /* One set of six per-level bit buffers (and bit counts) for each
  * candidate: [0] intra, [1] inter, [2] reference copy. */
379  uint8_t reorder_buffer[3][6][7 * 32];
380  int count[3][6];
381  int offset = y * 16 * stride + x * 16;
382  uint8_t *decoded = decoded_plane + offset;
383  uint8_t *ref = ref_plane + offset;
384  int score[4] = { 0, 0, 0, 0 }, best;
385  uint8_t *temp = s->scratchbuf;
386 
 /* bail out when fewer than 3000 bytes remain in the output buffer */
387  if (s->pb.buf_end - s->pb.buf -
388  (put_bits_count(&s->pb) >> 3) < 3000) { // FIXME: check size
389  av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
390  return -1;
391  }
392 
393  s->m.mb_x = x;
394  ff_init_block_index(&s->m);
396 
 /* Candidate 0: intra. Its reconstruction goes to `temp` and is
  * copied to `decoded` only if this candidate wins. */
397  if (s->picture.pict_type == AV_PICTURE_TYPE_I ||
398  (s->m.mb_type[x + y * s->m.mb_stride] &
400  for (i = 0; i < 6; i++)
401  init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i],
402  7 * 32);
403  if (s->picture.pict_type == AV_PICTURE_TYPE_P) {
405  put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
406  score[0] = vlc[1] * lambda;
407  }
408  score[0] += encode_block(s, src + 16 * x, NULL, temp, stride,
409  5, 64, lambda, 1);
410  for (i = 0; i < 6; i++) {
411  count[0][i] = put_bits_count(&s->reorder_pb[i]);
412  flush_put_bits(&s->reorder_pb[i]);
413  }
414  } else
415  score[0] = INT_MAX;
416 
417  best = 0;
418 
419  if (s->picture.pict_type == AV_PICTURE_TYPE_P) {
421  int mx, my, pred_x, pred_y, dxy;
422  int16_t *motion_ptr;
423 
424  motion_ptr = ff_h263_pred_motion(&s->m, 0, 0, &pred_x, &pred_y);
 /* Candidate 1: inter, using the estimated motion vector. */
425  if (s->m.mb_type[x + y * s->m.mb_stride] &
427  for (i = 0; i < 6; i++)
428  init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i],
429  7 * 32);
430 
431  put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
432 
 /* The motion vector difference is written through s->m.pb, so
  * the level-5 writer is lent to it and taken back afterwards. */
433  s->m.pb = s->reorder_pb[5];
434  mx = motion_ptr[0];
435  my = motion_ptr[1];
436  av_assert1(mx >= -32 && mx <= 31);
437  av_assert1(my >= -32 && my <= 31);
438  av_assert1(pred_x >= -32 && pred_x <= 31);
439  av_assert1(pred_y >= -32 && pred_y <= 31);
440  ff_h263_encode_motion(&s->m, mx - pred_x, 1);
441  ff_h263_encode_motion(&s->m, my - pred_y, 1);
442  s->reorder_pb[5] = s->m.pb;
443  score[1] += lambda * put_bits_count(&s->reorder_pb[5]);
444 
 /* Low bit of each component selects the put_pixels variant, the
  * remaining bits give the whole-pixel displacement. */
445  dxy = (mx & 1) + 2 * (my & 1);
446 
447  s->dsp.put_pixels_tab[0][dxy](temp + 16,
448  ref + (mx >> 1) +
449  stride * (my >> 1),
450  stride, 16);
451 
452  score[1] += encode_block(s, src + 16 * x, temp + 16,
453  decoded, stride, 5, 64, lambda, 0);
454  best = score[1] <= score[0];
455 
 /* Candidate 2: copy the reference block unchanged. It is taken
  * only for a zero motion vector and writes its code straight to
  * s->pb, with no reorder bits to copy afterwards. */
457  score[2] = s->dsp.sse[0](NULL, src + 16 * x, ref,
458  stride, 16);
459  score[2] += vlc[1] * lambda;
460  if (score[2] < score[best] && mx == 0 && my == 0) {
461  best = 2;
462  s->dsp.put_pixels_tab[0][0](decoded, ref, stride, 16);
463  for (i = 0; i < 6; i++)
464  count[2][i] = 0;
465  put_bits(&s->pb, vlc[1], vlc[0]);
466  }
467  }
468 
469  if (best == 1) {
470  for (i = 0; i < 6; i++) {
471  count[1][i] = put_bits_count(&s->reorder_pb[i]);
472  flush_put_bits(&s->reorder_pb[i]);
473  }
474  } else {
 /* inter was not chosen: zero the stored vectors of this block */
475  motion_ptr[0] =
476  motion_ptr[1] =
477  motion_ptr[2] =
478  motion_ptr[3] =
479  motion_ptr[0 + 2 * s->m.b8_stride] =
480  motion_ptr[1 + 2 * s->m.b8_stride] =
481  motion_ptr[2 + 2 * s->m.b8_stride] =
482  motion_ptr[3 + 2 * s->m.b8_stride] = 0;
483  }
484  }
485 
486  s->rd_total += score[best];
487 
 /* Append the winner's per-level buffers, highest level first. */
488  for (i = 5; i >= 0; i--)
489  avpriv_copy_bits(&s->pb, reorder_buffer[best][i],
490  count[best][i]);
491  if (best == 0)
492  s->dsp.put_pixels_tab[0][0](decoded, temp, stride, 16);
493  }
494  s->m.first_slice_line = 0;
495  }
496  return 0;
497 }
498 
500 {
 /*
  * Encoder initialisation: records the frame size, derives the luma and
  * chroma block counts (chroma uses width / 4 and height / 4) and allocates
  * the motion estimation work buffers plus the mb_type and dummy arrays.
  * Always returns 0.
  *
  * NOTE(review): the line carrying this function's signature (listing line
  * 499) and listing line 517 are missing from this extract.
  * NOTE(review): none of the av_mallocz() results are checked before
  * returning success.
  */
501  SVQ1Context *const s = avctx->priv_data;
502 
503  ff_dsputil_init(&s->dsp, avctx);
504  avctx->coded_frame = &s->picture;
505 
506  s->frame_width = avctx->width;
507  s->frame_height = avctx->height;
508 
 /* block counts are rounded up to whole 16x16 blocks */
509  s->y_block_width = (s->frame_width + 15) / 16;
510  s->y_block_height = (s->frame_height + 15) / 16;
511 
512  s->c_block_width = (s->frame_width / 4 + 15) / 16;
513  s->c_block_height = (s->frame_height / 4 + 15) / 16;
514 
515  s->avctx = avctx;
516  s->m.avctx = avctx;
 /* me.temp and me.scratchpad share a single allocation */
518  s->m.me.temp =
519  s->m.me.scratchpad = av_mallocz((avctx->width + 64) *
520  2 * 16 * 2 * sizeof(uint8_t));
521  s->m.me.map = av_mallocz(ME_MAP_SIZE * sizeof(uint32_t));
522  s->m.me.score_map = av_mallocz(ME_MAP_SIZE * sizeof(uint32_t));
523  s->mb_type = av_mallocz((s->y_block_width + 1) *
524  s->y_block_height * sizeof(int16_t));
525  s->dummy = av_mallocz((s->y_block_width + 1) *
526  s->y_block_height * sizeof(int32_t));
527  ff_h263_encode_init(&s->m); // mv_penalty
528 
529  return 0;
530 }
531 
533  const AVFrame *pict, int *got_packet)
534 {
 /*
  * Encode one input frame into pkt.
  *
  * Allocates the packet, allocates the two reconstruction pictures and the
  * scratch buffer on the first call, encodes the three planes with
  * svq1_encode_plane(), pads the bitstream with zero bits to a multiple of
  * 32 bits and sets pkt->size, the key-frame flag and *got_packet.
  * Returns 0 on success, the error from packet/buffer allocation, or -1 for
  * an unsupported pixel format or a plane that failed to encode.
  *
  * NOTE(review): this listing is an extract; the first signature line (532)
  * and listing lines 541, 558, 565-566 and 568 are missing, so the packet
  * size expression, part of the picture swap, the tail of the pict_type
  * expression and whatever precedes the plane loop are not visible.
  */
535  SVQ1Context *const s = avctx->priv_data;
536  AVFrame *const p = &s->picture;
537  AVFrame temp;
538  int i, ret;
539 
540  if ((ret = ff_alloc_packet2(avctx, pkt, s->y_block_width * s->y_block_height *
542  return ret;
543 
544  if (avctx->pix_fmt != AV_PIX_FMT_YUV410P) {
545  av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
546  return -1;
547  }
548 
 /* first call: get both reconstruction pictures and a scratch buffer of
  * 2 * 16 lines.
  * NOTE(review): the av_malloc() result is not checked. */
549  if (!s->current_picture.data[0]) {
550  if ((ret = ff_get_buffer(avctx, &s->current_picture))< 0 ||
551  (ret = ff_get_buffer(avctx, &s->last_picture)) < 0) {
552  return ret;
553  }
554  s->scratchbuf = av_malloc(s->current_picture.linesize[0] * 16 * 2);
555  }
556 
 /* the previous reconstruction becomes the reference (middle statement of
  * the swap is on a missing listing line) */
557  temp = s->current_picture;
559  s->last_picture = temp;
560 
561  init_put_bits(&s->pb, pkt->data, pkt->size);
562 
563  *p = *pict;
564  p->pict_type = avctx->gop_size && avctx->frame_number % avctx->gop_size ?
567 
 /* planes 1 and 2 are coded at a quarter of the frame width and height */
569  for (i = 0; i < 3; i++)
570  if (svq1_encode_plane(s, i,
571  s->picture.data[i],
572  s->last_picture.data[i],
573  s->current_picture.data[i],
574  s->frame_width / (i ? 4 : 1),
575  s->frame_height / (i ? 4 : 1),
576  s->picture.linesize[i],
577  s->current_picture.linesize[i]) < 0)
578  return -1;
579 
580  // avpriv_align_put_bits(&s->pb);
 /* pad with zero bits until the bit count is a multiple of 32 */
581  while (put_bits_count(&s->pb) & 31)
582  put_bits(&s->pb, 1, 0);
583 
584  flush_put_bits(&s->pb);
585 
586  pkt->size = put_bits_count(&s->pb) / 8;
587  if (p->pict_type == AV_PICTURE_TYPE_I)
588  pkt->flags |= AV_PKT_FLAG_KEY;
589  *got_packet = 1;
590 
591  return 0;
592 }
593 
595 {
 /*
  * Encoder teardown: logs the accumulated score normalised by
  * width * height * frame_number, frees every buffer owned by the context
  * and releases the two reconstruction pictures if they were obtained.
  * Always returns 0.
  *
  * NOTE(review): the line carrying this function's signature (listing line
  * 594) is missing from this extract.
  */
596  SVQ1Context *const s = avctx->priv_data;
597  int i;
598 
 /* NOTE(review): the divisor is an int product and is 0 when
  * avctx->frame_number is 0. */
599  av_log(avctx, AV_LOG_DEBUG, "RD: %f\n",
600  s->rd_total / (double)(avctx->width * avctx->height *
601  avctx->frame_number));
602 
 /* me.temp is set to the same allocation as me.scratchpad at init, so
  * only scratchpad is freed */
603  av_freep(&s->m.me.scratchpad);
604  av_freep(&s->m.me.map);
605  av_freep(&s->m.me.score_map);
606  av_freep(&s->mb_type);
607  av_freep(&s->dummy);
608  av_freep(&s->scratchbuf);
609 
610  for (i = 0; i < 3; i++) {
611  av_freep(&s->motion_val8[i]);
612  av_freep(&s->motion_val16[i]);
613  }
614  if(s->current_picture.data[0])
615  avctx->release_buffer(avctx, &s->current_picture);
616  if(s->last_picture.data[0])
617  avctx->release_buffer(avctx, &s->last_picture);
618 
619  return 0;
620 }
621 
 /*
  * Codec descriptor fields for the SVQ1 video encoder.
  * NOTE(review): the declaration line (listing line 622) and listing lines
  * 627 and 629 are missing from this extract, so only the frame-encoding
  * callback is visible among the callbacks.
  */
623  .name = "svq1",
624  .type = AVMEDIA_TYPE_VIDEO,
625  .id = AV_CODEC_ID_SVQ1,
626  .priv_data_size = sizeof(SVQ1Context),
628  .encode2 = svq1_encode_frame,
 /* YUV 4:1:0 planar is the only accepted input format */
630  .pix_fmts = (const enum PixelFormat[]) { AV_PIX_FMT_YUV410P,
631  AV_PIX_FMT_NONE },
632  .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 1 / Sorenson Video 1 / SVQ1"),
633 };