FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
cinepakenc.c
Go to the documentation of this file.
1 /*
2  * Cinepak encoder (c) 2011 Tomas Härdin
3  * http://titan.codemill.se/~tomhar/cinepakenc.patch
4  *
5  * Fixes and improvements, vintage decoders compatibility
6  * (c) 2013, 2014 Rl, Aetey Global Technologies AB
7 
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 and/or sell copies of the Software, and to permit persons to whom the
13 Software is furnished to do so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice shall be included
16 in all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 OTHER DEALINGS IN THE SOFTWARE.
25 
26  * TODO:
27  * - optimize: color space conversion, ...
28  * - implement options to set the min/max number of strips?
29  * MAYBE:
30  * - "optimally" split the frame into several non-regular areas
31  * using a separate codebook pair for each area and approximating
32  * the area by several rectangular strips (generally not full width ones)
33  * (use quadtree splitting? a simple fixed-granularity grid?)
34  *
35  *
36  * version 2014-01-23 Rl
37  * - added option handling for flexibility
38  *
39  * version 2014-01-21 Rl
40  * - believe it or not, now we get even smaller files, with better quality
41  * (which means I missed an optimization earlier :)
42  *
43  * version 2014-01-20 Rl
44  * - made the encoder compatible with vintage decoders
45  * and added some yet unused code for possible future
46  * incremental codebook updates
47  * - fixed a small memory leak
48  *
49  * version 2013-04-28 Rl
50  * - bugfixed codebook optimization logic
51  *
52  * version 2013-02-14 Rl
53  * "Valentine's Day" version:
54  * - made strip division more robust
55  * - minimized bruteforcing the number of strips,
56  * (costs some R/D but speeds up compression a lot), the heuristic
57  * assumption is that score as a function of the number of strips has
58  * one wide minimum which moves slowly, of course not fully true
59  * - simplified codebook generation,
60  * the old code was meant for other optimizations than we actually do
61  * - optimized the codebook generation / error estimation for MODE_MC
62  *
63  * version 2013-02-12 Rl
64  * - separated codebook training sets, avoided the transfer of wasted bytes,
65  * which yields both better quality and smaller files
66  * - now using the correct colorspace (TODO: move conversion to libswscale)
67  *
68  * version 2013-02-08 Rl
69  * - fixes/optimization in multistrip encoding and codebook size choice,
70  * quality/bitrate is now better than that of the binary proprietary encoder
71  */
72 
73 #include "libavutil/intreadwrite.h"
74 #include "avcodec.h"
75 #include "libavutil/lfg.h"
76 #include "elbg.h"
77 #include "internal.h"
78 
79 #include "libavutil/avassert.h"
80 #include "libavutil/opt.h"
81 
82 #define CVID_HEADER_SIZE 10
83 #define STRIP_HEADER_SIZE 12
84 #define CHUNK_HEADER_SIZE 4
85 
86 #define MB_SIZE 4 //4x4 MBs
87 #define MB_AREA (MB_SIZE*MB_SIZE)
88 
89 #define VECTOR_MAX 6 //six or four entries per vector depending on format
90 #define CODEBOOK_MAX 256 //size of a codebook
91 
92 #define MAX_STRIPS 32 //Note: having fewer choices regarding the number of strips speeds up encoding (obviously)
93 #define MIN_STRIPS 1 //Note: having more strips speeds up encoding the frame (this is less obvious)
94 // MAX_STRIPS limits the maximum quality you can reach
95 // when you want high quality on high resolutions,
96 // MIN_STRIPS limits the minimum efficiently encodable bit rate
97 // on low resolutions
98 // the numbers are only used for brute force optimization for the first frame,
99 // for the following frames they are adaptively readjusted
100 // NOTE the decoder in ffmpeg has its own arbitrary limitation on the number
101 // of strips, currently 32
102 
103 typedef enum {
107 
109 } CinepakMode;
110 
111 typedef enum {
115 
117 } mb_encoding;
118 
// Per-macroblock rate/distortion bookkeeping: for one 4x4 block it stores the
// codebook indices chosen for each candidate encoding, the distortion each
// candidate yields, and the encoding that was picked last.
119 typedef struct {
120  int v1_vector; //index of the single V1 codebook entry covering the whole block
121  int v1_error; //distortion of the block when it is coded with that V1 entry
122  int v4_vector[4]; //V4 codebook indices, one per 2x2 quadrant of the block
123  int v4_error; //distortion of the block when it is coded with the four V4 entries
124  int skip_error; //distortion when the block is skipped (i.e. copied from the last frame)
125  mb_encoding best_encoding; //last decision made by calculate_mode_score()
126 } mb_info;
127 
128 typedef struct {
129  int v1_codebook[CODEBOOK_MAX*VECTOR_MAX];
130  int v4_codebook[CODEBOOK_MAX*VECTOR_MAX];
131  int v1_size;
132  int v4_size;
134 } strip_info;
135 
136 typedef struct {
137  const AVClass *class;
139  unsigned char *pict_bufs[4], *strip_buf, *frame_buf;
145  int w, h;
147  int curframe, keyint;
149  uint64_t lambda;
152  mb_info *mb; //MB RD state
153  int min_strips; //the current limit
154  int max_strips; //the current limit
155 #ifdef CINEPAKENC_DEBUG
156  mb_info *best_mb; //TODO: remove. only used for printing stats
157  int num_v1_mode, num_v4_mode, num_mc_mode;
158  int num_v1_encs, num_v4_encs, num_skips;
159 #endif
160 // options
167 
168 #define OFFSET(x) offsetof(CinepakEncContext, x)
169 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
170 static const AVOption options[] = {
171  { "max_extra_cb_iterations", "Max extra codebook recalculation passes, more is better and slower", OFFSET(max_extra_cb_iterations), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, VE },
172  { "skip_empty_cb", "Avoid wasting bytes, ignore vintage MacOS decoder", OFFSET(skip_empty_cb), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
173  { "max_strips", "Limit strips/frame, vintage compatible is 1..3, otherwise the more the better", OFFSET(max_max_strips), AV_OPT_TYPE_INT, { .i64 = 3 }, MIN_STRIPS, MAX_STRIPS, VE },
174  { "min_strips", "Enforce min strips/frame, more is worse and faster, must be <= max_strips", OFFSET(min_min_strips), AV_OPT_TYPE_INT, { .i64 = MIN_STRIPS }, MIN_STRIPS, MAX_STRIPS, VE },
175  { "strip_number_adaptivity", "How fast the strip number adapts, more is slightly better, much slower", OFFSET(strip_number_delta_range), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_STRIPS-MIN_STRIPS, VE },
176  { NULL },
177 };
178 
179 static const AVClass cinepak_class = {
180  .class_name = "cinepak",
181  .item_name = av_default_item_name,
182  .option = options,
183  .version = LIBAVUTIL_VERSION_INT,
184 };
185 
187 {
188  CinepakEncContext *s = avctx->priv_data;
189  int x, mb_count, strip_buf_size, frame_buf_size;
190 
191  if (avctx->width & 3 || avctx->height & 3) {
192  av_log(avctx, AV_LOG_ERROR, "width and height must be multiples of four (got %ix%i)\n",
193  avctx->width, avctx->height);
194  return AVERROR(EINVAL);
195  }
196 
197  if (s->min_min_strips > s->max_max_strips) {
198  av_log(avctx, AV_LOG_ERROR, "minimal number of strips can not exceed maximal (got %i and %i)\n",
200  return AVERROR(EINVAL);
201  }
202 
203  if (!(s->last_frame = av_frame_alloc()))
204  return AVERROR(ENOMEM);
205  if (!(s->best_frame = av_frame_alloc()))
206  goto enomem;
207  if (!(s->scratch_frame = av_frame_alloc()))
208  goto enomem;
209  if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
210  if (!(s->input_frame = av_frame_alloc()))
211  goto enomem;
212 
213  if (!(s->codebook_input = av_malloc(sizeof(int) * (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
214  goto enomem;
215 
216  if (!(s->codebook_closest = av_malloc(sizeof(int) * (avctx->width * avctx->height) >> 2)))
217  goto enomem;
218 
219  for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
220  if(!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
221  goto enomem;
222 
223  mb_count = avctx->width * avctx->height / MB_AREA;
224 
225  //the largest possible chunk is 0x31 with all MBs encoded in V4 mode
226  //and full codebooks being replaced in INTER mode,
227  // which is 34 bits per MB
228  //and 2*256 extra flag bits per strip
229  strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX)/8;
230 
231  frame_buf_size = CVID_HEADER_SIZE + s->max_max_strips * strip_buf_size;
232 
233  if (!(s->strip_buf = av_malloc(strip_buf_size)))
234  goto enomem;
235 
236  if (!(s->frame_buf = av_malloc(frame_buf_size)))
237  goto enomem;
238 
239  if (!(s->mb = av_malloc_array(mb_count, sizeof(mb_info))))
240  goto enomem;
241 
242 #ifdef CINEPAKENC_DEBUG
243  if (!(s->best_mb = av_malloc_array(mb_count, sizeof(mb_info))))
244  goto enomem;
245 #endif
246 
247  av_lfg_init(&s->randctx, 1);
248  s->avctx = avctx;
249  s->w = avctx->width;
250  s->h = avctx->height;
251  s->frame_buf_size = frame_buf_size;
252  s->curframe = 0;
253  s->keyint = avctx->keyint_min;
254  s->pix_fmt = avctx->pix_fmt;
255 
256  //set up AVFrames
257  s->last_frame->data[0] = s->pict_bufs[0];
258  s->last_frame->linesize[0] = s->w;
259  s->best_frame->data[0] = s->pict_bufs[1];
260  s->best_frame->linesize[0] = s->w;
261  s->scratch_frame->data[0] = s->pict_bufs[2];
262  s->scratch_frame->linesize[0] = s->w;
263 
264  if (s->pix_fmt == AV_PIX_FMT_RGB24) {
265  s->last_frame->data[1] = s->last_frame->data[0] + s->w * s->h;
266  s->last_frame->data[2] = s->last_frame->data[1] + ((s->w * s->h) >> 2);
267  s->last_frame->linesize[1] = s->last_frame->linesize[2] = s->w >> 1;
268 
269  s->best_frame->data[1] = s->best_frame->data[0] + s->w * s->h;
270  s->best_frame->data[2] = s->best_frame->data[1] + ((s->w * s->h) >> 2);
271  s->best_frame->linesize[1] = s->best_frame->linesize[2] = s->w >> 1;
272 
273  s->scratch_frame->data[1] = s->scratch_frame->data[0] + s->w * s->h;
274  s->scratch_frame->data[2] = s->scratch_frame->data[1] + ((s->w * s->h) >> 2);
275  s->scratch_frame->linesize[1] = s->scratch_frame->linesize[2] = s->w >> 1;
276 
277  s->input_frame->data[0] = s->pict_bufs[3];
278  s->input_frame->linesize[0] = s->w;
279  s->input_frame->data[1] = s->input_frame->data[0] + s->w * s->h;
280  s->input_frame->data[2] = s->input_frame->data[1] + ((s->w * s->h) >> 2);
281  s->input_frame->linesize[1] = s->input_frame->linesize[2] = s->w >> 1;
282  }
283 
284  s->min_strips = s->min_min_strips;
285  s->max_strips = s->max_max_strips;
286 
287 #ifdef CINEPAKENC_DEBUG
288  s->num_v1_mode = s->num_v4_mode = s->num_mc_mode = s->num_v1_encs = s->num_v4_encs = s->num_skips = 0;
289 #endif
290 
291  return 0;
292 
293 enomem:
297  if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
301  av_freep(&s->strip_buf);
302  av_freep(&s->frame_buf);
303  av_freep(&s->mb);
304 #ifdef CINEPAKENC_DEBUG
305  av_freep(&s->best_mb);
306 #endif
307 
308  for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
309  av_freep(&s->pict_bufs[x]);
310 
311  return AVERROR(ENOMEM);
312 }
313 
314 static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *info, int report, int *training_set_v1_shrunk, int *training_set_v4_shrunk
315 #ifdef CINEPAK_REPORT_SERR
316 , int64_t *serr
317 #endif
318 )
319 {
320  //score = FF_LAMBDA_SCALE * error + lambda * bits
321  int x;
322  int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
323  int mb_count = s->w * h / MB_AREA;
324  mb_info *mb;
325  int64_t score1, score2, score3;
326  int64_t ret = s->lambda * ((info->v1_size ? CHUNK_HEADER_SIZE + info->v1_size * entry_size : 0) +
327  (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
328  CHUNK_HEADER_SIZE) << 3;
329 
330  //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9lli score mb_count %i", info->v1_size, info->v4_size, (long long int)ret, mb_count);
331 
332 #ifdef CINEPAK_REPORT_SERR
333  *serr = 0;
334 #endif
335 
336  switch(info->mode) {
337  case MODE_V1_ONLY:
338  //one byte per MB
339  ret += s->lambda * 8 * mb_count;
340 
341 // while calculating we assume all blocks are ENC_V1
342  for(x = 0; x < mb_count; x++) {
343  mb = &s->mb[x];
344  ret += FF_LAMBDA_SCALE * mb->v1_error;
345 #ifdef CINEPAK_REPORT_SERR
346  *serr += mb->v1_error;
347 #endif
348 // this function is never called for report in MODE_V1_ONLY
349 // if(!report)
350  mb->best_encoding = ENC_V1;
351  }
352 
353  break;
354  case MODE_V1_V4:
355  //9 or 33 bits per MB
356  if(report) {
357 // no moves between the corresponding training sets are allowed
358  *training_set_v1_shrunk = *training_set_v4_shrunk = 0;
359  for(x = 0; x < mb_count; x++) {
360  int mberr;
361  mb = &s->mb[x];
362  if(mb->best_encoding == ENC_V1)
363  score1 = s->lambda * 9 + FF_LAMBDA_SCALE * (mberr=mb->v1_error);
364  else
365  score1 = s->lambda * 33 + FF_LAMBDA_SCALE * (mberr=mb->v4_error);
366  ret += score1;
367 #ifdef CINEPAK_REPORT_SERR
368  *serr += mberr;
369 #endif
370  }
371  } else { // find best mode per block
372  for(x = 0; x < mb_count; x++) {
373  mb = &s->mb[x];
374  score1 = s->lambda * 9 + FF_LAMBDA_SCALE * mb->v1_error;
375  score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error;
376 
377  if(score1 <= score2) {
378  ret += score1;
379 #ifdef CINEPAK_REPORT_SERR
380  *serr += mb->v1_error;
381 #endif
382  mb->best_encoding = ENC_V1;
383  } else {
384  ret += score2;
385 #ifdef CINEPAK_REPORT_SERR
386  *serr += mb->v4_error;
387 #endif
388  mb->best_encoding = ENC_V4;
389  }
390  }
391  }
392 
393  break;
394  case MODE_MC:
395  //1, 10 or 34 bits per MB
396  if(report) {
397  int v1_shrunk = 0, v4_shrunk = 0;
398  for(x = 0; x < mb_count; x++) {
399  mb = &s->mb[x];
400 // it is OK to move blocks to ENC_SKIP here
401 // but not to any codebook encoding!
402  score1 = s->lambda * 1 + FF_LAMBDA_SCALE * mb->skip_error;
403  if(mb->best_encoding == ENC_SKIP) {
404  ret += score1;
405 #ifdef CINEPAK_REPORT_SERR
406  *serr += mb->skip_error;
407 #endif
408  } else if(mb->best_encoding == ENC_V1) {
409  if((score2=s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error) >= score1) {
410  mb->best_encoding = ENC_SKIP;
411  ++v1_shrunk;
412  ret += score1;
413 #ifdef CINEPAK_REPORT_SERR
414  *serr += mb->skip_error;
415 #endif
416  } else {
417  ret += score2;
418 #ifdef CINEPAK_REPORT_SERR
419  *serr += mb->v1_error;
420 #endif
421  }
422  } else {
423  if((score3=s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error) >= score1) {
424  mb->best_encoding = ENC_SKIP;
425  ++v4_shrunk;
426  ret += score1;
427 #ifdef CINEPAK_REPORT_SERR
428  *serr += mb->skip_error;
429 #endif
430  } else {
431  ret += score3;
432 #ifdef CINEPAK_REPORT_SERR
433  *serr += mb->v4_error;
434 #endif
435  }
436  }
437  }
438  *training_set_v1_shrunk = v1_shrunk;
439  *training_set_v4_shrunk = v4_shrunk;
440  } else { // find best mode per block
441  for(x = 0; x < mb_count; x++) {
442  mb = &s->mb[x];
443  score1 = s->lambda * 1 + FF_LAMBDA_SCALE * mb->skip_error;
444  score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error;
445  score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error;
446 
447  if(score1 <= score2 && score1 <= score3) {
448  ret += score1;
449 #ifdef CINEPAK_REPORT_SERR
450  *serr += mb->skip_error;
451 #endif
452  mb->best_encoding = ENC_SKIP;
453  } else if(score2 <= score3) {
454  ret += score2;
455 #ifdef CINEPAK_REPORT_SERR
456  *serr += mb->v1_error;
457 #endif
458  mb->best_encoding = ENC_V1;
459  } else {
460  ret += score3;
461 #ifdef CINEPAK_REPORT_SERR
462  *serr += mb->v4_error;
463 #endif
464  mb->best_encoding = ENC_V4;
465  }
466  }
467  }
468 
469  break;
470  }
471 
472  return ret;
473 }
474 
475 static int write_chunk_header(unsigned char *buf, int chunk_type, int chunk_size)
476 {
477  buf[0] = chunk_type;
478  AV_WB24(&buf[1], chunk_size + CHUNK_HEADER_SIZE);
479  return CHUNK_HEADER_SIZE;
480 }
481 
482 static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int chunk_type_yuv, int chunk_type_gray, unsigned char *buf)
483 {
484  int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
485  int incremental_codebook_replacement_mode = 0; // hardcoded here,
486  // the compiler should notice that this is a constant -- rl
487 
488  ret = write_chunk_header(buf,
489  s->pix_fmt == AV_PIX_FMT_RGB24 ?
490  chunk_type_yuv+(incremental_codebook_replacement_mode?1:0) :
491  chunk_type_gray+(incremental_codebook_replacement_mode?1:0),
492  entry_size * size
493  + (incremental_codebook_replacement_mode?(size+31)/32*4:0) );
494 
495 // we do codebook encoding according to the "intra" mode
496 // but we keep the "dead" code for reference in case we will want
497 // to use incremental codebook updates (which actually would give us
498 // "kind of" motion compensation, especially in 1 strip/frame case) -- rl
499 // (of course, the code will be not useful as-is)
500  if(incremental_codebook_replacement_mode) {
501  int flags = 0;
502  int flagsind;
503  for(x = 0; x < size; x++) {
504  if(flags == 0) {
505  flagsind = ret;
506  ret += 4;
507  flags = 0x80000000;
508  } else
509  flags = ((flags>>1) | 0x80000000);
510  for(y = 0; y < entry_size; y++)
511  buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
512  if((flags&0xffffffff) == 0xffffffff) {
513  AV_WB32(&buf[flagsind], flags);
514  flags = 0;
515  }
516  }
517  if(flags)
518  AV_WB32(&buf[flagsind], flags);
519  } else
520  for(x = 0; x < size; x++)
521  for(y = 0; y < entry_size; y++)
522  buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
523 
524  return ret;
525 }
526 
527 //points out at the sub picture of in whose top-left corner is (x,y); no pixel data is copied
// NOTE(review): the signature line of this function is missing from this listing;
// call sites use it as get_sub_picture(s, x, y, in, &out).
529 {
// plane 0 is addressed at full resolution
530  out->data[0] = in->data[0] + x + y * in->linesize[0];
531  out->linesize[0] = in->linesize[0];
532 
// planes 1 and 2 are only set up for RGB24 input and are addressed at half resolution
533  if(s->pix_fmt == AV_PIX_FMT_RGB24) {
534  out->data[1] = in->data[1] + (x >> 1) + (y >> 1) * in->linesize[1];
535  out->linesize[1] = in->linesize[1];
536 
537  out->data[2] = in->data[2] + (x >> 1) + (y >> 1) * in->linesize[2];
538  out->linesize[2] = in->linesize[2];
539  }
540 }
541 
542 //decodes the V1 vector in mb into the 4x4 MB pointed to by sub_pict
543 static void decode_v1_vector(CinepakEncContext *s, AVPicture *sub_pict, int v1_vector, strip_info *info)
544 {
545  int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
546 
547  sub_pict->data[0][0] =
548  sub_pict->data[0][1] =
549  sub_pict->data[0][ sub_pict->linesize[0]] =
550  sub_pict->data[0][1+ sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size];
551 
552  sub_pict->data[0][2] =
553  sub_pict->data[0][3] =
554  sub_pict->data[0][2+ sub_pict->linesize[0]] =
555  sub_pict->data[0][3+ sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+1];
556 
557  sub_pict->data[0][2*sub_pict->linesize[0]] =
558  sub_pict->data[0][1+2*sub_pict->linesize[0]] =
559  sub_pict->data[0][ 3*sub_pict->linesize[0]] =
560  sub_pict->data[0][1+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+2];
561 
562  sub_pict->data[0][2+2*sub_pict->linesize[0]] =
563  sub_pict->data[0][3+2*sub_pict->linesize[0]] =
564  sub_pict->data[0][2+3*sub_pict->linesize[0]] =
565  sub_pict->data[0][3+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+3];
566 
567  if(s->pix_fmt == AV_PIX_FMT_RGB24) {
568  sub_pict->data[1][0] =
569  sub_pict->data[1][1] =
570  sub_pict->data[1][ sub_pict->linesize[1]] =
571  sub_pict->data[1][1+ sub_pict->linesize[1]] = info->v1_codebook[v1_vector*entry_size+4];
572 
573  sub_pict->data[2][0] =
574  sub_pict->data[2][1] =
575  sub_pict->data[2][ sub_pict->linesize[2]] =
576  sub_pict->data[2][1+ sub_pict->linesize[2]] = info->v1_codebook[v1_vector*entry_size+5];
577  }
578 }
579 
580 //decodes the V4 vectors in mb into the 4x4 MB pointed to by sub_pict
581 static void decode_v4_vector(CinepakEncContext *s, AVPicture *sub_pict, int *v4_vector, strip_info *info)
582 {
583  int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
584 
585  for(i = y = 0; y < 4; y += 2) {
586  for(x = 0; x < 4; x += 2, i++) {
587  sub_pict->data[0][x + y*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size];
588  sub_pict->data[0][x+1 + y*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+1];
589  sub_pict->data[0][x + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2];
590  sub_pict->data[0][x+1 + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3];
591 
592  if(s->pix_fmt == AV_PIX_FMT_RGB24) {
593  sub_pict->data[1][(x>>1) + (y>>1)*sub_pict->linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4];
594  sub_pict->data[2][(x>>1) + (y>>1)*sub_pict->linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5];
595  }
596  }
597  }
598 }
599 
// Body of copy_mb(): copies one macroblock from b into a, row by row.
// NOTE(review): the signature line is missing from this listing; encode_mode()
// calls it as copy_mb(s, &sub_scratch, &sub_last), i.e. a is the destination.
601 {
602  int y, p;
603 
// plane 0: MB_SIZE rows of MB_SIZE bytes each
604  for(y = 0; y < MB_SIZE; y++) {
605  memcpy(a->data[0]+y*a->linesize[0], b->data[0]+y*b->linesize[0],
606  MB_SIZE);
607  }
608 
// planes 1 and 2 exist only for RGB24 input and are half-size in both directions
609  if(s->pix_fmt == AV_PIX_FMT_RGB24) {
610  for(p = 1; p <= 2; p++) {
611  for(y = 0; y < MB_SIZE/2; y++) {
612  memcpy(a->data[p] + y*a->linesize[p],
613  b->data[p] + y*b->linesize[p],
614  MB_SIZE/2);
615  }
616  }
617  }
618 }
619 
620 static int encode_mode(CinepakEncContext *s, int h, AVPicture *scratch_pict, AVPicture *last_pict, strip_info *info, unsigned char *buf)
621 {
622  int x, y, z, flags, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA;
623  int needs_extra_bit, should_write_temp;
624  unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B
625  mb_info *mb;
626  AVPicture sub_scratch = {{0}}, sub_last = {{0}};
627 
628  //encode codebooks
629 ////// MacOS vintage decoder compatibility dictates the presence of
630 ////// the codebook chunk even when the codebook is empty - pretty dumb...
631 ////// and also the certain order of the codebook chunks -- rl
632  if(info->v4_size || !s->skip_empty_cb)
633  ret += encode_codebook(s, info->v4_codebook, info->v4_size, 0x20, 0x24, buf + ret);
634 
635  if(info->v1_size || !s->skip_empty_cb)
636  ret += encode_codebook(s, info->v1_codebook, info->v1_size, 0x22, 0x26, buf + ret);
637 
638  //update scratch picture
639  for(z = y = 0; y < h; y += MB_SIZE) {
640  for(x = 0; x < s->w; x += MB_SIZE, z++) {
641  mb = &s->mb[z];
642 
643  get_sub_picture(s, x, y, scratch_pict, &sub_scratch);
644 
645  if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
646  get_sub_picture(s, x, y, last_pict, &sub_last);
647  copy_mb(s, &sub_scratch, &sub_last);
648  } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
649  decode_v1_vector(s, &sub_scratch, mb->v1_vector, info);
650  else
651  decode_v4_vector(s, &sub_scratch, mb->v4_vector, info);
652  }
653  }
654 
655  switch(info->mode) {
656  case MODE_V1_ONLY:
657  //av_log(s->avctx, AV_LOG_INFO, "mb_count = %i\n", mb_count);
658  ret += write_chunk_header(buf + ret, 0x32, mb_count);
659 
660  for(x = 0; x < mb_count; x++)
661  buf[ret++] = s->mb[x].v1_vector;
662 
663  break;
664  case MODE_V1_V4:
665  //remember header position
666  header_ofs = ret;
667  ret += CHUNK_HEADER_SIZE;
668 
669  for(x = 0; x < mb_count; x += 32) {
670  flags = 0;
671  for(y = x; y < FFMIN(x+32, mb_count); y++)
672  if(s->mb[y].best_encoding == ENC_V4)
673  flags |= 1 << (31 - y + x);
674 
675  AV_WB32(&buf[ret], flags);
676  ret += 4;
677 
678  for(y = x; y < FFMIN(x+32, mb_count); y++) {
679  mb = &s->mb[y];
680 
681  if(mb->best_encoding == ENC_V1)
682  buf[ret++] = mb->v1_vector;
683  else
684  for(z = 0; z < 4; z++)
685  buf[ret++] = mb->v4_vector[z];
686  }
687  }
688 
689  write_chunk_header(buf + header_ofs, 0x30, ret - header_ofs - CHUNK_HEADER_SIZE);
690 
691  break;
692  case MODE_MC:
693  //remember header position
694  header_ofs = ret;
695  ret += CHUNK_HEADER_SIZE;
696  flags = bits = temp_size = 0;
697 
698  for(x = 0; x < mb_count; x++) {
699  mb = &s->mb[x];
700  flags |= (mb->best_encoding != ENC_SKIP) << (31 - bits++);
701  needs_extra_bit = 0;
702  should_write_temp = 0;
703 
704  if(mb->best_encoding != ENC_SKIP) {
705  if(bits < 32)
706  flags |= (mb->best_encoding == ENC_V4) << (31 - bits++);
707  else
708  needs_extra_bit = 1;
709  }
710 
711  if(bits == 32) {
712  AV_WB32(&buf[ret], flags);
713  ret += 4;
714  flags = bits = 0;
715 
716  if(mb->best_encoding == ENC_SKIP || needs_extra_bit) {
717  memcpy(&buf[ret], temp, temp_size);
718  ret += temp_size;
719  temp_size = 0;
720  } else
721  should_write_temp = 1;
722  }
723 
724  if(needs_extra_bit) {
725  flags = (mb->best_encoding == ENC_V4) << 31;
726  bits = 1;
727  }
728 
729  if(mb->best_encoding == ENC_V1)
730  temp[temp_size++] = mb->v1_vector;
731  else if(mb->best_encoding == ENC_V4)
732  for(z = 0; z < 4; z++)
733  temp[temp_size++] = mb->v4_vector[z];
734 
735  if(should_write_temp) {
736  memcpy(&buf[ret], temp, temp_size);
737  ret += temp_size;
738  temp_size = 0;
739  }
740  }
741 
742  if(bits > 0) {
743  AV_WB32(&buf[ret], flags);
744  ret += 4;
745  memcpy(&buf[ret], temp, temp_size);
746  ret += temp_size;
747  }
748 
749  write_chunk_header(buf + header_ofs, 0x31, ret - header_ofs - CHUNK_HEADER_SIZE);
750 
751  break;
752  }
753 
754  return ret;
755 }
756 
757 //computes distortion of 4x4 MB in b compared to a: the sum of squared sample differences
// NOTE(review): the signature line of this function is missing from this listing;
// callers invoke it as compute_mb_distortion(s, &a, &b) and store the result in an int.
759 {
760  int x, y, p, d, ret = 0;
761 
// plane 0: all MB_SIZE x MB_SIZE samples
762  for(y = 0; y < MB_SIZE; y++) {
763  for(x = 0; x < MB_SIZE; x++) {
764  d = a->data[0][x + y*a->linesize[0]] - b->data[0][x + y*b->linesize[0]];
765  ret += d*d;
766  }
767  }
768 
// planes 1 and 2 contribute only for RGB24 input, at half size in both directions
769  if(s->pix_fmt == AV_PIX_FMT_RGB24) {
770  for(p = 1; p <= 2; p++) {
771  for(y = 0; y < MB_SIZE/2; y++) {
772  for(x = 0; x < MB_SIZE/2; x++) {
773  d = a->data[p][x + y*a->linesize[p]] - b->data[p][x + y*b->linesize[p]];
774  ret += d*d;
775  }
776  }
777  }
778  }
779 
780  return ret;
781 }
782 
783 // return the possibly adjusted size of the codebook
784 #define CERTAIN(x) ((x)!=ENC_UNCERTAIN)
785 static int quantize(CinepakEncContext *s, int h, AVPicture *pict,
786  int v1mode, strip_info *info,
787  mb_encoding encoding)
788 {
789  int x, y, i, j, k, x2, y2, x3, y3, plane, shift, mbn;
790  int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
791  int *codebook = v1mode ? info->v1_codebook : info->v4_codebook;
792  int size = v1mode ? info->v1_size : info->v4_size;
793  int64_t total_error = 0;
794  uint8_t vq_pict_buf[(MB_AREA*3)/2];
795  AVPicture sub_pict, vq_pict;
796 
797  for(mbn = i = y = 0; y < h; y += MB_SIZE) {
798  for(x = 0; x < s->w; x += MB_SIZE, ++mbn) {
799  int *base;
800 
801  if(CERTAIN(encoding)) {
802 // use for the training only the blocks known to be to be encoded [sic:-]
803  if(s->mb[mbn].best_encoding != encoding) continue;
804  }
805 
806  base = s->codebook_input + i*entry_size;
807  if(v1mode) {
808  //subsample
809  for(j = y2 = 0; y2 < entry_size; y2 += 2) {
810  for(x2 = 0; x2 < 4; x2 += 2, j++) {
811  plane = y2 < 4 ? 0 : 1 + (x2 >> 1);
812  shift = y2 < 4 ? 0 : 1;
813  x3 = shift ? 0 : x2;
814  y3 = shift ? 0 : y2;
815  base[j] = (pict->data[plane][((x+x3) >> shift) + ((y+y3) >> shift) * pict->linesize[plane]] +
816  pict->data[plane][((x+x3) >> shift) + 1 + ((y+y3) >> shift) * pict->linesize[plane]] +
817  pict->data[plane][((x+x3) >> shift) + (((y+y3) >> shift) + 1) * pict->linesize[plane]] +
818  pict->data[plane][((x+x3) >> shift) + 1 + (((y+y3) >> shift) + 1) * pict->linesize[plane]]) >> 2;
819  }
820  }
821  } else {
822  //copy
823  for(j = y2 = 0; y2 < MB_SIZE; y2 += 2) {
824  for(x2 = 0; x2 < MB_SIZE; x2 += 2) {
825  for(k = 0; k < entry_size; k++, j++) {
826  plane = k >= 4 ? k - 3 : 0;
827 
828  if(k >= 4) {
829  x3 = (x+x2) >> 1;
830  y3 = (y+y2) >> 1;
831  } else {
832  x3 = x + x2 + (k & 1);
833  y3 = y + y2 + (k >> 1);
834  }
835 
836  base[j] = pict->data[plane][x3 + y3*pict->linesize[plane]];
837  }
838  }
839  }
840  }
841  i += v1mode ? 1 : 4;
842  }
843  }
844 // if(i < mbn*(v1mode ? 1 : 4)) {
845 // av_log(s->avctx, AV_LOG_INFO, "reducing training set for %s from %i to %i (encoding %i)\n", v1mode?"v1":"v4", mbn*(v1mode ? 1 : 4), i, encoding);
846 // }
847 
848  if(i == 0) // empty training set, nothing to do
849  return 0;
850  if(i < size) {
851  //av_log(s->avctx, (CERTAIN(encoding) ? AV_LOG_ERROR : AV_LOG_INFO), "WOULD WASTE: %s cbsize %i bigger than training set size %i (encoding %i)\n", v1mode?"v1":"v4", size, i, encoding);
852  size = i;
853  }
854 
855  avpriv_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
856  avpriv_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
857 
858  //setup vq_pict, which contains a single MB
859  vq_pict.data[0] = vq_pict_buf;
860  vq_pict.linesize[0] = MB_SIZE;
861  vq_pict.data[1] = &vq_pict_buf[MB_AREA];
862  vq_pict.data[2] = vq_pict.data[1] + (MB_AREA >> 2);
863  vq_pict.linesize[1] = vq_pict.linesize[2] = MB_SIZE >> 1;
864 
865  //copy indices
866  for(i = j = y = 0; y < h; y += MB_SIZE) {
867  for(x = 0; x < s->w; x += MB_SIZE, j++) {
868  mb_info *mb = &s->mb[j];
869 // skip uninteresting blocks if we know their preferred encoding
870  if(CERTAIN(encoding) && mb->best_encoding != encoding)
871  continue;
872 
873  //point sub_pict to current MB
874  get_sub_picture(s, x, y, pict, &sub_pict);
875 
876  if(v1mode) {
877  mb->v1_vector = s->codebook_closest[i];
878 
879  //fill in vq_pict with V1 data
880  decode_v1_vector(s, &vq_pict, mb->v1_vector, info);
881 
882  mb->v1_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
883  total_error += mb->v1_error;
884  } else {
885  for(k = 0; k < 4; k++)
886  mb->v4_vector[k] = s->codebook_closest[i+k];
887 
888  //fill in vq_pict with V4 data
889  decode_v4_vector(s, &vq_pict, mb->v4_vector, info);
890 
891  mb->v4_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
892  total_error += mb->v4_error;
893  }
894  i += v1mode ? 1 : 4;
895  }
896  }
897 // check that we did it right in the beginning of the function
898  av_assert0(i >= size); // training set is no smaller than the codebook
899 
900  //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %lli\n", v1mode, size, i, (long long int)total_error);
901 
902  return size;
903 }
904 
905 static void calculate_skip_errors(CinepakEncContext *s, int h, AVPicture *last_pict, AVPicture *pict, strip_info *info)
906 {
907  int x, y, i;
908  AVPicture sub_last, sub_pict;
909 
910  for(i = y = 0; y < h; y += MB_SIZE) {
911  for(x = 0; x < s->w; x += MB_SIZE, i++) {
912  get_sub_picture(s, x, y, last_pict, &sub_last);
913  get_sub_picture(s, x, y, pict, &sub_pict);
914 
915  s->mb[i].skip_error = compute_mb_distortion(s, &sub_last, &sub_pict);
916  }
917  }
918 }
919 
920 static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe, unsigned char *buf, int strip_size)
921 {
922 // actually we are exclusively using intra strip coding (how much can we win
923 // otherwise? how to choose which part of a codebook to update?),
924 // keyframes are different only because we disallow ENC_SKIP on them -- rl
925 // (besides, the logic here used to be inverted: )
926 // buf[0] = keyframe ? 0x11: 0x10;
927  buf[0] = keyframe ? 0x10: 0x11;
928  AV_WB24(&buf[1], strip_size + STRIP_HEADER_SIZE);
929 // AV_WB16(&buf[4], y); /* using absolute y values works -- rl */
930  AV_WB16(&buf[4], 0); /* using relative values works as well -- rl */
931  AV_WB16(&buf[6], 0);
932 // AV_WB16(&buf[8], y+h); /* using absolute y values works -- rl */
933  AV_WB16(&buf[8], h); /* using relative values works as well -- rl */
934  AV_WB16(&buf[10], s->w);
935  //av_log(s->avctx, AV_LOG_INFO, "write_strip_header() %x keyframe=%d\n", buf[0], keyframe);
936 }
937 
938 static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture *last_pict, AVPicture *pict, AVPicture *scratch_pict, unsigned char *buf, int64_t *best_score
939 #ifdef CINEPAK_REPORT_SERR
940 , int64_t *best_serr
941 #endif
942 )
943 {
944  int64_t score = 0;
945 #ifdef CINEPAK_REPORT_SERR
946  int64_t serr;
947 #endif
948  int best_size = 0;
949  strip_info info;
950 // for codebook optimization:
951  int v1enough, v1_size, v4enough, v4_size;
952  int new_v1_size, new_v4_size;
953  int v1shrunk, v4shrunk;
954 
955  if(!keyframe)
956  calculate_skip_errors(s, h, last_pict, pict, &info);
957 
958  //try some powers of 4 for the size of the codebooks
959  //constraint the v4 codebook to be no bigger than v1 one,
960  //(and no less than v1_size/4)
961  //thus making v1 preferable and possibly losing small details? should be ok
962 #define SMALLEST_CODEBOOK 1
963  for(v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) {
964  for(v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) {
965  //try all modes
966  for(CinepakMode mode = 0; mode < MODE_COUNT; mode++) {
967  //don't allow MODE_MC in intra frames
968  if(keyframe && mode == MODE_MC)
969  continue;
970 
971  if(mode == MODE_V1_ONLY) {
972  info.v1_size = v1_size;
973 // the size may shrink even before optimizations if the input is short:
974  info.v1_size = quantize(s, h, pict, 1, &info, ENC_UNCERTAIN);
975  if(info.v1_size < v1_size)
976 // too few eligible blocks, no sense in trying bigger sizes
977  v1enough = 1;
978 
979  info.v4_size = 0;
980  } else { // mode != MODE_V1_ONLY
981  // if v4 codebook is empty then only allow V1-only mode
982  if(!v4_size)
983  continue;
984 
985  if(mode == MODE_V1_V4) {
986  info.v4_size = v4_size;
987  info.v4_size = quantize(s, h, pict, 0, &info, ENC_UNCERTAIN);
988  if(info.v4_size < v4_size)
989 // too few eligible blocks, no sense in trying bigger sizes
990  v4enough = 1;
991  }
992  }
993 
994  info.mode = mode;
995 // choose the best encoding per block, based on current experience
996  score = calculate_mode_score(s, h, &info, 0,
997  &v1shrunk, &v4shrunk
998 #ifdef CINEPAK_REPORT_SERR
999 , &serr
1000 #endif
1001 );
1002 
1003  if(mode != MODE_V1_ONLY){
1004  int extra_iterations_limit = s->max_extra_cb_iterations;
1005 // recompute the codebooks, omitting the extra blocks
1006 // we assume we _may_ come here with more blocks to encode than before
1007  info.v1_size = v1_size;
1008  new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
1009  if(new_v1_size < info.v1_size){
1010  //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1011  info.v1_size = new_v1_size;
1012  }
1013 // we assume we _may_ come here with more blocks to encode than before
1014  info.v4_size = v4_size;
1015  new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
1016  if(new_v4_size < info.v4_size) {
1017  //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries at first iteration\n", mode, v1_size, v4_size, new_v4_size);
1018  info.v4_size = new_v4_size;
1019  }
1020 // calculate the resulting score
1021 // (do not move blocks to codebook encodings now, as some blocks may have
1022 // got bigger errors despite a smaller training set - but we do not
1023 // ever grow the training sets back)
1024  for(;;) {
1025  score = calculate_mode_score(s, h, &info, 1,
1026  &v1shrunk, &v4shrunk
1027 #ifdef CINEPAK_REPORT_SERR
1028 , &serr
1029 #endif
1030 );
1031 // do we have a reason to reiterate? if so, have we reached the limit?
1032  if((!v1shrunk && !v4shrunk) || !extra_iterations_limit--) break;
1033 // recompute the codebooks, omitting the extra blocks
1034  if(v1shrunk) {
1035  info.v1_size = v1_size;
1036  new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
1037  if(new_v1_size < info.v1_size){
1038  //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1039  info.v1_size = new_v1_size;
1040  }
1041  }
1042  if(v4shrunk) {
1043  info.v4_size = v4_size;
1044  new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
1045  if(new_v4_size < info.v4_size) {
1046  //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries\n", mode, v1_size, v4_size, new_v4_size);
1047  info.v4_size = new_v4_size;
1048  }
1049  }
1050  }
1051  }
1052 
1053  //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %lli\n", v1_size, v4_size, (long long int)score);
1054 
1055  if(best_size == 0 || score < *best_score) {
1056 
1057  *best_score = score;
1058 #ifdef CINEPAK_REPORT_SERR
1059  *best_serr = serr;
1060 #endif
1061  best_size = encode_mode(s, h, scratch_pict, last_pict, &info, s->strip_buf + STRIP_HEADER_SIZE);
1062 
1063  //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B", mode, info.v1_size, info.v4_size, (long long int)score, best_size);
1064  //av_log(s->avctx, AV_LOG_INFO, "\n");
1065 #ifdef CINEPAK_REPORT_SERR
1066  av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B\n", mode, v1_size, v4_size, (long long int)serr, best_size);
1067 #endif
1068 
1069 #ifdef CINEPAKENC_DEBUG
1070  //save MB encoding choices
1071  memcpy(s->best_mb, s->mb, mb_count*sizeof(mb_info));
1072 #endif
1073 
1074  //memcpy(strip_temp + STRIP_HEADER_SIZE, strip_temp, best_size);
1075  write_strip_header(s, y, h, keyframe, s->strip_buf, best_size);
1076 
1077  }
1078  }
1079  }
1080  }
1081 
1082 #ifdef CINEPAKENC_DEBUG
1083  //gather stats. this will only work properly of MAX_STRIPS == 1
1084  if(best_info.mode == MODE_V1_ONLY) {
1085  s->num_v1_mode++;
1086  s->num_v1_encs += s->w*h/MB_AREA;
1087  } else {
1088  if(best_info.mode == MODE_V1_V4)
1089  s->num_v4_mode++;
1090  else
1091  s->num_mc_mode++;
1092 
1093  int x;
1094  for(x = 0; x < s->w*h/MB_AREA; x++)
1095  if(s->best_mb[x].best_encoding == ENC_V1)
1096  s->num_v1_encs++;
1097  else if(s->best_mb[x].best_encoding == ENC_V4)
1098  s->num_v4_encs++;
1099  else
1100  s->num_skips++;
1101  }
1102 #endif
1103 
1104  best_size += STRIP_HEADER_SIZE;
1105  memcpy(buf, s->strip_buf, best_size);
1106 
1107  return best_size;
1108 }
1109 
1110 static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size, int isakeyframe)
1111 {
1112  buf[0] = isakeyframe ? 0 : 1;
1113  AV_WB24(&buf[1], data_size + CVID_HEADER_SIZE);
1114  AV_WB16(&buf[4], s->w);
1115  AV_WB16(&buf[6], s->h);
1116  AV_WB16(&buf[8], num_strips);
1117 
1118  return CVID_HEADER_SIZE;
1119 }
1120 
/**
 * Encode one frame, trying several strip counts and keeping the best one.
 *
 * Steps, as performed below:
 *  1. For AV_PIX_FMT_RGB24 input the frame is first converted, 2x2 pixels
 *     at a time, into s->input_frame: four luma samples plus one sample in
 *     each of the two chroma planes, using fixed-point coefficients scaled
 *     by 2^23. The chroma coefficients are a quarter of the documented
 *     ones because they are applied to the sum of the four pixels.
 *  2. For every num_strips from s->min_strips up to s->max_strips (and at
 *     most s->h / MB_SIZE) the frame is cut into strips whose lower edges
 *     are rounded up to a multiple of 4; each strip is encoded by
 *     rd_strip() into s->frame_buf behind the space reserved for the CVID
 *     header, and the scores and sizes are summed up.
 *  3. The strip count with the lowest total score wins: the CVID header is
 *     written and s->frame_buf is copied to buf.
 *  4. s->min_strips / s->max_strips are re-centred around the winner for
 *     the next call.
 *
 * @param s           encoder context
 * @param frame       input frame
 * @param isakeyframe nonzero to encode a keyframe
 * @param buf         output buffer for the encoded frame
 * @param buf_size    not used by this function
 * @return size of the encoded frame (CVID header included), or the
 *         negative value returned by rd_strip()
 *
 * NOTE(review): the embedded listing numbers jump from 1231 to 1233 just
 * before the memcpy() into buf, so one statement of the original file may
 * be missing from this text - check against the upstream source.
 */
1121  static int rd_frame(CinepakEncContext *s, const AVFrame *frame, int isakeyframe, unsigned char *buf, int buf_size)
1122 {
1123  int num_strips, strip, i, y, nexty, size, temp_size;
1124  AVPicture last_pict, pict, scratch_pict;
 // a best_score of 0 doubles as "no candidate accepted yet", see below
1125  int64_t best_score = 0, score, score_temp;
1126 #ifdef CINEPAK_REPORT_SERR
1127  int64_t best_serr = 0, serr, serr_temp;
1128 #endif
1129 
1130  int best_nstrips = -1, best_size = -1; // mark as uninitialized
1131 
1132  if(s->pix_fmt == AV_PIX_FMT_RGB24) {
1133  int x;
1134 // build a copy of the given frame in the correct colorspace
1135  for(y = 0; y < s->h; y += 2) {
1136  for(x = 0; x < s->w; x += 2) {
1137  uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb;
 // ir[0] / ir[1]: upper and lower input row of the current 2x2 pixel group
1138  ir[0] = ((AVPicture*)frame)->data[0] + x*3 + y*((AVPicture*)frame)->linesize[0];
1139  ir[1] = ir[0] + ((AVPicture*)frame)->linesize[0];
1140  get_sub_picture(s, x, y, (AVPicture*)s->input_frame, &scratch_pict);
1141  r = g = b = 0;
1142  for(i=0; i<4; ++i) {
1143  int i1, i2;
 // i1: column (0/1), i2: row (0/1) inside the 2x2 group
1144  i1 = (i&1); i2 = (i>=2);
1145  rr = ir[i2][i1*3+0];
1146  gg = ir[i2][i1*3+1];
1147  bb = ir[i2][i1*3+2];
 // r, g, b accumulate the sums over the four pixels for the chroma samples
1148  r += rr; g += gg; b += bb;
1149 // using fixed point arithmetic for portable repeatability, scaling by 2^23
1150 // "Y"
1151 // rr = 0.2857*rr + 0.5714*gg + 0.1429*bb;
1152  rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23;
1153  if( rr < 0) rr = 0;
1154  else if (rr > 255) rr = 255;
1155  scratch_pict.data[0][i1 + i2*scratch_pict.linesize[0]] = rr;
1156  }
1157 // let us scale down as late as possible
1158 // r /= 4; g /= 4; b /= 4;
1159 // "U"
1160 // rr = -0.1429*r - 0.2857*g + 0.4286*b;
1161  rr = (-299683*r - 599156*g + 898839*b) >> 23;
1162  if( rr < -128) rr = -128;
1163  else if (rr > 127) rr = 127;
1164  scratch_pict.data[1][0] = rr + 128; // quantize needs unsigned
1165 // "V"
1166 // rr = 0.3571*r - 0.2857*g - 0.0714*b;
1167  rr = (748893*r - 599156*g - 149737*b) >> 23;
1168  if( rr < -128) rr = -128;
1169  else if (rr > 127) rr = 127;
1170  scratch_pict.data[2][0] = rr + 128; // quantize needs unsigned
1171  }
1172  }
1173  }
1174 
1175  //would be nice but quite certainly incompatible with vintage players:
1176  // support encoding zero strips (meaning skip the whole frame)
1177  for(num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) {
 // totals for the current number of strips
1178  score = 0;
1179  size = 0;
1180 #ifdef CINEPAK_REPORT_SERR
1181  serr = 0;
1182 #endif
1183 
 // strip is 1-based; y advances to nexty after every strip, also when
 // a zero-height strip is skipped with "continue"
1184  for(y = 0, strip = 1; y < s->h; strip++, y = nexty) {
1185  int strip_height;
1186 
1187  nexty = strip * s->h / num_strips; // <= s->h
1188  //make nexty the next multiple of 4 if not already there
1189  if(nexty & 3)
1190  nexty += 4 - (nexty & 3);
1191 
1192  strip_height = nexty - y;
1193  if(strip_height <= 0) { // can this ever happen?
1194  av_log(s->avctx, AV_LOG_INFO, "skipping zero height strip %i of %i\n", strip, num_strips);
1195  continue;
1196  }
1197 
 // RGB24 input is read from the converted copy built above
1198  if(s->pix_fmt == AV_PIX_FMT_RGB24)
1199  get_sub_picture(s, 0, y, (AVPicture*)s->input_frame, &pict);
1200  else
1201  get_sub_picture(s, 0, y, (AVPicture*)frame, &pict);
1202  get_sub_picture(s, 0, y, (AVPicture*)s->last_frame, &last_pict);
1203  get_sub_picture(s, 0, y, (AVPicture*)s->scratch_frame, &scratch_pict);
1204 
 // strips are stored back to back behind the room left for the CVID header
1205  if((temp_size = rd_strip(s, y, strip_height, isakeyframe, &last_pict, &pict, &scratch_pict, s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
1206 #ifdef CINEPAK_REPORT_SERR
1207 , &serr_temp
1208 #endif
1209 )) < 0)
1210  return temp_size;
1211 
1212  score += score_temp;
1213 #ifdef CINEPAK_REPORT_SERR
1214  serr += serr_temp;
1215 #endif
1216  size += temp_size;
1217  //av_log(s->avctx, AV_LOG_INFO, "strip %d, isakeyframe=%d", strip, isakeyframe);
1218  //av_log(s->avctx, AV_LOG_INFO, "\n");
1219  }
1220 
 // the first candidate is always accepted because best_score starts at 0
1221  if(best_score == 0 || score < best_score) {
1222  best_score = score;
1223 #ifdef CINEPAK_REPORT_SERR
1224  best_serr = serr;
1225 #endif
1226  best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe);
1227  //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)score, best_size);
1228 #ifdef CINEPAK_REPORT_SERR
1229  av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)serr, best_size);
1230 #endif
1231 
 // NOTE(review): listing line 1232 is absent here, see the function header
 // NOTE(review): best_size is not compared against buf_size before the copy
1233  memcpy(buf, s->frame_buf, best_size);
1234  best_nstrips = num_strips;
1235  }
1236 // avoid trying too many strip numbers without a real reason
1237 // (this makes the processing of the very first frame faster)
1238  if(num_strips - best_nstrips > 4)
1239  break;
1240  }
1241 
1242  av_assert0(best_nstrips >= 0 && best_size >= 0);
1243 
1244 // let the number of strips slowly adapt to the changes in the contents,
1245 // compared to full bruteforcing every time this will occasionally lead
1246 // to some r/d performance loss but makes encoding up to several times faster
 // without a delta range the window is two strip counts wide and moves by
 // at most one per frame; otherwise it spans best_nstrips +/- the range.
 // Both variants are clamped to [s->min_min_strips, s->max_max_strips].
1247  if(!s->strip_number_delta_range) {
1248  if(best_nstrips == s->max_strips) { // let us try to step up
1249  s->max_strips = best_nstrips + 1;
1250  if(s->max_strips >= s->max_max_strips)
1251  s->max_strips = s->max_max_strips;
1252  } else { // try to step down
1253  s->max_strips = best_nstrips;
1254  }
1255  s->min_strips = s->max_strips - 1;
1256  if(s->min_strips < s->min_min_strips)
1257  s->min_strips = s->min_min_strips;
1258  } else {
1259  s->max_strips = best_nstrips + s->strip_number_delta_range;
1260  if(s->max_strips >= s->max_max_strips)
1261  s->max_strips = s->max_max_strips;
1262  s->min_strips = best_nstrips - s->strip_number_delta_range;
1263  if(s->min_strips < s->min_min_strips)
1264  s->min_strips = s->min_min_strips;
1265  }
1266 
1267  return best_size;
1268 }
1269 
1271  const AVFrame *frame, int *got_packet)
1272 {
1273  CinepakEncContext *s = avctx->priv_data;
1274  int ret;
1275 
1276  s->lambda = frame->quality ? frame->quality - 1 : 2 * FF_LAMBDA_SCALE;
1277 
1278  if ((ret = ff_alloc_packet2(avctx, pkt, s->frame_buf_size)) < 0)
1279  return ret;
1280  ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size);
1281  pkt->size = ret;
1282  if (s->curframe == 0)
1283  pkt->flags |= AV_PKT_FLAG_KEY;
1284  *got_packet = 1;
1285 
1286  FFSWAP(AVFrame *, s->last_frame, s->best_frame);
1287 
1288  if (++s->curframe >= s->keyint)
1289  s->curframe = 0;
1290 
1291  return 0;
1292 }
1293 
/*
 * Encoder teardown: releases the buffers held by the private context with
 * av_freep() and always returns 0. Presumably this is the body of
 * cinepak_encode_end(), the function the codec table names as .close.
 *
 * NOTE(review): this text is incomplete. The function's signature line is
 * not present (the embedded listing numbers skip 1294), and the numbers
 * also skip 1299-1301, 1303 and 1305. In particular the statement that
 * originally followed the RGB24 test below appears to be missing, so as
 * shown that test guards av_freep(&s->codebook_input) instead. Restore the
 * missing lines from the upstream file before building or changing this.
 */
1295 {
1296  CinepakEncContext *s = avctx->priv_data;
1297  int x;
1298 
1302  if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
1304  av_freep(&s->codebook_input);
1306  av_freep(&s->strip_buf);
1307  av_freep(&s->frame_buf);
1308  av_freep(&s->mb);
1309 #ifdef CINEPAKENC_DEBUG
1310  av_freep(&s->best_mb);
1311 #endif
1312 
 // four picture buffers are released for RGB24 input, three otherwise
1313  for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
1314  av_freep(&s->pict_bufs[x]);
1315 
 // debug builds report the counters gathered while encoding
1316 #ifdef CINEPAKENC_DEBUG
1317  av_log(avctx, AV_LOG_INFO, "strip coding stats: %i V1 mode, %i V4 mode, %i MC mode (%i V1 encs, %i V4 encs, %i skips)\n",
1318  s->num_v1_mode, s->num_v4_mode, s->num_mc_mode, s->num_v1_encs, s->num_v4_encs, s->num_skips);
1319 #endif
1320 
1321  return 0;
1322 }
1323 
/*
 * Codec registration table for the "cinepak" video encoder: it names the
 * encode and close callbacks, the accepted pixel formats (RGB24 and GRAY8)
 * and the private context size and class.
 *
 * NOTE(review): the opening line of this definition is not present in this
 * text (the embedded listing numbers skip 1324), and listing line 1329 is
 * missing between .priv_data_size and .encode2 - presumably another
 * callback member. Restore both from the upstream file.
 */
1325  .name = "cinepak",
1326  .type = AVMEDIA_TYPE_VIDEO,
1327  .id = AV_CODEC_ID_CINEPAK,
1328  .priv_data_size = sizeof(CinepakEncContext),
1330  .encode2 = cinepak_encode_frame,
1331  .close = cinepak_encode_end,
 // list terminated by AV_PIX_FMT_NONE
1332  .pix_fmts = (const enum AVPixelFormat[]){AV_PIX_FMT_RGB24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
1333  .long_name = NULL_IF_CONFIG_SMALL("Cinepak / CVID"),
1334  .priv_class = &cinepak_class,
1335 };