FFmpeg
vf_deshake_opencl.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  *
18  * Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
19  * Copyright (C) 2009, Willow Garage Inc., all rights reserved.
20  * Copyright (C) 2013, OpenCV Foundation, all rights reserved.
21  * Third party copyrights are property of their respective owners.
22  *
23  * Redistribution and use in source and binary forms, with or without modification,
24  * are permitted provided that the following conditions are met:
25  *
26  * * Redistribution's of source code must retain the above copyright notice,
27  * this list of conditions and the following disclaimer.
28  *
29  * * Redistribution's in binary form must reproduce the above copyright notice,
30  * this list of conditions and the following disclaimer in the documentation
31  * and/or other materials provided with the distribution.
32  *
33  * * The name of the copyright holders may not be used to endorse or promote products
34  * derived from this software without specific prior written permission.
35  *
36  * This software is provided by the copyright holders and contributors "as is" and
37  * any express or implied warranties, including, but not limited to, the implied
38  * warranties of merchantability and fitness for a particular purpose are disclaimed.
39  * In no event shall the Intel Corporation or contributors be liable for any direct,
40  * indirect, incidental, special, exemplary, or consequential damages
41  * (including, but not limited to, procurement of substitute goods or services;
42  * loss of use, data, or profits; or business interruption) however caused
43  * and on any theory of liability, whether in contract, strict liability,
44  * or tort (including negligence or otherwise) arising in any way out of
45  * the use of this software, even if advised of the possibility of such damage.
46  */
47 
48 #include <float.h>
49 #include <libavutil/lfg.h>
50 #include "libavutil/opt.h"
51 #include "libavutil/mem.h"
52 #include "libavutil/fifo.h"
53 #include "libavutil/common.h"
54 #include "libavutil/avassert.h"
55 #include "libavutil/pixdesc.h"
56 #include "libavutil/pixfmt.h"
57 #include "avfilter.h"
58 #include "framequeue.h"
59 #include "filters.h"
60 #include "transform.h"
61 #include "internal.h"
62 #include "opencl.h"
63 #include "opencl_source.h"
64 #include "video.h"
65 
66 /*
67 This filter matches feature points between frames (dealing with outliers) and then
68 uses the matches to estimate an affine transform between frames. This transform is
69 decomposed into various values (translation, scale, rotation) and the values are
70 summed relative to the start of the video to obtain an absolute camera position
71 for each frame. This "camera path" is then smoothed via a gaussian filter, resulting
72 in a new path that is turned back into an affine transform and applied to each
73 frame to render it.
74 
75 High-level overview:
76 
77 All of the work to extract motion data from frames occurs in queue_frame. Motion data
78 is buffered in a smoothing window, so queue_frame simply computes the absolute camera
79 positions and places them in ringbuffers.
80 
81 filter_frame is responsible for looking at the absolute camera positions currently
82 in the ringbuffers, applying the gaussian filter, and then transforming the frames.
83 */
84 
85 // Number of bits for BRIEF descriptors
86 #define BREIFN 512
87 // Size of the patch from which a BRIEF descriptor is extracted
88 // This is the size used in OpenCV
89 #define BRIEF_PATCH_SIZE 31
90 #define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2)
91 
92 #define MATCHES_CONTIG_SIZE 2000
93 
// Divides a by b, rounding the quotient up (for positive integer operands).
// Every argument use is parenthesized so that compound expressions such as
// `n + 1` expand correctly; note that b is still evaluated twice.
#define ROUNDED_UP_DIV(a, b) (((a) + ((b) - 1)) / (b))
95 
96 typedef struct PointPair {
97  // Previous frame
98  cl_float2 p1;
99  // Current frame
100  cl_float2 p2;
101 } PointPair;
102 
103 typedef struct MotionVector {
105  // Used to mark vectors as potential outliers
107 } MotionVector;
108 
109 // Denotes the indices for the different types of motion in the ringbuffers array
116 
117  // Should always be last
119 };
120 
121 // Struct that holds data for drawing point match debug data
122 typedef struct DebugMatches {
124  // The points used to calculate the affine transform for a frame
126 
128  // For cases where we couldn't calculate a model
130 } DebugMatches;
131 
132 // Groups together the ringbuffers that store absolute distortion / position values
133 // for each frame
134 typedef struct AbsoluteFrameMotion {
135  // Array with the various ringbuffers, indexed via the RingbufferIndices enum
137 
138  // Offset to get to the current frame being processed
139  // (not in bytes)
141  // Keeps track of where the start and end of contiguous motion data is (to
142  // deal with cases where no motion data is found between two frames)
145 
148 
149 // Takes care of freeing the arrays within the DebugMatches inside of the
150 // debug_matches ringbuffer and then freeing the buffer itself.
152  DebugMatches dm;
153 
154  if (!afm->debug_matches) {
155  return;
156  }
157 
158  while (av_fifo_read(afm->debug_matches, &dm, 1) >= 0)
159  av_freep(&dm.matches);
160 
162 }
163 
164 // Stores the translation, scale, rotation, and skew deltas between two frames
165 typedef struct FrameDelta {
166  cl_float2 translation;
167  float rotation;
168  cl_float2 scale;
169  cl_float2 skew;
170 } FrameDelta;
171 
172 typedef struct SimilarityMatrix {
173  // The 2x3 similarity matrix
174  double matrix[6];
176 
177 typedef struct CropInfo {
178  // The top left corner of the bounding box for the crop
179  cl_float2 top_left;
180  // The bottom right corner of the bounding box for the crop
181  cl_float2 bottom_right;
182 } CropInfo;
183 
// Pair of indices delimiting an iteration window around the current frame in a
// ringbuffer; produced by the function that computes the window bounds
typedef struct IterIndices {
    int start; // Index to begin iterating at
    int end;   // Index to stop iterating at
} IterIndices;
190 
191 typedef struct DeshakeOpenCLContext {
193  // Whether or not the above `OpenCLFilterContext` has been initialized
195 
196  // These variables are used in the activate callback
197  int64_t duration;
198  int eof;
199 
200  // State for random number generation
202 
203  // FIFO frame queue used to buffer future frames for processing
205  // Ringbuffers for frame positions
207 
208  // The number of frames' motion to consider before and after the frame we are
209  // smoothing
211  // The number of the frame we are currently processing
213 
214  // Stores a 1d array of normalised gaussian kernel values for convolution
215  float *gauss_kernel;
216 
217  // Buffer for error values used in RANSAC code
218  float *ransac_err;
219 
220  // Information regarding how to crop the smoothed luminance (or RGB) planes
222  // Information regarding how to crop the smoothed chroma planes
224 
225  // Whether or not we are processing YUV input (as opposed to RGB)
226  int is_yuv;
227  // The underlying format of the hardware surfaces
229 
230  // Buffer to copy `matches` into for the CPU to work with
233 
235 
236  cl_command_queue command_queue;
237  cl_kernel kernel_grayscale;
242  cl_kernel kernel_transform;
244 
245  // Stores a frame converted to grayscale
246  cl_mem grayscale;
247  // Stores the harris response for a frame (measure of "cornerness" for each pixel)
248  cl_mem harris_buf;
249 
250  // Detected features after non-maximum suppression and sub-pixel refinement
252  // Saved from the previous frame
254 
255  // BRIEF sampling pattern that is randomly initialized
257  // Feature point descriptors for the current frame
258  cl_mem descriptors;
259  // Feature point descriptors for the previous frame
261  // Vectors between points in current and previous frame
262  cl_mem matches;
264  // Holds the matrix to transform luminance (or RGB) with
265  cl_mem transform_y;
266  // Holds the matrix to transform chroma with
267  cl_mem transform_uv;
268 
269  // Configurable options
270 
272  int debug_on;
274 
275  // Whether or not feature points should be refined at a sub-pixel level
277  // If the user sets a value other than the default, 0, this percentage is
278  // translated into a sigma value ranging from 0.5 to 40.0
280  // This number is multiplied by the video frame rate to determine the size
281  // of the smooth window
283 
284  // Debug stuff
285 
289 
290  // These store the total time spent executing the different kernels in nanoseconds
291  unsigned long long grayscale_time;
292  unsigned long long harris_response_time;
293  unsigned long long refine_features_time;
294  unsigned long long brief_descriptors_time;
295  unsigned long long match_descriptors_time;
296  unsigned long long transform_time;
297  unsigned long long crop_upscale_time;
298 
299  // Time spent copying matched features from the device to the host
300  unsigned long long read_buf_time;
302 
303 // Returns a random uniformly-distributed number in [low, high]
304 static int rand_in(int low, int high, AVLFG *alfg) {
305  return (av_lfg_get(alfg) % (high - low)) + low;
306 }
307 
// Converts an accumulated event time (total_time, in nanoseconds) into the
// mean time per frame, expressed in milliseconds.
static double averaged_event_time_ms(unsigned long long total_time, int num_frames) {
    const double ns_per_frame = (double)total_time / (double)num_frames;

    return ns_per_frame / 1000000.0;
}
313 
314 // The following code is loosely ported from OpenCV
315 
316 // Estimates affine transform from 3 point pairs
317 // model is a 2x3 matrix:
318 // a b c
319 // d e f
320 static void run_estimate_kernel(const MotionVector *point_pairs, double *model)
321 {
322  // src points
323  double x1 = point_pairs[0].p.p1.s[0];
324  double y1 = point_pairs[0].p.p1.s[1];
325  double x2 = point_pairs[1].p.p1.s[0];
326  double y2 = point_pairs[1].p.p1.s[1];
327  double x3 = point_pairs[2].p.p1.s[0];
328  double y3 = point_pairs[2].p.p1.s[1];
329 
330  // dest points
331  double X1 = point_pairs[0].p.p2.s[0];
332  double Y1 = point_pairs[0].p.p2.s[1];
333  double X2 = point_pairs[1].p.p2.s[0];
334  double Y2 = point_pairs[1].p.p2.s[1];
335  double X3 = point_pairs[2].p.p2.s[0];
336  double Y3 = point_pairs[2].p.p2.s[1];
337 
338  double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );
339 
340  model[0] = d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );
341  model[1] = d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );
342  model[2] = d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );
343 
344  model[3] = d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );
345  model[4] = d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );
346  model[5] = d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );
347 }
348 
349 // Checks that the 3 points in the given array are not collinear
350 static int points_not_collinear(const cl_float2 **points)
351 {
352  int j, k, i = 2;
353 
354  for (j = 0; j < i; j++) {
355  double dx1 = points[j]->s[0] - points[i]->s[0];
356  double dy1 = points[j]->s[1] - points[i]->s[1];
357 
358  for (k = 0; k < j; k++) {
359  double dx2 = points[k]->s[0] - points[i]->s[0];
360  double dy2 = points[k]->s[1] - points[i]->s[1];
361 
362  // Assuming a 3840 x 2160 video with a point at (0, 0) and one at
363  // (3839, 2159), this prevents a third point from being within roughly
364  // 0.5 of a pixel of the line connecting the two on both axes
365  if (fabs(dx2*dy1 - dy2*dx1) <= 1.0) {
366  return 0;
367  }
368  }
369  }
370 
371  return 1;
372 }
373 
374 // Checks a subset of 3 point pairs to make sure that the points are not collinear
375 // and not too close to each other
376 static int check_subset(const MotionVector *pairs_subset)
377 {
378  const cl_float2 *prev_points[] = {
379  &pairs_subset[0].p.p1,
380  &pairs_subset[1].p.p1,
381  &pairs_subset[2].p.p1
382  };
383 
384  const cl_float2 *curr_points[] = {
385  &pairs_subset[0].p.p2,
386  &pairs_subset[1].p.p2,
387  &pairs_subset[2].p.p2
388  };
389 
390  return points_not_collinear(prev_points) && points_not_collinear(curr_points);
391 }
392 
393 // Selects a random subset of 3 points from point_pairs and places them in pairs_subset
394 static int get_subset(
395  AVLFG *alfg,
396  const MotionVector *point_pairs,
397  const int num_point_pairs,
398  MotionVector *pairs_subset,
399  int max_attempts
400 ) {
401  int idx[3];
402  int i = 0, j, iters = 0;
403 
404  for (; iters < max_attempts; iters++) {
405  for (i = 0; i < 3 && iters < max_attempts;) {
406  int idx_i = 0;
407 
408  for (;;) {
409  idx_i = idx[i] = rand_in(0, num_point_pairs, alfg);
410 
411  for (j = 0; j < i; j++) {
412  if (idx_i == idx[j]) {
413  break;
414  }
415  }
416 
417  if (j == i) {
418  break;
419  }
420  }
421 
422  pairs_subset[i] = point_pairs[idx[i]];
423  i++;
424  }
425 
426  if (i == 3 && !check_subset(pairs_subset)) {
427  continue;
428  }
429  break;
430  }
431 
432  return i == 3 && iters < max_attempts;
433 }
434 
435 // Computes the error for each of the given points based on the given model.
436 static void compute_error(
437  const MotionVector *point_pairs,
438  const int num_point_pairs,
439  const double *model,
440  float *err
441 ) {
442  double F0 = model[0], F1 = model[1], F2 = model[2];
443  double F3 = model[3], F4 = model[4], F5 = model[5];
444 
445  for (int i = 0; i < num_point_pairs; i++) {
446  const cl_float2 *f = &point_pairs[i].p.p1;
447  const cl_float2 *t = &point_pairs[i].p.p2;
448 
449  double a = F0*f->s[0] + F1*f->s[1] + F2 - t->s[0];
450  double b = F3*f->s[0] + F4*f->s[1] + F5 - t->s[1];
451 
452  err[i] = a*a + b*b;
453  }
454 }
455 
456 // Determines which of the given point matches are inliers for the given model
457 // based on the specified threshold.
458 //
459 // err must be an array of num_point_pairs length
460 static int find_inliers(
461  MotionVector *point_pairs,
462  const int num_point_pairs,
463  const double *model,
464  float *err,
465  double thresh
466 ) {
467  float t = (float)(thresh * thresh);
468  int i, n = num_point_pairs, num_inliers = 0;
469 
470  compute_error(point_pairs, num_point_pairs, model, err);
471 
472  for (i = 0; i < n; i++) {
473  if (err[i] <= t) {
474  // This is an inlier
475  point_pairs[i].should_consider = 1;
476  num_inliers += 1;
477  } else {
478  point_pairs[i].should_consider = 0;
479  }
480  }
481 
482  return num_inliers;
483 }
484 
// Determines the number of iterations required to achieve the desired
// confidence level, capped at max_iters.
//
// num_outliers is the outlier ratio (it is clipped to [0, 1], as is
// confidence), and the sample size is fixed at 3 points. The relation used is:
//    1 - confidence = (1 - inlier_probability^num_points)^num_iters
//
// which, solved for num_iters, gives:
//    num_iters = log(1 - confidence) / log(1 - inlier_probability^num_points)
//
// A more in-depth explanation can be found at https://en.wikipedia.org/wiki/Random_sample_consensus
// under the 'Parameters' heading
static int ransac_update_num_iters(double confidence, double num_outliers, int max_iters)
{
    const double conf_clipped  = av_clipd(confidence, 0.0, 1.0);
    const double outlier_ratio = av_clipd(num_outliers, 0.0, 1.0);

    // Clamp away from zero so the logarithm below cannot produce inf / nan
    const double miss_prob = FFMAX(1.0 - conf_clipped, DBL_MIN);
    const double bad_sample_prob = 1.0 - pow(1.0 - outlier_ratio, 3);
    double log_miss, log_bad;

    if (bad_sample_prob < DBL_MIN) {
        return 0;
    }

    log_miss = log(miss_prob);
    log_bad  = log(bad_sample_prob);

    if (log_bad >= 0 || -log_miss >= max_iters * (-log_bad)) {
        return max_iters;
    }

    return (int)round(log_miss / log_bad);
}
515 
516 // Estimates an affine transform between the given pairs of points using RANdom
517 // SAmple Consensus
519  DeshakeOpenCLContext *deshake_ctx,
520  MotionVector *point_pairs,
521  DebugMatches *debug_matches,
522  const int num_point_pairs,
523  double *model_out,
524  const double threshold,
525  const int max_iters,
526  const double confidence
527 ) {
528  int result = 0;
529  double best_model[6], model[6];
530  MotionVector pairs_subset[3], best_pairs[3];
531 
532  int iter, niters = FFMAX(max_iters, 1);
533  int good_count, max_good_count = 0;
534 
535  // We need at least 3 points to build a model from
536  if (num_point_pairs < 3) {
537  return 0;
538  } else if (num_point_pairs == 3) {
539  // There are only 3 points, so RANSAC doesn't apply here
540  run_estimate_kernel(point_pairs, model_out);
541 
542  for (int i = 0; i < 3; ++i) {
543  point_pairs[i].should_consider = 1;
544  }
545 
546  return 1;
547  }
548 
549  for (iter = 0; iter < niters; ++iter) {
550  int found = get_subset(&deshake_ctx->alfg, point_pairs, num_point_pairs, pairs_subset, 10000);
551 
552  if (!found) {
553  if (iter == 0) {
554  return 0;
555  }
556 
557  break;
558  }
559 
560  run_estimate_kernel(pairs_subset, model);
561  good_count = find_inliers(point_pairs, num_point_pairs, model, deshake_ctx->ransac_err, threshold);
562 
563  if (good_count > FFMAX(max_good_count, 2)) {
564  for (int mi = 0; mi < 6; ++mi) {
565  best_model[mi] = model[mi];
566  }
567 
568  for (int pi = 0; pi < 3; pi++) {
569  best_pairs[pi] = pairs_subset[pi];
570  }
571 
572  max_good_count = good_count;
573  niters = ransac_update_num_iters(
574  confidence,
575  (double)(num_point_pairs - good_count) / num_point_pairs,
576  niters
577  );
578  }
579  }
580 
581  if (max_good_count > 0) {
582  for (int mi = 0; mi < 6; ++mi) {
583  model_out[mi] = best_model[mi];
584  }
585 
586  for (int pi = 0; pi < 3; ++pi) {
587  debug_matches->model_matches[pi] = best_pairs[pi];
588  }
589  debug_matches->num_model_matches = 3;
590 
591  // Find the inliers again for the best model for debugging
592  find_inliers(point_pairs, num_point_pairs, best_model, deshake_ctx->ransac_err, threshold);
593  result = 1;
594  }
595 
596  return result;
597 }
598 
599 // "Wiggles" the first point in best_pairs around a tiny bit in order to decrease the
600 // total error
601 static void optimize_model(
602  DeshakeOpenCLContext *deshake_ctx,
603  MotionVector *best_pairs,
604  MotionVector *inliers,
605  const int num_inliers,
606  float best_err,
607  double *model_out
608 ) {
609  float move_x_val = 0.01;
610  float move_y_val = 0.01;
611  int move_x = 1;
612  float old_move_x_val = 0;
613  double model[6];
614  int last_changed = 0;
615 
616  for (int iters = 0; iters < 200; iters++) {
617  float total_err = 0;
618 
619  if (move_x) {
620  best_pairs[0].p.p2.s[0] += move_x_val;
621  } else {
622  best_pairs[0].p.p2.s[0] += move_y_val;
623  }
624 
625  run_estimate_kernel(best_pairs, model);
626  compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);
627 
628  for (int j = 0; j < num_inliers; j++) {
629  total_err += deshake_ctx->ransac_err[j];
630  }
631 
632  if (total_err < best_err) {
633  for (int mi = 0; mi < 6; ++mi) {
634  model_out[mi] = model[mi];
635  }
636 
637  best_err = total_err;
638  last_changed = iters;
639  } else {
640  // Undo the change
641  if (move_x) {
642  best_pairs[0].p.p2.s[0] -= move_x_val;
643  } else {
644  best_pairs[0].p.p2.s[0] -= move_y_val;
645  }
646 
647  if (iters - last_changed > 4) {
648  // We've already improved the model as much as we can
649  break;
650  }
651 
652  old_move_x_val = move_x_val;
653 
654  if (move_x) {
655  move_x_val *= -1;
656  } else {
657  move_y_val *= -1;
658  }
659 
660  if (old_move_x_val < 0) {
661  move_x = 0;
662  } else {
663  move_x = 1;
664  }
665  }
666  }
667 }
668 
669 // Uses a process similar to that of RANSAC to find a transform that minimizes
670 // the total error for a set of point matches determined to be inliers
671 //
672 // (Pick random subsets, compute model, find total error, iterate until error
673 // is minimized.)
674 static int minimize_error(
675  DeshakeOpenCLContext *deshake_ctx,
676  MotionVector *inliers,
677  DebugMatches *debug_matches,
678  const int num_inliers,
679  double *model_out,
680  const int max_iters
681 ) {
682  int result = 0;
683  float best_err = FLT_MAX;
684  double best_model[6], model[6];
685  MotionVector pairs_subset[3], best_pairs[3];
686 
687  for (int i = 0; i < max_iters; i++) {
688  float total_err = 0;
689  int found = get_subset(&deshake_ctx->alfg, inliers, num_inliers, pairs_subset, 10000);
690 
691  if (!found) {
692  if (i == 0) {
693  return 0;
694  }
695 
696  break;
697  }
698 
699  run_estimate_kernel(pairs_subset, model);
700  compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);
701 
702  for (int j = 0; j < num_inliers; j++) {
703  total_err += deshake_ctx->ransac_err[j];
704  }
705 
706  if (total_err < best_err) {
707  for (int mi = 0; mi < 6; ++mi) {
708  best_model[mi] = model[mi];
709  }
710 
711  for (int pi = 0; pi < 3; pi++) {
712  best_pairs[pi] = pairs_subset[pi];
713  }
714 
715  best_err = total_err;
716  }
717  }
718 
719  for (int mi = 0; mi < 6; ++mi) {
720  model_out[mi] = best_model[mi];
721  }
722 
723  for (int pi = 0; pi < 3; ++pi) {
724  debug_matches->model_matches[pi] = best_pairs[pi];
725  }
726  debug_matches->num_model_matches = 3;
727  result = 1;
728 
729  optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);
730  return result;
731 }
732 
733 // End code from OpenCV
734 
735 // Decomposes a similarity matrix into translation, rotation, scale, and skew
736 //
737 // See http://frederic-wang.fr/decomposition-of-2d-transform-matrices.html
738 static FrameDelta decompose_transform(double *model)
739 {
740  FrameDelta ret;
741 
742  double a = model[0];
743  double c = model[1];
744  double e = model[2];
745  double b = model[3];
746  double d = model[4];
747  double f = model[5];
748  double delta = a * d - b * c;
749 
750  memset(&ret, 0, sizeof(ret));
751 
752  ret.translation.s[0] = e;
753  ret.translation.s[1] = f;
754 
755  // This is the QR method
756  if (a != 0 || b != 0) {
757  double r = hypot(a, b);
758 
759  ret.rotation = FFSIGN(b) * acos(a / r);
760  ret.scale.s[0] = r;
761  ret.scale.s[1] = delta / r;
762  ret.skew.s[0] = atan((a * c + b * d) / (r * r));
763  ret.skew.s[1] = 0;
764  } else if (c != 0 || d != 0) {
765  double s = sqrt(c * c + d * d);
766 
767  ret.rotation = M_PI / 2 - FFSIGN(d) * acos(-c / s);
768  ret.scale.s[0] = delta / s;
769  ret.scale.s[1] = s;
770  ret.skew.s[0] = 0;
771  ret.skew.s[1] = atan((a * c + b * d) / (s * s));
772  } // otherwise there is only translation
773 
774  return ret;
775 }
776 
777 // Move valid vectors from the 2d buffer into a 1d buffer where they are contiguous
779  DeshakeOpenCLContext *deshake_ctx,
780  int size_y,
781  int size_x
782 ) {
783  int num_vectors = 0;
784 
785  for (int i = 0; i < size_y; ++i) {
786  for (int j = 0; j < size_x; ++j) {
787  MotionVector v = deshake_ctx->matches_host[j + i * size_x];
788 
789  if (v.should_consider) {
790  deshake_ctx->matches_contig_host[num_vectors] = v;
791  ++num_vectors;
792  }
793 
794  // Make sure we do not exceed the amount of space we allocated for these vectors
795  if (num_vectors == MATCHES_CONTIG_SIZE - 1) {
796  return num_vectors;
797  }
798  }
799  }
800  return num_vectors;
801 }
802 
803 // Returns the gaussian kernel value for the given x coordinate and sigma value
804 static float gaussian_for(int x, float sigma) {
805  return 1.0f / expf(((float)x * (float)x) / (2.0f * sigma * sigma));
806 }
807 
// Fills gauss_kernel with `length` gaussian values for the given sigma, centered
// on index length / 2, then scales them so that they sum to 1
static void make_gauss_kernel(float *gauss_kernel, float length, float sigma)
{
    const int center = length / 2;
    float total = 0;
    int n;

    // Sample the gaussian at integer offsets from the center of the window
    for (n = 0; n < length; n++) {
        const float sample = gaussian_for(n - center, sigma);

        gauss_kernel[n] = sample;
        total += sample;
    }

    // Normalize the gaussian values
    for (n = 0; n < length; n++) {
        gauss_kernel[n] = gauss_kernel[n] / total;
    }
}
827 
828 // Returns indices to start and end iteration at in order to iterate over a window
829 // of length size centered at the current frame in a ringbuffer
830 //
831 // Always returns numbers that result in a window of length size, even if that
832 // means specifying negative indices or indices past the end of the values in the
833 // ringbuffers. Make sure you clip indices appropriately within your loop.
834 static IterIndices start_end_for(DeshakeOpenCLContext *deshake_ctx, int length) {
835  IterIndices indices;
836 
837  indices.start = deshake_ctx->abs_motion.curr_frame_offset - (length / 2);
838  indices.end = deshake_ctx->abs_motion.curr_frame_offset + (length / 2) + (length % 2);
839 
840  return indices;
841 }
842 
843 // Sets val to the value in the given ringbuffer at the given offset, taking care of
844 // clipping the offset into the appropriate range
845 static void ringbuf_float_at(
846  DeshakeOpenCLContext *deshake_ctx,
847  AVFifo *values,
848  float *val,
849  int offset
850 ) {
851  int clip_start, clip_end, offset_clipped;
852  if (deshake_ctx->abs_motion.data_end_offset != -1) {
853  clip_end = deshake_ctx->abs_motion.data_end_offset;
854  } else {
855  // This expression represents the last valid index in the buffer,
856  // which we use repeatedly at the end of the video.
857  clip_end = deshake_ctx->smooth_window - av_fifo_can_write(values) - 1;
858  }
859 
860  if (deshake_ctx->abs_motion.data_start_offset != -1) {
861  clip_start = deshake_ctx->abs_motion.data_start_offset;
862  } else {
863  // Negative indices will occur at the start of the video, and we want
864  // them to be clipped to 0 in order to repeatedly use the position of
865  // the first frame.
866  clip_start = 0;
867  }
868 
869  offset_clipped = av_clip(
870  offset,
871  clip_start,
872  clip_end
873  );
874 
875  av_fifo_peek(values, val, 1, offset_clipped);
876 }
877 
878 // Returns smoothed current frame value of the given buffer of floats based on the
879 // given Gaussian kernel and its length (also the window length, centered around the
880 // current frame) and the "maximum value" of the motion.
881 //
882 // This "maximum value" should be the width / height of the image in the case of
883 // translation and an empirically chosen constant for rotation / scale.
884 //
885 // The sigma chosen to generate the final gaussian kernel with used to smooth the
886 // camera path is either hardcoded (set by user, deshake_ctx->smooth_percent) or
887 // adaptively chosen.
888 static float smooth(
889  DeshakeOpenCLContext *deshake_ctx,
890  float *gauss_kernel,
891  int length,
892  float max_val,
893  AVFifo *values
894 ) {
895  float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,
896  percent_of_max, inverted_percent;
897  IterIndices indices = start_end_for(deshake_ctx, length);
898  float large_sigma = 40.0f;
899  float small_sigma = 2.0f;
900  float best_sigma;
901 
902  if (deshake_ctx->smooth_percent) {
903  best_sigma = (large_sigma - 0.5f) * deshake_ctx->smooth_percent + 0.5f;
904  } else {
905  // Strategy to adaptively smooth trajectory:
906  //
907  // 1. Smooth path with large and small sigma values
908  // 2. Take the absolute value of the difference between them
909  // 3. Get a percentage by putting the difference over the "max value"
910  // 4, Invert the percentage
911  // 5. Calculate a new sigma value weighted towards the larger sigma value
912  // 6. Determine final smoothed trajectory value using that sigma
913 
914  make_gauss_kernel(gauss_kernel, length, large_sigma);
915  for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
916  ringbuf_float_at(deshake_ctx, values, &old, i);
917  new_large_s += old * gauss_kernel[j];
918  }
919 
920  make_gauss_kernel(gauss_kernel, length, small_sigma);
921  for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
922  ringbuf_float_at(deshake_ctx, values, &old, i);
923  new_small_s += old * gauss_kernel[j];
924  }
925 
926  diff_between = fabsf(new_large_s - new_small_s);
927  percent_of_max = diff_between / max_val;
928  inverted_percent = 1 - percent_of_max;
929  best_sigma = large_sigma * powf(inverted_percent, 40);
930  }
931 
932  make_gauss_kernel(gauss_kernel, length, best_sigma);
933  for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
934  ringbuf_float_at(deshake_ctx, values, &old, i);
935  new_best += old * gauss_kernel[j];
936  }
937 
938  return new_best;
939 }
940 
941 // Returns the position of the given point after the transform is applied
942 static cl_float2 transformed_point(float x, float y, float *transform) {
943  cl_float2 ret;
944 
945  ret.s[0] = x * transform[0] + y * transform[1] + transform[2];
946  ret.s[1] = x * transform[3] + y * transform[4] + transform[5];
947 
948  return ret;
949 }
950 
951 // Creates an affine transform that scales from the center of a frame
953  float x_shift,
954  float y_shift,
955  float angle,
956  float scale_x,
957  float scale_y,
958  float center_w,
959  float center_h,
960  float *matrix
961 ) {
962  cl_float2 center_s;
963  float center_s_w, center_s_h;
964 
966  0,
967  0,
968  0,
969  scale_x,
970  scale_y,
971  matrix
972  );
973 
974  center_s = transformed_point(center_w, center_h, matrix);
975  center_s_w = center_w - center_s.s[0];
976  center_s_h = center_h - center_s.s[1];
977 
979  x_shift + center_s_w,
980  y_shift + center_s_h,
981  angle,
982  scale_x,
983  scale_y,
984  matrix
985  );
986 }
987 
988 // Determines the crop necessary to eliminate black borders from a smoothed frame
989 // and updates target crop accordingly
990 static void update_needed_crop(
991  CropInfo* crop,
992  float *transform,
993  float frame_width,
994  float frame_height
995 ) {
996  float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;
997 
998  cl_float2 top_left = transformed_point(0, 0, transform);
999  cl_float2 top_right = transformed_point(frame_width, 0, transform);
1000  cl_float2 bottom_left = transformed_point(0, frame_height, transform);
1001  cl_float2 bottom_right = transformed_point(frame_width, frame_height, transform);
1002  float ar_h = frame_height / frame_width;
1003  float ar_w = frame_width / frame_height;
1004 
1005  if (crop->bottom_right.s[0] == 0) {
1006  // The crop hasn't been set to the original size of the plane
1007  crop->bottom_right.s[0] = frame_width;
1008  crop->bottom_right.s[1] = frame_height;
1009  }
1010 
1011  crop->top_left.s[0] = FFMAX3(
1012  crop->top_left.s[0],
1013  top_left.s[0],
1014  bottom_left.s[0]
1015  );
1016 
1017  crop->top_left.s[1] = FFMAX3(
1018  crop->top_left.s[1],
1019  top_left.s[1],
1020  top_right.s[1]
1021  );
1022 
1023  crop->bottom_right.s[0] = FFMIN3(
1024  crop->bottom_right.s[0],
1025  bottom_right.s[0],
1026  top_right.s[0]
1027  );
1028 
1029  crop->bottom_right.s[1] = FFMIN3(
1030  crop->bottom_right.s[1],
1031  bottom_right.s[1],
1032  bottom_left.s[1]
1033  );
1034 
1035  // Make sure our potentially new bounding box has the same aspect ratio
1036  new_height = crop->bottom_right.s[1] - crop->top_left.s[1];
1037  new_width = crop->bottom_right.s[0] - crop->top_left.s[0];
1038 
1039  adjusted_width = new_height * ar_w;
1040  adjusted_x = crop->bottom_right.s[0] - adjusted_width;
1041 
1042  if (adjusted_x >= crop->top_left.s[0]) {
1043  crop->top_left.s[0] = adjusted_x;
1044  } else {
1045  adjusted_height = new_width * ar_h;
1046  adjusted_y = crop->bottom_right.s[1] - adjusted_height;
1047  crop->top_left.s[1] = adjusted_y;
1048  }
1049 }
1050 
// Teardown body of the filter. NOTE(review): the signature line is absent from
// this listing, so the function name and exact parameter list are not visible
// here; the body only relies on an AVFilterContext named avctx.
//
// Frees the host-side buffers and the frame queue, releases the OpenCL
// kernels, command queue and memory objects, and finally hands over to
// ff_opencl_filter_uninit().
1052 {
1053  DeshakeOpenCLContext *ctx = avctx->priv;
// Not referenced directly below; presumably written by the CL_RELEASE_*
// macros - TODO confirm against their definitions.
1054  cl_int cle;
1055 
// One FIFO per tracked motion component
1056  for (int i = 0; i < RingbufCount; i++)
1057  av_fifo_freep2(&ctx->abs_motion.ringbuffers[i]);
1058 
1059  if (ctx->debug_on)
1060  free_debug_matches(&ctx->abs_motion);
1061 
// Host-side scratch buffers allocated during initialisation
1062  if (ctx->gauss_kernel)
1063  av_freep(&ctx->gauss_kernel);
1064 
1065  if (ctx->ransac_err)
1066  av_freep(&ctx->ransac_err);
1067 
1068  if (ctx->matches_host)
1069  av_freep(&ctx->matches_host);
1070 
1071  if (ctx->matches_contig_host)
1072  av_freep(&ctx->matches_contig_host);
1073 
1074  if (ctx->inliers)
1075  av_freep(&ctx->inliers);
1076 
1077  ff_framequeue_free(&ctx->fq);
1078 
1079  CL_RELEASE_KERNEL(ctx->kernel_grayscale);
1080  CL_RELEASE_KERNEL(ctx->kernel_harris_response);
1081  CL_RELEASE_KERNEL(ctx->kernel_refine_features);
1082  CL_RELEASE_KERNEL(ctx->kernel_brief_descriptors);
1083  CL_RELEASE_KERNEL(ctx->kernel_match_descriptors);
1084  CL_RELEASE_KERNEL(ctx->kernel_crop_upscale);
// The debug kernel is only created when debug_on is set
1085  if (ctx->debug_on)
1086  CL_RELEASE_KERNEL(ctx->kernel_draw_debug_info);
1087 
1088  CL_RELEASE_QUEUE(ctx->command_queue);
1089 
// For YUV input, queue_frame() points grayscale at the input frame's first
// plane, so it is only released here in the non-YUV case where the filter
// created the image itself.
1090  if (!ctx->is_yuv)
1091  CL_RELEASE_MEMORY(ctx->grayscale);
1092  CL_RELEASE_MEMORY(ctx->harris_buf);
1093  CL_RELEASE_MEMORY(ctx->refined_features);
1094  CL_RELEASE_MEMORY(ctx->prev_refined_features);
1095  CL_RELEASE_MEMORY(ctx->brief_pattern);
1096  CL_RELEASE_MEMORY(ctx->descriptors);
1097  CL_RELEASE_MEMORY(ctx->prev_descriptors);
1098  CL_RELEASE_MEMORY(ctx->matches);
1099  CL_RELEASE_MEMORY(ctx->matches_contig);
1100  CL_RELEASE_MEMORY(ctx->transform_y);
1101  CL_RELEASE_MEMORY(ctx->transform_uv);
1102  if (ctx->debug_on) {
1103  CL_RELEASE_MEMORY(ctx->debug_matches);
1104  CL_RELEASE_MEMORY(ctx->debug_model_matches);
1105  }
1106 
1107  ff_opencl_filter_uninit(avctx);
1108 }
1109 
// Initialisation body of the filter. NOTE(review): the signature line and a
// few interior lines (the disallowed_formats entries, the statement that sets
// up fqg, the call whose result is checked via err, and the opener of one
// buffer-creation macro) are absent from this listing.
//
// Allocates the host-side buffers and motion FIFOs, builds the random BRIEF
// sampling pattern, creates the OpenCL command queue, kernels and buffers, and
// marks the context as initialised.
//
// Returns 0 on success or a negative AVERROR code. On failure only
// pattern_host is freed here; everything stored in ctx is presumably released
// later by the teardown routine - TODO confirm.
1111 {
1112  DeshakeOpenCLContext *ctx = avctx->priv;
1113  AVFilterLink *outlink = avctx->outputs[0];
1114  AVFilterLink *inlink = avctx->inputs[0];
1115  // Pointer to the host-side pattern buffer to be initialized and then copied
1116  // to the GPU
1117  PointPair *pattern_host = NULL;
1118  cl_int cle;
1119  int err;
1120  cl_ulong8 zeroed_ulong8;
1121  FFFrameQueueGlobal fqg;
1122  cl_image_format grayscale_format;
1123  cl_image_desc grayscale_desc;
1124  cl_command_queue_properties queue_props;
1125 
// The 14 entries of this list are missing from the listing
1126  const enum AVPixelFormat disallowed_formats[14] = {
1141  };
1142 
1143  // Number of elements for an array
// i.e. one element per 32x32 block of the output frame
1144  const int image_grid_32 = ROUNDED_UP_DIV(outlink->h, 32) * ROUNDED_UP_DIV(outlink->w, 32);
1145 
1146  const int descriptor_buf_size = image_grid_32 * (BREIFN / 8);
1147  const int features_buf_size = image_grid_32 * sizeof(cl_float2);
1148 
// NOTE(review): inlink->hw_frames_ctx is dereferenced here, before the
// av_assert0() below gets a chance to run; the assert therefore cannot catch a
// missing hardware frames context - confirm whether callers guarantee it.
1149  const AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
1150  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hw_frames_ctx->sw_format);
1151 
1152  av_assert0(hw_frames_ctx);
1153  av_assert0(desc);
1154 
1156  ff_framequeue_init(&ctx->fq, &fqg);
1157  ctx->eof = 0;
// Smoothing window length in frames: input frame rate times the configured
// multiplier, truncated to int
1158  ctx->smooth_window = (int)(av_q2d(avctx->inputs[0]->frame_rate) * ctx->smooth_window_multiplier);
1159  ctx->curr_frame = 0;
1160 
1161  memset(&zeroed_ulong8, 0, sizeof(cl_ulong8));
1162 
1163  ctx->gauss_kernel = av_malloc_array(ctx->smooth_window, sizeof(float));
1164  if (!ctx->gauss_kernel) {
1165  err = AVERROR(ENOMEM);
1166  goto fail;
1167  }
1168 
1169  ctx->ransac_err = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(float));
1170  if (!ctx->ransac_err) {
1171  err = AVERROR(ENOMEM);
1172  goto fail;
1173  }
1174 
// One float FIFO of smooth_window entries per tracked motion component
1175  for (int i = 0; i < RingbufCount; i++) {
1176  ctx->abs_motion.ringbuffers[i] = av_fifo_alloc2(ctx->smooth_window,
1177  sizeof(float), 0);
1178 
1179  if (!ctx->abs_motion.ringbuffers[i]) {
1180  err = AVERROR(ENOMEM);
1181  goto fail;
1182  }
1183  }
1184 
1185  if (ctx->debug_on) {
1186  ctx->abs_motion.debug_matches = av_fifo_alloc2(
1187  ctx->smooth_window / 2,
1188  sizeof(DebugMatches), 0
1189  );
1190 
1191  if (!ctx->abs_motion.debug_matches) {
1192  err = AVERROR(ENOMEM);
1193  goto fail;
1194  }
1195  }
1196 
// -1 means "no such offset recorded" for the start/end markers
1197  ctx->abs_motion.curr_frame_offset = 0;
1198  ctx->abs_motion.data_start_offset = -1;
1199  ctx->abs_motion.data_end_offset = -1;
1200 
1201  pattern_host = av_malloc_array(BREIFN, sizeof(PointPair));
1202  if (!pattern_host) {
1203  err = AVERROR(ENOMEM);
1204  goto fail;
1205  }
1206 
1207  ctx->matches_host = av_malloc_array(image_grid_32, sizeof(MotionVector));
1208  if (!ctx->matches_host) {
1209  err = AVERROR(ENOMEM);
1210  goto fail;
1211  }
1212 
1213  ctx->matches_contig_host = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));
1214  if (!ctx->matches_contig_host) {
1215  err = AVERROR(ENOMEM);
1216  goto fail;
1217  }
1218 
1219  ctx->inliers = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));
1220  if (!ctx->inliers) {
1221  err = AVERROR(ENOMEM);
1222  goto fail;
1223  }
1224 
1225  // Initializing the patch pattern for building BRIEF descriptors with
// The generator is seeded with a fixed constant, so the pattern is the same on
// every run
1226  av_lfg_init(&ctx->alfg, 234342424);
1227  for (int i = 0; i < BREIFN; ++i) {
1228  PointPair pair;
1229 
1230  for (int j = 0; j < 2; ++j) {
1231  pair.p1.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);
1232  pair.p2.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);
1233  }
1234 
1235  pattern_host[i] = pair;
1236  }
1237 
// NOTE(review): ctx->sw_format is compared here, but within this function it
// is only assigned from hw_frames_ctx->sw_format further down, after this
// loop. Unless it is set elsewhere beforehand, this check runs against a stale
// value - confirm and consider comparing hw_frames_ctx->sw_format instead.
1238  for (int i = 0; i < 14; i++) {
1239  if (ctx->sw_format == disallowed_formats[i]) {
1240  av_log(avctx, AV_LOG_ERROR, "unsupported format in deshake_opencl.\n");
1241  err = AVERROR(ENOSYS);
1242  goto fail;
1243  }
1244  }
1245 
// Anything not flagged as RGB is handled as YUV
1246  if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
1247  ctx->is_yuv = 0;
1248  } else {
1249  ctx->is_yuv = 1;
1250  }
1251  ctx->sw_format = hw_frames_ctx->sw_format;
1252 
// The statement assigning err for this check is missing from the listing
1254  if (err < 0)
1255  goto fail;
1256 
// Profiling is only enabled on the queue in debug mode
1257  if (ctx->debug_on) {
1258  queue_props = CL_QUEUE_PROFILING_ENABLE;
1259  } else {
1260  queue_props = 0;
1261  }
1262  ctx->command_queue = clCreateCommandQueue(
1263  ctx->ocf.hwctx->context,
1264  ctx->ocf.hwctx->device_id,
1265  queue_props,
1266  &cle
1267  );
1268  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL command queue %d.\n", cle);
1269 
1270  CL_CREATE_KERNEL(ctx, grayscale);
1271  CL_CREATE_KERNEL(ctx, harris_response);
1272  CL_CREATE_KERNEL(ctx, refine_features);
1273  CL_CREATE_KERNEL(ctx, brief_descriptors);
1274  CL_CREATE_KERNEL(ctx, match_descriptors);
1276  CL_CREATE_KERNEL(ctx, crop_upscale);
1277  if (ctx->debug_on)
1278  CL_CREATE_KERNEL(ctx, draw_debug_info);
1279 
// A dedicated single-channel float image is only created for non-YUV input
1280  if (!ctx->is_yuv) {
1281  grayscale_format.image_channel_order = CL_R;
1282  grayscale_format.image_channel_data_type = CL_FLOAT;
1283 
1284  grayscale_desc = (cl_image_desc) {
1285  .image_type = CL_MEM_OBJECT_IMAGE2D,
1286  .image_width = outlink->w,
1287  .image_height = outlink->h,
1288  .image_depth = 0,
1289  .image_array_size = 0,
1290  .image_row_pitch = 0,
1291  .image_slice_pitch = 0,
1292  .num_mip_levels = 0,
1293  .num_samples = 0,
1294  .buffer = NULL,
1295  };
1296 
1297  ctx->grayscale = clCreateImage(
1298  ctx->ocf.hwctx->context,
1299  0,
1300  &grayscale_format,
1301  &grayscale_desc,
1302  NULL,
1303  &cle
1304  );
1305  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create grayscale image: %d.\n", cle);
1306  }
1307 
1308  CL_CREATE_BUFFER(ctx, harris_buf, outlink->h * outlink->w * sizeof(float));
1309  CL_CREATE_BUFFER(ctx, refined_features, features_buf_size);
1310  CL_CREATE_BUFFER(ctx, prev_refined_features, features_buf_size);
// Arguments of a buffer-creation macro whose opening line is missing from the
// listing; it copies pattern_host into the brief_pattern device buffer
1312  ctx,
1313  brief_pattern,
1314  CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1315  BREIFN * sizeof(PointPair),
1316  pattern_host
1317  );
1318  CL_CREATE_BUFFER(ctx, descriptors, descriptor_buf_size);
1319  CL_CREATE_BUFFER(ctx, prev_descriptors, descriptor_buf_size);
1320  CL_CREATE_BUFFER(ctx, matches, image_grid_32 * sizeof(MotionVector));
1321  CL_CREATE_BUFFER(ctx, matches_contig, MATCHES_CONTIG_SIZE * sizeof(MotionVector));
1322  CL_CREATE_BUFFER(ctx, transform_y, 9 * sizeof(float));
1323  CL_CREATE_BUFFER(ctx, transform_uv, 9 * sizeof(float));
1324  if (ctx->debug_on) {
1325  CL_CREATE_BUFFER(ctx, debug_matches, MATCHES_CONTIG_SIZE * sizeof(MotionVector));
1326  CL_CREATE_BUFFER(ctx, debug_model_matches, 3 * sizeof(MotionVector));
1327  }
1328 
1329  ctx->initialized = 1;
// The host copy of the pattern is no longer needed once the device buffer
// has been created
1330  av_freep(&pattern_host);
1331 
1332  return 0;
1333 
1334 fail:
1335  av_freep(&pattern_host);
1336  return err;
1337 }
1338 
1339 // Logs debug information about the transform data
1340 static void transform_debug(AVFilterContext *avctx, float *new_vals, float *old_vals, int curr_frame) {
1341  av_log(avctx, AV_LOG_VERBOSE,
1342  "Frame %d:\n"
1343  "\tframe moved from: %f x, %f y\n"
1344  "\t to: %f x, %f y\n"
1345  "\t rotated from: %f degrees\n"
1346  "\t to: %f degrees\n"
1347  "\t scaled from: %f x, %f y\n"
1348  "\t to: %f x, %f y\n"
1349  "\n"
1350  "\tframe moved by: %f x, %f y\n"
1351  "\t rotated by: %f degrees\n"
1352  "\t scaled by: %f x, %f y\n",
1353  curr_frame,
1354  old_vals[RingbufX], old_vals[RingbufY],
1355  new_vals[RingbufX], new_vals[RingbufY],
1356  old_vals[RingbufRot] * (180.0 / M_PI),
1357  new_vals[RingbufRot] * (180.0 / M_PI),
1358  old_vals[RingbufScaleX], old_vals[RingbufScaleY],
1359  new_vals[RingbufScaleX], new_vals[RingbufScaleY],
1360  old_vals[RingbufX] - new_vals[RingbufX], old_vals[RingbufY] - new_vals[RingbufY],
1361  old_vals[RingbufRot] * (180.0 / M_PI) - new_vals[RingbufRot] * (180.0 / M_PI),
1362  new_vals[RingbufScaleX] / old_vals[RingbufScaleX], new_vals[RingbufScaleY] / old_vals[RingbufScaleY]
1363  );
1364 }
1365 
1366 // Uses the buffered motion information to determine a transform that smooths the
1367 // given frame and applies it
//
// Takes ownership of input_frame: it is freed on every path. Returns the result
// of ff_filter_frame() for the produced frame, or a negative error code.
//
// NOTE(review): the opening lines of several calls are missing from this
// listing. A bare argument list that ends in ");" below belongs to one of those
// calls (transform-matrix construction, kernel launches, blocking buffer
// writes).
1368 static int filter_frame(AVFilterLink *link, AVFrame *input_frame)
1369 {
1370  AVFilterContext *avctx = link->dst;
1371  AVFilterLink *outlink = avctx->outputs[0];
1372  DeshakeOpenCLContext *deshake_ctx = avctx->priv;
1373  AVFrame *cropped_frame = NULL, *transformed_frame = NULL;
1374  int err;
1375  cl_int cle;
1376  float new_vals[RingbufCount];
1377  float old_vals[RingbufCount];
1378  // Luma (in the case of YUV) transform, or just the transform in the case of RGB
1379  float transform_y[9];
1380  // Chroma transform
1381  float transform_uv[9];
1382  // Luma crop transform (or RGB)
1383  float transform_crop_y[9];
1384  // Chroma crop transform
1385  float transform_crop_uv[9];
1386  float transform_debug_rgb[9];
1387  size_t global_work[2];
1388  int64_t duration;
1389  cl_mem src, transformed, dst;
1390  cl_mem transforms[3];
1391  CropInfo crops[3];
1392  cl_event transform_event, crop_upscale_event;
1393  DebugMatches debug_matches;
1394  cl_int num_model_matches;
1395 
1396  const float center_w = (float)input_frame->width / 2;
1397  const float center_h = (float)input_frame->height / 2;
1398 
1399  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(deshake_ctx->sw_format);
1400  const int chroma_width = AV_CEIL_RSHIFT(input_frame->width, desc->log2_chroma_w);
1401  const int chroma_height = AV_CEIL_RSHIFT(input_frame->height, desc->log2_chroma_h);
1402 
1403  const float center_w_chroma = (float)chroma_width / 2;
1404  const float center_h_chroma = (float)chroma_height / 2;
1405 
1406  const float luma_w_over_chroma_w = ((float)input_frame->width / (float)chroma_width);
1407  const float luma_h_over_chroma_h = ((float)input_frame->height / (float)chroma_height);
1408 
// Pop the debug record that queue_frame() stored for this frame; its matches
// array is freed further down
1409  if (deshake_ctx->debug_on) {
1410  av_fifo_read(
1411  deshake_ctx->abs_motion.debug_matches,
1412  &debug_matches, 1);
1413  }
1414 
// Frame duration: prefer the value carried by the frame, otherwise derive one
// frame period from the output frame rate
1415 #if FF_API_PKT_DURATION
1417  if (input_frame->pkt_duration) {
1418  duration = input_frame->pkt_duration;
1419  } else
1421 #endif
1422  if (input_frame->duration) {
1423  duration = input_frame->duration;
1424  } else {
1425  duration = av_rescale_q(1, av_inv_q(outlink->frame_rate), outlink->time_base);
1426  }
1427  deshake_ctx->duration = input_frame->pts + duration;
1428 
1429  // Get the absolute transform data for this frame
1430  for (int i = 0; i < RingbufCount; i++) {
1431  av_fifo_peek(deshake_ctx->abs_motion.ringbuffers[i],
1432  &old_vals[i], 1,
1433  deshake_ctx->abs_motion.curr_frame_offset);
1434  }
1435 
1436  if (deshake_ctx->tripod_mode) {
1437  // If tripod mode is turned on we simply undo all motion relative to the
1438  // first frame
1439 
1440  new_vals[RingbufX] = 0.0f;
1441  new_vals[RingbufY] = 0.0f;
1442  new_vals[RingbufRot] = 0.0f;
1443  new_vals[RingbufScaleX] = 1.0f;
1444  new_vals[RingbufScaleY] = 1.0f;
1445  } else {
1446  // Tripod mode is off and we need to smooth a moving camera
1447 
// Each component is smoothed from its own ring buffer; the fourth argument
// differs per component (frame width/height, pi/4, 2.0) - its exact meaning is
// defined by smooth(), which is not visible here
1448  new_vals[RingbufX] = smooth(
1449  deshake_ctx,
1450  deshake_ctx->gauss_kernel,
1451  deshake_ctx->smooth_window,
1452  input_frame->width,
1453  deshake_ctx->abs_motion.ringbuffers[RingbufX]
1454  );
1455  new_vals[RingbufY] = smooth(
1456  deshake_ctx,
1457  deshake_ctx->gauss_kernel,
1458  deshake_ctx->smooth_window,
1459  input_frame->height,
1460  deshake_ctx->abs_motion.ringbuffers[RingbufY]
1461  );
1462  new_vals[RingbufRot] = smooth(
1463  deshake_ctx,
1464  deshake_ctx->gauss_kernel,
1465  deshake_ctx->smooth_window,
1466  M_PI / 4,
1467  deshake_ctx->abs_motion.ringbuffers[RingbufRot]
1468  );
1469  new_vals[RingbufScaleX] = smooth(
1470  deshake_ctx,
1471  deshake_ctx->gauss_kernel,
1472  deshake_ctx->smooth_window,
1473  2.0f,
1474  deshake_ctx->abs_motion.ringbuffers[RingbufScaleX]
1475  );
1476  new_vals[RingbufScaleY] = smooth(
1477  deshake_ctx,
1478  deshake_ctx->gauss_kernel,
1479  deshake_ctx->smooth_window,
1480  2.0f,
1481  deshake_ctx->abs_motion.ringbuffers[RingbufScaleY]
1482  );
1483  }
1484 
// Arguments of the (missing) call that fills transform_y: translation and
// rotation differences old - new, scale ratios new / old, about the luma centre
1486  old_vals[RingbufX] - new_vals[RingbufX],
1487  old_vals[RingbufY] - new_vals[RingbufY],
1488  old_vals[RingbufRot] - new_vals[RingbufRot],
1489  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1490  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1491  center_w,
1492  center_h,
1493  transform_y
1494  );
1495 
// Same for transform_uv, with the translation scaled down by the
// luma-to-chroma size ratio and the chroma plane centre
1497  (old_vals[RingbufX] - new_vals[RingbufX]) / luma_w_over_chroma_w,
1498  (old_vals[RingbufY] - new_vals[RingbufY]) / luma_h_over_chroma_h,
1499  old_vals[RingbufRot] - new_vals[RingbufRot],
1500  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1501  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1502  center_w_chroma,
1503  center_h_chroma,
1504  transform_uv
1505  );
1506 
1507  CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_y, NULL);
1508  CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_uv, 9 * sizeof(float), transform_uv, NULL);
1509 
1510  if (deshake_ctx->debug_on)
1511  transform_debug(avctx, new_vals, old_vals, deshake_ctx->curr_frame);
1512 
// Both output candidates are always allocated; which one is emitted depends on
// should_crop (see the end of the function)
1513  cropped_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
1514  if (!cropped_frame) {
1515  err = AVERROR(ENOMEM);
1516  goto fail;
1517  }
1518 
1519  transformed_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
1520  if (!transformed_frame) {
1521  err = AVERROR(ENOMEM);
1522  goto fail;
1523  }
1524 
// Plane 0 uses the luma/RGB transform, planes 1 and 2 share the chroma one
1525  transforms[0] = deshake_ctx->transform_y;
1526  transforms[1] = transforms[2] = deshake_ctx->transform_uv;
1527 
// NOTE(review): transforms[] has 3 entries while the loop bound is the size of
// the data[] array; this relies on the "!transformed" break firing before
// p == 3, i.e. on frames having at most three planes - confirm.
1528  for (int p = 0; p < FF_ARRAY_ELEMS(transformed_frame->data); p++) {
1529  // Transform all of the planes appropriately
1530  src = (cl_mem)input_frame->data[p];
1531  transformed = (cl_mem)transformed_frame->data[p];
1532 
1533  if (!transformed)
1534  break;
1535 
1536  err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);
1537  if (err < 0)
1538  goto fail;
1539 
// Arguments of the (missing) kernel launch for the transform kernel
1541  deshake_ctx->command_queue,
1542  deshake_ctx->kernel_transform,
1543  global_work,
1544  NULL,
1545  &transform_event,
1546  { sizeof(cl_mem), &src },
1547  { sizeof(cl_mem), &transformed },
1548  { sizeof(cl_mem), &transforms[p] },
1549  );
1550  }
1551 
// Debug overlay: only for non-YUV input and only when matches were recorded
1552  if (deshake_ctx->debug_on && !deshake_ctx->is_yuv && debug_matches.num_matches > 0) {
// Arguments of a (missing) buffer write uploading the recorded matches
1554  deshake_ctx->command_queue,
1555  deshake_ctx->debug_matches,
1556  debug_matches.num_matches * sizeof(MotionVector),
1557  debug_matches.matches,
1558  NULL
1559  );
1560 
// Arguments of a (missing) buffer write uploading the model matches
1562  deshake_ctx->command_queue,
1563  deshake_ctx->debug_model_matches,
1564  debug_matches.num_model_matches * sizeof(MotionVector),
1565  debug_matches.model_matches,
1566  NULL
1567  );
1568 
1569  num_model_matches = debug_matches.num_model_matches;
1570 
1571  // Invert the transform
// (operands swapped relative to transform_y above)
1573  new_vals[RingbufX] - old_vals[RingbufX],
1574  new_vals[RingbufY] - old_vals[RingbufY],
1575  new_vals[RingbufRot] - old_vals[RingbufRot],
1576  old_vals[RingbufScaleX] / new_vals[RingbufScaleX],
1577  old_vals[RingbufScaleY] / new_vals[RingbufScaleY],
1578  center_w,
1579  center_h,
1580  transform_debug_rgb
1581  );
1582 
// The transform_y device buffer is reused to carry the inverted transform to
// the debug kernel
1583  CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_debug_rgb, NULL);
1584 
1585  transformed = (cl_mem)transformed_frame->data[0];
// Arguments of the (missing) kernel launch for the debug kernel; the work size
// is one item per recorded match
1587  deshake_ctx->command_queue,
1588  deshake_ctx->kernel_draw_debug_info,
1589  (size_t[]){ debug_matches.num_matches },
1590  NULL,
1591  NULL,
1592  { sizeof(cl_mem), &transformed },
1593  { sizeof(cl_mem), &deshake_ctx->debug_matches },
1594  { sizeof(cl_mem), &deshake_ctx->debug_model_matches },
1595  { sizeof(cl_int), &num_model_matches },
1596  { sizeof(cl_mem), &deshake_ctx->transform_y }
1597  );
1598  }
1599 
1600  if (deshake_ctx->should_crop) {
1601  // Generate transforms for cropping
// The crop transforms use one fifth of the translation and rotation applied to
// the frame (the scale ratios are unchanged); the rationale for the factor 5
// is not stated in this file section
1603  (old_vals[RingbufX] - new_vals[RingbufX]) / 5,
1604  (old_vals[RingbufY] - new_vals[RingbufY]) / 5,
1605  (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
1606  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1607  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1608  center_w,
1609  center_h,
1610  transform_crop_y
1611  );
1612  update_needed_crop(&deshake_ctx->crop_y, transform_crop_y, input_frame->width, input_frame->height);
1613 
1615  (old_vals[RingbufX] - new_vals[RingbufX]) / (5 * luma_w_over_chroma_w),
1616  (old_vals[RingbufY] - new_vals[RingbufY]) / (5 * luma_h_over_chroma_h),
1617  (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
1618  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1619  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1620  center_w_chroma,
1621  center_h_chroma,
1622  transform_crop_uv
1623  );
1624  update_needed_crop(&deshake_ctx->crop_uv, transform_crop_uv, chroma_width, chroma_height);
1625 
1626  crops[0] = deshake_ctx->crop_y;
1627  crops[1] = crops[2] = deshake_ctx->crop_uv;
1628 
// NOTE(review): same three-plane assumption as for transforms[] above
1629  for (int p = 0; p < FF_ARRAY_ELEMS(cropped_frame->data); p++) {
1630  // Crop all of the planes appropriately
1631  dst = (cl_mem)cropped_frame->data[p];
1632  transformed = (cl_mem)transformed_frame->data[p];
1633 
1634  if (!dst)
1635  break;
1636 
1637  err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);
1638  if (err < 0)
1639  goto fail;
1640 
// Arguments of the (missing) kernel launch for the crop/upscale kernel
1642  deshake_ctx->command_queue,
1643  deshake_ctx->kernel_crop_upscale,
1644  global_work,
1645  NULL,
1646  &crop_upscale_event,
1647  { sizeof(cl_mem), &transformed },
1648  { sizeof(cl_mem), &dst },
1649  { sizeof(cl_float2), &crops[p].top_left },
1650  { sizeof(cl_float2), &crops[p].bottom_right },
1651  );
1652  }
1653  }
1654 
1655  if (deshake_ctx->curr_frame < deshake_ctx->smooth_window / 2) {
1656  // This means we are somewhere at the start of the video. We need to
1657  // increment the current frame offset until it reaches the center of
1658  // the ringbuffers (as the current frame will be located there for
1659  // the rest of the video).
1660  //
1661  // The end of the video is taken care of by draining motion data
1662  // one-by-one out of the buffer, causing the (at that point fixed)
1663  // offset to move towards later frames' data.
1664  ++deshake_ctx->abs_motion.curr_frame_offset;
1665  }
1666 
1667  if (deshake_ctx->abs_motion.data_end_offset != -1) {
1668  // Keep the end offset in sync with the frame it's supposed to be
1669  // positioned at
1670  --deshake_ctx->abs_motion.data_end_offset;
1671 
1672  if (deshake_ctx->abs_motion.data_end_offset == deshake_ctx->abs_motion.curr_frame_offset - 1) {
1673  // The end offset would be the start of the new video sequence; flip to
1674  // start offset
1675  deshake_ctx->abs_motion.data_end_offset = -1;
1676  deshake_ctx->abs_motion.data_start_offset = deshake_ctx->abs_motion.curr_frame_offset;
1677  }
1678  } else if (deshake_ctx->abs_motion.data_start_offset != -1) {
1679  // Keep the start offset in sync with the frame it's supposed to be
1680  // positioned at
1681  --deshake_ctx->abs_motion.data_start_offset;
1682  }
1683 
// NOTE(review): transform_event / crop_upscale_event are overwritten on every
// loop iteration above, so only the event of the last plane launched appears
// to be accounted for here - confirm against the launch macro.
1684  if (deshake_ctx->debug_on) {
1685  deshake_ctx->transform_time += ff_opencl_get_event_time(transform_event);
1686  if (deshake_ctx->should_crop) {
1687  deshake_ctx->crop_upscale_time += ff_opencl_get_event_time(crop_upscale_event);
1688  }
1689  }
1690 
1691  ++deshake_ctx->curr_frame;
1692 
// av_freep() also resets the pointer, so the fail path below will not free the
// matches array a second time
1693  if (deshake_ctx->debug_on)
1694  av_freep(&debug_matches.matches);
1695 
// Emit the cropped frame when cropping is enabled, otherwise the transformed
// one; the frame that is not emitted is released together with the input
1696  if (deshake_ctx->should_crop) {
1697  err = av_frame_copy_props(cropped_frame, input_frame);
1698  if (err < 0)
1699  goto fail;
1700 
1701  av_frame_free(&transformed_frame);
1702  av_frame_free(&input_frame);
1703  return ff_filter_frame(outlink, cropped_frame);
1704 
1705  } else {
1706  err = av_frame_copy_props(transformed_frame, input_frame);
1707  if (err < 0)
1708  goto fail;
1709 
1710  av_frame_free(&cropped_frame);
1711  av_frame_free(&input_frame);
1712  return ff_filter_frame(outlink, transformed_frame);
1713  }
1714 
1715 fail:
// Wait for any work already enqueued before releasing the frames it refers to
1716  clFinish(deshake_ctx->command_queue);
1717 
1718  if (deshake_ctx->debug_on)
1719  if (debug_matches.matches)
1720  av_freep(&debug_matches.matches);
1721 
1722  av_frame_free(&input_frame);
1723  av_frame_free(&transformed_frame);
1724  av_frame_free(&cropped_frame);
1725  return err;
1726 }
1727 
1728 // Add the given frame to the frame queue to eventually be processed.
1729 //
1730 // Also determines the motion from the previous frame and updates the stored
1731 // motion information accordingly.
//
// Returns the result of ff_framequeue_add() on success. On failure the input
// frame is freed and a negative error code is returned.
//
// NOTE(review): the opening lines of the kernel launches are missing from this
// listing; each bare argument list ending in ");" below belongs to one of them.
1732 static int queue_frame(AVFilterLink *link, AVFrame *input_frame)
1733 {
1734  AVFilterContext *avctx = link->dst;
1735  DeshakeOpenCLContext *deshake_ctx = avctx->priv;
1736  int err;
1737  int num_vectors;
1738  int num_inliers = 0;
1739  cl_int cle;
1740  FrameDelta relative;
1741  SimilarityMatrix model;
1742  size_t global_work[2];
1743  size_t harris_global_work[2];
1744  size_t grid_32_global_work[2];
1745  int grid_32_h, grid_32_w;
1746  size_t local_work[2];
1747  cl_mem src, temp;
// NOTE(review): sized 5 but indexed up to RingbufCount below; presumably
// RingbufCount == 5 - confirm against the enum definition.
1748  float prev_vals[5];
1749  float new_vals[5];
1750  cl_event grayscale_event, harris_response_event, refine_features_event,
1751  brief_event, match_descriptors_event, read_buf_event;
1752  DebugMatches debug_matches;
1753 
1754  num_vectors = 0;
1755 
1756  local_work[0] = 8;
1757  local_work[1] = 8;
1758 
// Three work sizes over plane 0; they differ only in the last argument
// (0, 8 and 32)
1759  err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, 0, 0);
1760  if (err < 0)
1761  goto fail;
1762 
1763  err = ff_opencl_filter_work_size_from_image(avctx, harris_global_work, input_frame, 0, 8);
1764  if (err < 0)
1765  goto fail;
1766 
1767  err = ff_opencl_filter_work_size_from_image(avctx, grid_32_global_work, input_frame, 0, 32);
1768  if (err < 0)
1769  goto fail;
1770 
1771  // We want a single work-item for each 32x32 block of pixels in the input frame
1772  grid_32_global_work[0] /= 32;
1773  grid_32_global_work[1] /= 32;
1774 
1775  grid_32_h = ROUNDED_UP_DIV(input_frame->height, 32);
1776  grid_32_w = ROUNDED_UP_DIV(input_frame->width, 32);
1777 
// YUV input: the first plane is used directly as the grayscale image.
// Otherwise the grayscale kernel converts plane 0 into the filter's own image.
1778  if (deshake_ctx->is_yuv) {
1779  deshake_ctx->grayscale = (cl_mem)input_frame->data[0];
1780  } else {
1781  src = (cl_mem)input_frame->data[0];
1782 
1784  deshake_ctx->command_queue,
1785  deshake_ctx->kernel_grayscale,
1786  global_work,
1787  NULL,
1788  &grayscale_event,
1789  { sizeof(cl_mem), &src },
1790  { sizeof(cl_mem), &deshake_ctx->grayscale }
1791  );
1792  }
1793 
// Harris response kernel: grayscale image in, harris_buf out, 8x8 local size
1795  deshake_ctx->command_queue,
1796  deshake_ctx->kernel_harris_response,
1797  harris_global_work,
1798  local_work,
1799  &harris_response_event,
1800  { sizeof(cl_mem), &deshake_ctx->grayscale },
1801  { sizeof(cl_mem), &deshake_ctx->harris_buf }
1802  );
1803 
// Feature refinement kernel, one work-item per 32x32 block
1805  deshake_ctx->command_queue,
1806  deshake_ctx->kernel_refine_features,
1807  grid_32_global_work,
1808  NULL,
1809  &refine_features_event,
1810  { sizeof(cl_mem), &deshake_ctx->grayscale },
1811  { sizeof(cl_mem), &deshake_ctx->harris_buf },
1812  { sizeof(cl_mem), &deshake_ctx->refined_features },
1813  { sizeof(cl_int), &deshake_ctx->refine_features }
1814  );
1815 
// BRIEF descriptor kernel for the refined features
1817  deshake_ctx->command_queue,
1818  deshake_ctx->kernel_brief_descriptors,
1819  grid_32_global_work,
1820  NULL,
1821  &brief_event,
1822  { sizeof(cl_mem), &deshake_ctx->grayscale },
1823  { sizeof(cl_mem), &deshake_ctx->refined_features },
1824  { sizeof(cl_mem), &deshake_ctx->descriptors },
1825  { sizeof(cl_mem), &deshake_ctx->brief_pattern}
1826  );
1827 
1828  if (!av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[RingbufX])) {
1829  // This is the first frame we've been given to queue, meaning there is
1830  // no previous frame to match descriptors to
1831 
1832  goto no_motion_data;
1833  }
1834 
// Descriptor matching kernel: previous vs. current features and descriptors
1836  deshake_ctx->command_queue,
1837  deshake_ctx->kernel_match_descriptors,
1838  grid_32_global_work,
1839  NULL,
1840  &match_descriptors_event,
1841  { sizeof(cl_mem), &deshake_ctx->prev_refined_features },
1842  { sizeof(cl_mem), &deshake_ctx->refined_features },
1843  { sizeof(cl_mem), &deshake_ctx->descriptors },
1844  { sizeof(cl_mem), &deshake_ctx->prev_descriptors },
1845  { sizeof(cl_mem), &deshake_ctx->matches }
1846  );
1847 
// Blocking read (CL_TRUE) of one MotionVector per 32x32 block into
// matches_host
1848  cle = clEnqueueReadBuffer(
1849  deshake_ctx->command_queue,
1850  deshake_ctx->matches,
1851  CL_TRUE,
1852  0,
1853  grid_32_h * grid_32_w * sizeof(MotionVector),
1854  deshake_ctx->matches_host,
1855  0,
1856  NULL,
1857  &read_buf_event
1858  );
1859  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to read matches to host: %d.\n", cle);
1860 
1861  num_vectors = make_vectors_contig(deshake_ctx, grid_32_h, grid_32_w);
1862 
1863  if (num_vectors < 10) {
1864  // Not enough matches to get reliable motion data for this frame
1865  //
1866  // From this point on all data is relative to this frame rather than the
1867  // original frame. We have to make sure that we don't mix values that were
1868  // relative to the original frame with the new values relative to this
1869  // frame when doing the gaussian smoothing. We keep track of where the old
1870  // values end using this data_end_offset field in order to accomplish
1871  // that goal.
1872  //
1873  // If no motion data is present for multiple frames in a short window of
1874  // time, we leave the end where it was to avoid mixing 0s in with the
1875  // old data (and just treat them all as part of the new values)
1876  if (deshake_ctx->abs_motion.data_end_offset == -1) {
1877  deshake_ctx->abs_motion.data_end_offset =
1878  av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[RingbufX]) - 1;
1879  }
1880 
1881  goto no_motion_data;
1882  }
1883 
// Fit a motion model to the contiguous matches; a zero return is treated as
// "no usable motion". The three numeric arguments are tuning constants whose
// meaning is defined by estimate_affine_2d(), not visible here.
1884  if (!estimate_affine_2d(
1885  deshake_ctx,
1886  deshake_ctx->matches_contig_host,
1887  &debug_matches,
1888  num_vectors,
1889  model.matrix,
1890  10.0,
1891  3000,
1892  0.999999999999
1893  )) {
1894  goto no_motion_data;
1895  }
1896 
// Collect the matches still flagged should_consider into the inliers array
1897  for (int i = 0; i < num_vectors; i++) {
1898  if (deshake_ctx->matches_contig_host[i].should_consider) {
1899  deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[i];
1900  num_inliers++;
1901  }
1902  }
1903 
1904  if (!minimize_error(
1905  deshake_ctx,
1906  deshake_ctx->inliers,
1907  &debug_matches,
1908  num_inliers,
1909  model.matrix,
1910  400
1911  )) {
1912  goto no_motion_data;
1913  }
1914 
1915 
1916  relative = decompose_transform(model.matrix);
1917 
1918  // Get the absolute transform data for the previous frame
// (the newest entry in each ring buffer)
1919  for (int i = 0; i < RingbufCount; i++) {
1920  av_fifo_peek(
1921  deshake_ctx->abs_motion.ringbuffers[i],
1922  &prev_vals[i], 1,
1923  av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[i]) - 1);
1924  }
1925 
// Accumulate: translation and rotation add up, scale is divided by the
// relative scale
1926  new_vals[RingbufX] = prev_vals[RingbufX] + relative.translation.s[0];
1927  new_vals[RingbufY] = prev_vals[RingbufY] + relative.translation.s[1];
1928  new_vals[RingbufRot] = prev_vals[RingbufRot] + relative.rotation;
1929  new_vals[RingbufScaleX] = prev_vals[RingbufScaleX] / relative.scale.s[0];
1930  new_vals[RingbufScaleY] = prev_vals[RingbufScaleY] / relative.scale.s[1];
1931 
// NOTE(review): kernel timings are only accumulated on this success path;
// frames that jump to no_motion_data skip the accounting even though some
// kernels were launched for them - confirm this is intended.
1932  if (deshake_ctx->debug_on) {
1933  if (!deshake_ctx->is_yuv) {
1934  deshake_ctx->grayscale_time += ff_opencl_get_event_time(grayscale_event);
1935  }
1936  deshake_ctx->harris_response_time += ff_opencl_get_event_time(harris_response_event);
1937  deshake_ctx->refine_features_time += ff_opencl_get_event_time(refine_features_event);
1938  deshake_ctx->brief_descriptors_time += ff_opencl_get_event_time(brief_event);
1939  deshake_ctx->match_descriptors_time += ff_opencl_get_event_time(match_descriptors_event);
1940  deshake_ctx->read_buf_time += ff_opencl_get_event_time(read_buf_event);
1941  }
1942 
1943  goto end;
1944 
1945 no_motion_data:
// Store identity motion (no translation or rotation, unit scale) for this frame
1946  new_vals[RingbufX] = 0.0f;
1947  new_vals[RingbufY] = 0.0f;
1948  new_vals[RingbufRot] = 0.0f;
1949  new_vals[RingbufScaleX] = 1.0f;
1950  new_vals[RingbufScaleY] = 1.0f;
1951 
1952  for (int i = 0; i < num_vectors; i++) {
1953  deshake_ctx->matches_contig_host[i].should_consider = 0;
1954  }
1955  debug_matches.num_model_matches = 0;
1956 
1957  if (deshake_ctx->debug_on) {
1958  av_log(avctx, AV_LOG_VERBOSE,
1959  "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n"
1960  );
1961  }
1962 
1963  goto end;
1964 
1965 end:
1966  // Swap the descriptor buffers (we don't need the previous frame's descriptors
1967  // again so we will use that space for the next frame's descriptors)
1968  temp = deshake_ctx->prev_descriptors;
1969  deshake_ctx->prev_descriptors = deshake_ctx->descriptors;
1970  deshake_ctx->descriptors = temp;
1971 
1972  // Same for the refined features
1973  temp = deshake_ctx->prev_refined_features;
1974  deshake_ctx->prev_refined_features = deshake_ctx->refined_features;
1975  deshake_ctx->refined_features = temp;
1976 
// In debug mode, queue a heap copy of this frame's matches; filter_frame()
// reads the record back and frees the copy
1977  if (deshake_ctx->debug_on) {
1978  if (num_vectors == 0) {
1979  debug_matches.matches = NULL;
1980  } else {
1981  debug_matches.matches = av_malloc_array(num_vectors, sizeof(MotionVector));
1982 
1983  if (!debug_matches.matches) {
1984  err = AVERROR(ENOMEM);
1985  goto fail;
1986  }
1987  }
1988 
1989  for (int i = 0; i < num_vectors; i++) {
1990  debug_matches.matches[i] = deshake_ctx->matches_contig_host[i];
1991  }
1992  debug_matches.num_matches = num_vectors;
1993 
1994  av_fifo_write(
1995  deshake_ctx->abs_motion.debug_matches,
1996  &debug_matches, 1);
1997  }
1998 
// Append this frame's absolute motion to every ring buffer
1999  for (int i = 0; i < RingbufCount; i++) {
2000  av_fifo_write(deshake_ctx->abs_motion.ringbuffers[i], &new_vals[i], 1);
2001  }
2002 
2003  return ff_framequeue_add(&deshake_ctx->fq, input_frame);
2004 
2005 fail:
// Wait for any work already enqueued before releasing the frame it refers to
2006  clFinish(deshake_ctx->command_queue);
2007  av_frame_free(&input_frame);
2008  return err;
2009 }
2010 
// Body of the filter's activate callback (the cross-reference index lists it
// as `static int activate(AVFilterContext *ctx)` at listing line 2011; that
// signature line is missing from this extract).
//
// Visible flow: while input has not ended, a received frame is queued and,
// once enough frames are buffered, the oldest queued frame is passed to
// filter_frame(). After EOF the remaining queued frames are flushed and the
// output link is marked EOF. Returns a negative error code on failure, 0
// after signalling EOF, the result of filter_frame() when a frame is
// processed, and FFERROR_NOT_READY otherwise.
//
// NOTE(review): listing lines 2020, 2023, 2031, 2043 and 2077 are absent from
// this extract; the notes below mark where they belong. Confirm against the
// original file before relying on them.
2012 {
2013  AVFilterLink *inlink = ctx->inputs[0];
2014  AVFilterLink *outlink = ctx->outputs[0];
2015  DeshakeOpenCLContext *deshake_ctx = ctx->priv;
2016  AVFrame *frame = NULL;
2017  int ret, status;
2018  int64_t pts;
2019 
// NOTE(review): listing line 2020 is missing here — presumably the
// FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink) call listed in the index.
2021 
2022  if (!deshake_ctx->eof) {
// NOTE(review): listing line 2023 is missing — it must assign `ret` (and
// `frame`) before the checks below; presumably ff_inlink_consume_frame().
2024  if (ret < 0)
2025  return ret;
2026  if (ret > 0) {
// Only frames that carry a hardware frames context are accepted.
2027  if (!frame->hw_frames_ctx)
2028  return AVERROR(EINVAL);
2029 
// One-time setup, done when the first frame arrives.
2030  if (!deshake_ctx->initialized) {
// NOTE(review): listing line 2031 is missing — presumably
// `ret = deshake_opencl_init(ctx);`.
2032  if (ret < 0)
2033  return ret;
2034  }
2035 
2036  // If there is no more space in the ringbuffers, remove the oldest
2037  // values to make room for the new ones
// Only the RingbufX buffer is tested for free space; every ringbuffer is
// then drained by one element.
2038  if (!av_fifo_can_write(deshake_ctx->abs_motion.ringbuffers[RingbufX])) {
2039  for (int i = 0; i < RingbufCount; i++) {
2040  av_fifo_drain2(deshake_ctx->abs_motion.ringbuffers[i], 1);
2041  }
2042  }
// NOTE(review): listing line 2043 is missing — presumably
// `ret = queue_frame(inlink, frame);`.
2044  if (ret < 0)
2045  return ret;
// Always true at this point: negative values returned just above.
2046  if (ret >= 0) {
2047  // See if we have enough buffered frames to process one
2048  //
2049  // "enough" is half the smooth window of queued frames into the future
2050  if (ff_framequeue_queued_frames(&deshake_ctx->fq) >= deshake_ctx->smooth_window / 2) {
2051  return filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));
2052  }
2053  }
2054  }
2055  }
2056 
// Latch EOF once the input link reports it; other status values are
// acknowledged but do not change any state here.
2057  if (!deshake_ctx->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
2058  if (status == AVERROR_EOF) {
2059  deshake_ctx->eof = 1;
2060  }
2061  }
2062 
2063  if (deshake_ctx->eof) {
2064  // Finish processing the rest of the frames in the queue.
2065  while(ff_framequeue_queued_frames(&deshake_ctx->fq) != 0) {
// Drop the oldest entry of every ringbuffer before each flushed frame.
2066  for (int i = 0; i < RingbufCount; i++) {
2067  av_fifo_drain2(deshake_ctx->abs_motion.ringbuffers[i], 1);
2068  }
2069 
2070  ret = filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));
2071  if (ret < 0) {
2072  return ret;
2073  }
2074  }
2075 
// In debug mode, report the accumulated timings averaged over curr_frame.
2076  if (deshake_ctx->debug_on) {
// NOTE(review): listing line 2077 is missing — the opening of the logging
// call that takes the format string below; presumably av_log() at
// AV_LOG_VERBOSE.
2078  "Average kernel execution times:\n"
2079  "\t grayscale: %0.3f ms\n"
2080  "\t harris_response: %0.3f ms\n"
2081  "\t refine_features: %0.3f ms\n"
2082  "\tbrief_descriptors: %0.3f ms\n"
2083  "\tmatch_descriptors: %0.3f ms\n"
2084  "\t transform: %0.3f ms\n"
2085  "\t crop_upscale: %0.3f ms\n"
2086  "Average buffer read times:\n"
2087  "\t features buf: %0.3f ms\n",
2088  averaged_event_time_ms(deshake_ctx->grayscale_time, deshake_ctx->curr_frame),
2089  averaged_event_time_ms(deshake_ctx->harris_response_time, deshake_ctx->curr_frame),
2090  averaged_event_time_ms(deshake_ctx->refine_features_time, deshake_ctx->curr_frame),
2091  averaged_event_time_ms(deshake_ctx->brief_descriptors_time, deshake_ctx->curr_frame),
2092  averaged_event_time_ms(deshake_ctx->match_descriptors_time, deshake_ctx->curr_frame),
2093  averaged_event_time_ms(deshake_ctx->transform_time, deshake_ctx->curr_frame),
2094  averaged_event_time_ms(deshake_ctx->crop_upscale_time, deshake_ctx->curr_frame),
2095  averaged_event_time_ms(deshake_ctx->read_buf_time, deshake_ctx->curr_frame)
2096  );
2097  }
2098 
// Mark the output link as EOF, using deshake_ctx->duration as the timestamp.
2099  ff_outlink_set_status(outlink, AVERROR_EOF, deshake_ctx->duration);
2100  return 0;
2101  }
2102 
// Nothing was produced in this call: pass any pending request on the output
// link through to the input link.
2103  if (!deshake_ctx->eof) {
2104  FF_FILTER_FORWARD_WANTED(outlink, inlink);
2105  }
2106 
2107  return FFERROR_NOT_READY;
2108 }
2109 
// Input pad table: a single video pad named "default" whose config_props
// callback is ff_opencl_filter_config_input.
// NOTE(review): the declaration line (listing line 2110) is missing from this
// extract; the cross-reference index gives it as
// `static const AVFilterPad deshake_opencl_inputs[]`.
2111  {
2112  .name = "default",
2113  .type = AVMEDIA_TYPE_VIDEO,
2114  .config_props = &ff_opencl_filter_config_input,
2115  },
2116 };
2117 
// Output pad table: a single video pad named "default" whose config_props
// callback is ff_opencl_filter_config_output.
// NOTE(review): the declaration line (listing line 2118) is missing from this
// extract; the cross-reference index gives it as
// `static const AVFilterPad deshake_opencl_outputs[]`.
2119  {
2120  .name = "default",
2121  .type = AVMEDIA_TYPE_VIDEO,
2122  .config_props = &ff_opencl_filter_config_output,
2123  },
2124 };
2125 
// OFFSET(x): byte offset of field x inside DeshakeOpenCLContext, used by the
// option table to say where each option value is stored.
2126 #define OFFSET(x) offsetof(DeshakeOpenCLContext, x)
// FLAGS: option flags shared by every entry of the option table (filtering
// parameter + video parameter). The expansion is not parenthesized, so it is
// only safe where operator precedence cannot split it.
2127 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
2128 
// User-visible options of the filter. Each entry names the option, gives its
// help text, the DeshakeOpenCLContext field it is stored in (via OFFSET), its
// type and default value, and ends with FLAGS; the two numbers before FLAGS
// are presumably the allowed minimum and maximum — confirm against the
// AVOption definition. The table is terminated by a { NULL } entry.
// NOTE(review): the declaration line (listing line 2129) is missing from this
// extract; the cross-reference index gives it as
// `static const AVOption deshake_opencl_options[]`.
2130  {
2131  "tripod", "simulates a tripod by preventing any camera movement whatsoever "
2132  "from the original frame",
2133  OFFSET(tripod_mode), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS
2134  },
2135  {
2136  "debug", "turn on additional debugging information",
2137  OFFSET(debug_on), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS
2138  },
2139  {
2140  "adaptive_crop", "attempt to subtly crop borders to reduce mirrored content",
2141  OFFSET(should_crop), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS
2142  },
2143  {
2144  "refine_features", "refine feature point locations at a sub-pixel level",
2145  OFFSET(refine_features), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS
2146  },
2147  {
// A value of 0 (the default) asks the filter to pick the strength adaptively,
// as stated in the help text.
2148  "smooth_strength", "smoothing strength (0 attempts to adaptively determine optimal strength)",
2149  OFFSET(smooth_percent), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, 0.0f, 1.0f, FLAGS
2150  },
2151  {
2152  "smooth_window_multiplier", "multiplier for number of frames to buffer for motion data",
2153  OFFSET(smooth_window_multiplier), AV_OPT_TYPE_FLOAT, {.dbl = 2.0}, 0.1, 10.0, FLAGS
2154  },
2155  { NULL }
2156 };
2157 
// Presumably defines the deshake_opencl_class object that the filter
// definition references through .priv_class, built from the option table
// above — confirm against the AVFILTER_DEFINE_CLASS macro definition.
2158 AVFILTER_DEFINE_CLASS(deshake_opencl);
2159 
// Filter definition for "deshake_opencl": private context size and class,
// the activate callback, and flags marking the filter as hardware-frame
// aware and able to use a hardware device.
// NOTE(review): listing lines 2160, 2165-2166 and 2168-2170 are missing from
// this extract — the opening declaration and, presumably, the init/uninit
// callbacks and the FILTER_INPUTS / FILTER_OUTPUTS / FILTER_SINGLE_PIXFMT
// entries listed in the cross-reference index. Confirm against the original.
2161  .name = "deshake_opencl",
2162  .description = NULL_IF_CONFIG_SMALL("Feature-point based video stabilization filter"),
2163  .priv_size = sizeof(DeshakeOpenCLContext),
2164  .priv_class = &deshake_opencl_class,
2167  .activate = activate,
2171  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
2172  .flags = AVFILTER_FLAG_HWDEVICE,
2173 };
deshake_opencl_inputs
static const AVFilterPad deshake_opencl_inputs[]
Definition: vf_deshake_opencl.c:2110
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(deshake_opencl)
MATCHES_CONTIG_SIZE
#define MATCHES_CONTIG_SIZE
Definition: vf_deshake_opencl.c:92
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:108
CL_RUN_KERNEL_WITH_ARGS
#define CL_RUN_KERNEL_WITH_ARGS(queue, kernel, global_work_size, local_work_size, event,...)
Uses the above macro to enqueue the given kernel and then additionally runs it to completion via clFi...
Definition: opencl.h:180
av_fifo_drain2
void av_fifo_drain2(AVFifo *f, size_t size)
Discard the specified amount of data from an AVFifo.
Definition: fifo.c:266
FF_ENABLE_DEPRECATION_WARNINGS
#define FF_ENABLE_DEPRECATION_WARNINGS
Definition: internal.h:73
F2
#define F2(l, r, i)
Definition: cast5.c:44
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
av_fifo_can_write
size_t av_fifo_can_write(const AVFifo *f)
Definition: fifo.c:94
free_debug_matches
static void free_debug_matches(AbsoluteFrameMotion *afm)
Definition: vf_deshake_opencl.c:151
FrameDelta::translation
cl_float2 translation
Definition: vf_deshake_opencl.c:166
av_clip
#define av_clip
Definition: common.h:96
IterIndices
Definition: vf_deshake_opencl.c:186
r
const char * r
Definition: vf_curves.c:126
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
DeshakeOpenCLContext::brief_pattern
cl_mem brief_pattern
Definition: vf_deshake_opencl.c:256
deshake_opencl_init
static int deshake_opencl_init(AVFilterContext *avctx)
Definition: vf_deshake_opencl.c:1110
estimate_affine_2d
static int estimate_affine_2d(DeshakeOpenCLContext *deshake_ctx, MotionVector *point_pairs, DebugMatches *debug_matches, const int num_point_pairs, double *model_out, const double threshold, const int max_iters, const double confidence)
Definition: vf_deshake_opencl.c:518
minimize_error
static int minimize_error(DeshakeOpenCLContext *deshake_ctx, MotionVector *inliers, DebugMatches *debug_matches, const int num_inliers, double *model_out, const int max_iters)
Definition: vf_deshake_opencl.c:674
OFFSET
#define OFFSET(x)
Definition: vf_deshake_opencl.c:2126
av_lfg_init
av_cold void av_lfg_init(AVLFG *c, unsigned int seed)
Definition: lfg.c:32
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:364
AV_PIX_FMT_GBRP16BE
@ AV_PIX_FMT_GBRP16BE
planar GBR 4:4:4 48bpp, big-endian
Definition: pixfmt.h:164
DeshakeOpenCLContext::refine_features_time
unsigned long long refine_features_time
Definition: vf_deshake_opencl.c:293
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:978
AbsoluteFrameMotion
Definition: vf_deshake_opencl.c:134
AVFrame::duration
int64_t duration
Duration of the frame, in the same units as pts.
Definition: frame.h:807
AV_PIX_FMT_GBRP10BE
@ AV_PIX_FMT_GBRP10BE
planar GBR 4:4:4 30bpp, big-endian
Definition: pixfmt.h:162
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2964
AVERROR_EOF
#define AVERROR_EOF
End of file.
Definition: error.h:57
FFERROR_NOT_READY
return FFERROR_NOT_READY
Definition: filter_design.txt:204
DeshakeOpenCLContext::kernel_draw_debug_info
cl_kernel kernel_draw_debug_info
Definition: vf_deshake_opencl.c:286
PointPair::p2
cl_float2 p2
Definition: vf_deshake_opencl.c:100
matrix
Definition: vc1dsp.c:42
DeshakeOpenCLContext::transform_uv
cl_mem transform_uv
Definition: vf_deshake_opencl.c:267
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
normalize.log
log
Definition: normalize.py:21
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:100
DeshakeOpenCLContext::should_crop
int should_crop
Definition: vf_deshake_opencl.c:273
av_fifo_peek
int av_fifo_peek(const AVFifo *f, void *buf, size_t nb_elems, size_t offset)
Read data from a FIFO without modifying FIFO state.
Definition: fifo.c:255
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:340
pixdesc.h
AVFrame::pts
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:452
AVFrame::width
int width
Definition: frame.h:412
ff_source_deshake_cl
const char * ff_source_deshake_cl
DeshakeOpenCLContext::prev_descriptors
cl_mem prev_descriptors
Definition: vf_deshake_opencl.c:260
opencl.h
AVOption
AVOption.
Definition: opt.h:251
b
#define b
Definition: input.c:41
RingbufX
@ RingbufX
Definition: vf_deshake_opencl.c:111
RingbufRot
@ RingbufRot
Definition: vf_deshake_opencl.c:113
DeshakeOpenCLContext::abs_motion
AbsoluteFrameMotion abs_motion
Definition: vf_deshake_opencl.c:206
expf
#define expf(x)
Definition: libm.h:283
AbsoluteFrameMotion::curr_frame_offset
int curr_frame_offset
Definition: vf_deshake_opencl.c:140
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:196
float.h
ff_opencl_filter_load_program
int ff_opencl_filter_load_program(AVFilterContext *avctx, const char **program_source_array, int nb_strings)
Load a new OpenCL program from strings in memory.
Definition: opencl.c:156
update_needed_crop
static void update_needed_crop(CropInfo *crop, float *transform, float frame_width, float frame_height)
Definition: vf_deshake_opencl.c:990
ff_framequeue_init
void ff_framequeue_init(FFFrameQueue *fq, FFFrameQueueGlobal *fqg)
Init a frame queue and attach it to a global structure.
Definition: framequeue.c:47
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:170
points_not_collinear
static int points_not_collinear(const cl_float2 **points)
Definition: vf_deshake_opencl.c:350
video.h
FF_FILTER_FORWARD_STATUS_BACK
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
Definition: filters.h:199
CL_CREATE_KERNEL
#define CL_CREATE_KERNEL(ctx, kernel_name)
Create a kernel with the given name.
Definition: opencl.h:93
CL_CREATE_BUFFER
#define CL_CREATE_BUFFER(ctx, buffer_name, size)
Create a buffer with the given information.
Definition: opencl.h:237
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:361
BRIEF_PATCH_SIZE_HALF
#define BRIEF_PATCH_SIZE_HALF
Definition: vf_deshake_opencl.c:90
DeshakeOpenCLContext::harris_buf
cl_mem harris_buf
Definition: vf_deshake_opencl.c:248
DeshakeOpenCLContext::smooth_window_multiplier
float smooth_window_multiplier
Definition: vf_deshake_opencl.c:282
AV_PIX_FMT_GBRAP12LE
@ AV_PIX_FMT_GBRAP12LE
planar GBR 4:4:4:4 48bpp, little-endian
Definition: pixfmt.h:308
ff_inlink_consume_frame
int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)
Take a frame from the link's FIFO and update the link's stats.
Definition: avfilter.c:1383
ff_opencl_filter_work_size_from_image
int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, size_t *work_size, AVFrame *frame, int plane, int block_alignment)
Find the work size needed for a given plane of an image.
Definition: opencl.c:263
fifo.h
DeshakeOpenCLContext::debug_matches
cl_mem debug_matches
Definition: vf_deshake_opencl.c:287
AbsoluteFrameMotion::data_end_offset
int data_end_offset
Definition: vf_deshake_opencl.c:144
AV_PIX_FMT_GBRAP
@ AV_PIX_FMT_GBRAP
planar GBRA 4:4:4:4 32bpp
Definition: pixfmt.h:205
AVFilterContext::priv
void * priv
private data for use by the filter
Definition: avfilter.h:412
IterIndices::start
int start
Definition: vf_deshake_opencl.c:187
fail
#define fail()
Definition: checkasm.h:138
av_fifo_write
int av_fifo_write(AVFifo *f, const void *buf, size_t nb_elems)
Write data into a FIFO.
Definition: fifo.c:188
FFSIGN
#define FFSIGN(a)
Definition: common.h:66
DeshakeOpenCLContext::curr_frame
int curr_frame
Definition: vf_deshake_opencl.c:212
DeshakeOpenCLContext::kernel_transform
cl_kernel kernel_transform
Definition: vf_deshake_opencl.c:242
AbsoluteFrameMotion::ringbuffers
AVFifo * ringbuffers[RingbufCount]
Definition: vf_deshake_opencl.c:136
RingbufScaleY
@ RingbufScaleY
Definition: vf_deshake_opencl.c:115
IterIndices::end
int end
Definition: vf_deshake_opencl.c:188
val
static double val(void *priv, double ch)
Definition: aeval.c:78
deshake_opencl_outputs
static const AVFilterPad deshake_opencl_outputs[]
Definition: vf_deshake_opencl.c:2118
pts
static int64_t pts
Definition: transcode_aac.c:643
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
DeshakeOpenCLContext::kernel_crop_upscale
cl_kernel kernel_crop_upscale
Definition: vf_deshake_opencl.c:243
DeshakeOpenCLContext::initialized
int initialized
Definition: vf_deshake_opencl.c:194
DeshakeOpenCLContext::smooth_window
int smooth_window
Definition: vf_deshake_opencl.c:210
FrameDelta::skew
cl_float2 skew
Definition: vf_deshake_opencl.c:169
RingbufScaleX
@ RingbufScaleX
Definition: vf_deshake_opencl.c:114
ff_opencl_filter_config_output
int ff_opencl_filter_config_output(AVFilterLink *outlink)
Create a suitable hardware frames context for the output.
Definition: opencl.c:81
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:47
DeshakeOpenCLContext::eof
int eof
Definition: vf_deshake_opencl.c:198
avassert.h
DeshakeOpenCLContext::read_buf_time
unsigned long long read_buf_time
Definition: vf_deshake_opencl.c:300
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
av_fifo_read
int av_fifo_read(AVFifo *f, void *buf, size_t nb_elems)
Read data from a FIFO.
Definition: fifo.c:240
DeshakeOpenCLContext::debug_model_matches
cl_mem debug_model_matches
Definition: vf_deshake_opencl.c:288
DebugMatches::num_model_matches
int num_model_matches
Definition: vf_deshake_opencl.c:129
duration
int64_t duration
Definition: movenc.c:64
float
float
Definition: af_crystalizer.c:121
AV_PIX_FMT_GBRAP16BE
@ AV_PIX_FMT_GBRAP16BE
planar GBRA 4:4:4:4 64bpp, big-endian
Definition: pixfmt.h:206
ff_outlink_set_status
static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)
Set the status field of a link from the source filter.
Definition: filters.h:189
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_PIX_FMT_GBRP16LE
@ AV_PIX_FMT_GBRP16LE
planar GBR 4:4:4 48bpp, little-endian
Definition: pixfmt.h:165
RingbufY
@ RingbufY
Definition: vf_deshake_opencl.c:112
av_lfg_get
static unsigned int av_lfg_get(AVLFG *c)
Get the next random unsigned 32-bit number using an ALFG.
Definition: lfg.h:53
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:51
DeshakeOpenCLContext::smooth_percent
float smooth_percent
Definition: vf_deshake_opencl.c:279
FFFrameQueueGlobal
Structure to hold global options and statistics for frame queues.
Definition: framequeue.h:46
mi
#define mi
Definition: vf_colormatrix.c:106
DeshakeOpenCLContext::grayscale_time
unsigned long long grayscale_time
Definition: vf_deshake_opencl.c:291
DeshakeOpenCLContext::kernel_brief_descriptors
cl_kernel kernel_brief_descriptors
Definition: vf_deshake_opencl.c:240
av_q2d
static double av_q2d(AVRational a)
Convert an AVRational to a double.
Definition: rational.h:104
lfg.h
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts_bsf.c:365
deshake_opencl_uninit
static av_cold void deshake_opencl_uninit(AVFilterContext *avctx)
Definition: vf_deshake_opencl.c:1051
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
filters.h
DeshakeOpenCLContext::kernel_match_descriptors
cl_kernel kernel_match_descriptors
Definition: vf_deshake_opencl.c:241
ctx
AVFormatContext * ctx
Definition: movenc.c:48
av_rescale_q
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
Definition: mathematics.c:142
AV_PIX_FMT_GBRP10LE
@ AV_PIX_FMT_GBRP10LE
planar GBR 4:4:4 30bpp, little-endian
Definition: pixfmt.h:163
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:192
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
CL_RELEASE_KERNEL
#define CL_RELEASE_KERNEL(k)
release an OpenCL Kernel
Definition: opencl.h:101
frame
static AVFrame * frame
Definition: demux_decode.c:54
ff_framequeue_take
AVFrame * ff_framequeue_take(FFFrameQueue *fq)
Take the first frame in the queue.
Definition: framequeue.c:97
RingbufferIndices
RingbufferIndices
Definition: vf_deshake_opencl.c:110
rand_in
static int rand_in(int low, int high, AVLFG *alfg)
Definition: vf_deshake_opencl.c:304
averaged_event_time_ms
static double averaged_event_time_ms(unsigned long long total_time, int num_frames)
Definition: vf_deshake_opencl.c:310
AbsoluteFrameMotion::debug_matches
AVFifo * debug_matches
Definition: vf_deshake_opencl.c:146
filter_frame
static int filter_frame(AVFilterLink *link, AVFrame *input_frame)
Definition: vf_deshake_opencl.c:1368
result
and forward the result(frame or status change) to the corresponding input. If nothing is possible
AV_PIX_FMT_GBRAP12BE
@ AV_PIX_FMT_GBRAP12BE
planar GBR 4:4:4:4 48bpp, big-endian
Definition: pixfmt.h:307
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
NULL
#define NULL
Definition: coverity.c:32
DeshakeOpenCLContext::alfg
AVLFG alfg
Definition: vf_deshake_opencl.c:201
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:736
transform_center_scale
static void transform_center_scale(float x_shift, float y_shift, float angle, float scale_x, float scale_y, float center_w, float center_h, float *matrix)
Definition: vf_deshake_opencl.c:952
ff_framequeue_add
int ff_framequeue_add(FFFrameQueue *fq, AVFrame *frame)
Add a frame.
Definition: framequeue.c:63
ff_framequeue_free
void ff_framequeue_free(FFFrameQueue *fq)
Free the queue and all queued frames.
Definition: framequeue.c:53
DeshakeOpenCLContext::ransac_err
float * ransac_err
Definition: vf_deshake_opencl.c:218
DeshakeOpenCLContext::kernel_harris_response
cl_kernel kernel_harris_response
Definition: vf_deshake_opencl.c:238
framequeue.h
transform
static const int8_t transform[32][32]
Definition: hevcdsp.c:27
AVFilterContext::inputs
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:405
start_end_for
static IterIndices start_end_for(DeshakeOpenCLContext *deshake_ctx, int length)
Definition: vf_deshake_opencl.c:834
av_fifo_can_read
size_t av_fifo_can_read(const AVFifo *f)
Definition: fifo.c:87
double
double
Definition: af_crystalizer.c:131
AV_PIX_FMT_OPENCL
@ AV_PIX_FMT_OPENCL
Hardware surfaces for OpenCL.
Definition: pixfmt.h:355
DeshakeOpenCLContext::matches_contig
cl_mem matches_contig
Definition: vf_deshake_opencl.c:263
DebugMatches::model_matches
MotionVector model_matches[3]
Definition: vf_deshake_opencl.c:125
DeshakeOpenCLContext::duration
int64_t duration
Definition: vf_deshake_opencl.c:197
DeshakeOpenCLContext
Definition: vf_deshake_opencl.c:191
ff_inlink_acknowledge_status
int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)
Test and acknowledge the change of status on the link.
Definition: avfilter.c:1337
DeshakeOpenCLContext::crop_uv
CropInfo crop_uv
Definition: vf_deshake_opencl.c:223
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
transformed_point
static cl_float2 transformed_point(float x, float y, float *transform)
Definition: vf_deshake_opencl.c:942
DeshakeOpenCLContext::harris_response_time
unsigned long long harris_response_time
Definition: vf_deshake_opencl.c:292
make_gauss_kernel
static void make_gauss_kernel(float *gauss_kernel, float length, float sigma)
Definition: vf_deshake_opencl.c:810
FFFrameQueue
Queue of AVFrame pointers.
Definition: framequeue.h:53
DeshakeOpenCLContext::crop_upscale_time
unsigned long long crop_upscale_time
Definition: vf_deshake_opencl.c:297
AVLFG
Context structure for the Lagged Fibonacci PRNG.
Definition: lfg.h:33
f
f
Definition: af_crystalizer.c:121
F3
#define F3(l, r, i)
Definition: cast5.c:37
AVFifo
Definition: fifo.c:35
DeshakeOpenCLContext::matches_contig_host
MotionVector * matches_contig_host
Definition: vf_deshake_opencl.c:232
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:106
powf
#define powf(x, y)
Definition: libm.h:50
CL_RELEASE_MEMORY
#define CL_RELEASE_MEMORY(m)
release an OpenCL Memory Object
Definition: opencl.h:114
AV_PIX_FMT_FLAG_RGB
#define AV_PIX_FMT_FLAG_RGB
The pixel format contains RGB-like data (as opposed to YUV/grayscale).
Definition: pixdesc.h:136
AVFILTER_FLAG_HWDEVICE
#define AVFILTER_FLAG_HWDEVICE
The filter can create hardware frames using AVFilterContext.hw_device_ctx.
Definition: avfilter.h:138
hypot
static av_const double hypot(double x, double y)
Definition: libm.h:366
CropInfo::bottom_right
cl_float2 bottom_right
Definition: vf_deshake_opencl.c:181
AV_PIX_FMT_GBRP9BE
@ AV_PIX_FMT_GBRP9BE
planar GBR 4:4:4 27bpp, big-endian
Definition: pixfmt.h:160
get_subset
static int get_subset(AVLFG *alfg, const MotionVector *point_pairs, const int num_point_pairs, MotionVector *pairs_subset, int max_attempts)
Definition: vf_deshake_opencl.c:394
ROUNDED_UP_DIV
#define ROUNDED_UP_DIV(a, b)
Definition: vf_deshake_opencl.c:94
AV_PIX_FMT_GBRP9LE
@ AV_PIX_FMT_GBRP9LE
planar GBR 4:4:4 27bpp, little-endian
Definition: pixfmt.h:161
FLAGS
#define FLAGS
Definition: vf_deshake_opencl.c:2127
MotionVector::should_consider
cl_int should_consider
Definition: vf_deshake_opencl.c:106
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
AV_PIX_FMT_GBRAP10LE
@ AV_PIX_FMT_GBRAP10LE
planar GBR 4:4:4:4 40bpp, little-endian
Definition: pixfmt.h:311
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
FF_FILTER_FORWARD_WANTED
FF_FILTER_FORWARD_WANTED(outlink, inlink)
MotionVector
Definition: agm.c:38
opencl_source.h
M_PI
#define M_PI
Definition: mathematics.h:67
ff_opencl_filter_config_input
int ff_opencl_filter_config_input(AVFilterLink *inlink)
Check that the input link contains a suitable hardware frames context and extract the device from it.
Definition: opencl.c:45
internal.h
AV_OPT_TYPE_FLOAT
@ AV_OPT_TYPE_FLOAT
Definition: opt.h:228
FILTER_SINGLE_PIXFMT
#define FILTER_SINGLE_PIXFMT(pix_fmt_)
Definition: internal.h:182
DeshakeOpenCLContext::inliers
MotionVector * inliers
Definition: vf_deshake_opencl.c:234
FrameDelta::rotation
float rotation
Definition: vf_deshake_opencl.c:167
ff_get_matrix
void ff_get_matrix(float x_shift, float y_shift, float angle, float scale_x, float scale_y, float *matrix)
Get an affine transformation matrix from given translation, rotation, and zoom factors.
Definition: transform.c:106
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
queue_frame
static int queue_frame(AVFilterLink *link, AVFrame *input_frame)
Definition: vf_deshake_opencl.c:1732
round
static av_always_inline av_const double round(double x)
Definition: libm.h:444
DeshakeOpenCLContext::transform_y
cl_mem transform_y
Definition: vf_deshake_opencl.c:265
FFMIN3
#define FFMIN3(a, b, c)
Definition: macros.h:50
transform_debug
static void transform_debug(AVFilterContext *avctx, float *new_vals, float *old_vals, int curr_frame)
Definition: vf_deshake_opencl.c:1340
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:31
common.h
make_vectors_contig
static int make_vectors_contig(DeshakeOpenCLContext *deshake_ctx, int size_y, int size_x)
Definition: vf_deshake_opencl.c:778
delta
float delta
Definition: vorbis_enc_data.h:430
activate
static int activate(AVFilterContext *ctx)
Definition: vf_deshake_opencl.c:2011
DeshakeOpenCLContext::is_yuv
int is_yuv
Definition: vf_deshake_opencl.c:226
AVFrame::pkt_duration
attribute_deprecated int64_t pkt_duration
duration of the corresponding packet, expressed in AVStream->time_base units, 0 if unknown.
Definition: frame.h:700
DeshakeOpenCLContext::fq
FFFrameQueue fq
Definition: vf_deshake_opencl.c:204
av_inv_q
static av_always_inline AVRational av_inv_q(AVRational q)
Invert a rational.
Definition: rational.h:159
DeshakeOpenCLContext::grayscale
cl_mem grayscale
Definition: vf_deshake_opencl.c:246
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:53
DeshakeOpenCLContext::transform_time
unsigned long long transform_time
Definition: vf_deshake_opencl.c:296
RingbufCount
@ RingbufCount
Definition: vf_deshake_opencl.c:118
AVFilter
Filter definition.
Definition: avfilter.h:166
AVHWFramesContext
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:124
DeshakeOpenCLContext::ocf
OpenCLFilterContext ocf
Definition: vf_deshake_opencl.c:192
AV_PIX_FMT_GBRAP16LE
@ AV_PIX_FMT_GBRAP16LE
planar GBRA 4:4:4:4 64bpp, little-endian
Definition: pixfmt.h:207
ff_opencl_filter_init
int ff_opencl_filter_init(AVFilterContext *avctx)
Initialise an OpenCL filter context.
Definition: opencl.c:132
CL_BLOCKING_WRITE_BUFFER
#define CL_BLOCKING_WRITE_BUFFER(queue, buffer, size, host_ptr, event)
Perform a blocking write to a buffer.
Definition: opencl.h:214
ret
ret
Definition: filter_design.txt:187
FrameDelta
Definition: vf_deshake_opencl.c:165
DeshakeOpenCLContext::matches_host
MotionVector * matches_host
Definition: vf_deshake_opencl.c:231
compute_error
static void compute_error(const MotionVector *point_pairs, const int num_point_pairs, const double *model, float *err)
Definition: vf_deshake_opencl.c:436
ransac_update_num_iters
static int ransac_update_num_iters(double confidence, double num_outliers, int max_iters)
Definition: vf_deshake_opencl.c:496
pixfmt.h
DeshakeOpenCLContext::prev_refined_features
cl_mem prev_refined_features
Definition: vf_deshake_opencl.c:253
FrameDelta::scale
cl_float2 scale
Definition: vf_deshake_opencl.c:168
av_fifo_alloc2
AVFifo * av_fifo_alloc2(size_t nb_elems, size_t elem_size, unsigned int flags)
Allocate and initialize an AVFifo with a given element size.
Definition: fifo.c:47
AbsoluteFrameMotion::data_start_offset
int data_start_offset
Definition: vf_deshake_opencl.c:143
DeshakeOpenCLContext::refined_features
cl_mem refined_features
Definition: vf_deshake_opencl.c:251
transform.h
check_subset
static int check_subset(const MotionVector *pairs_subset)
Definition: vf_deshake_opencl.c:376
ff_framequeue_queued_frames
static size_t ff_framequeue_queued_frames(const FFFrameQueue *fq)
Get the number of queued frames.
Definition: framequeue.h:146
AVFrame::hw_frames_ctx
AVBufferRef * hw_frames_ctx
For hwaccel-format frames, this should be a reference to the AVHWFramesContext describing the frame.
Definition: frame.h:752
run_estimate_kernel
static void run_estimate_kernel(const MotionVector *point_pairs, double *model)
Definition: vf_deshake_opencl.c:320
CropInfo
Definition: vf_deshake_opencl.c:177
AVFrame::height
int height
Definition: frame.h:412
status
ov_status_e status
Definition: dnn_backend_openvino.c:119
DebugMatches::matches
MotionVector * matches
Definition: vf_deshake_opencl.c:123
DeshakeOpenCLContext::tripod_mode
int tripod_mode
Definition: vf_deshake_opencl.c:271
DeshakeOpenCLContext::match_descriptors_time
unsigned long long match_descriptors_time
Definition: vf_deshake_opencl.c:295
ff_framequeue_global_init
void ff_framequeue_global_init(FFFrameQueueGlobal *fqg)
Init a global structure.
Definition: framequeue.c:30
avfilter.h
gaussian_for
static float gaussian_for(int x, float sigma)
Definition: vf_deshake_opencl.c:804
values
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return values
Definition: filter_design.txt:263
temp
else temp
Definition: vf_mcdeint.c:263
OpenCLFilterContext
Definition: opencl.h:36
ff_opencl_filter_uninit
void ff_opencl_filter_uninit(AVFilterContext *avctx)
Uninitialise an OpenCL filter context.
Definition: opencl.c:141
DeshakeOpenCLContext::debug_on
int debug_on
Definition: vf_deshake_opencl.c:272
DebugMatches::num_matches
int num_matches
Definition: vf_deshake_opencl.c:127
CL_RELEASE_QUEUE
#define CL_RELEASE_QUEUE(q)
Release an OpenCL command queue.
Definition: opencl.h:127
DeshakeOpenCLContext::kernel_refine_features
cl_kernel kernel_refine_features
Definition: vf_deshake_opencl.c:239
AVFilterContext
An instance of a filter.
Definition: avfilter.h:397
FF_DISABLE_DEPRECATION_WARNINGS
#define FF_DISABLE_DEPRECATION_WARNINGS
Definition: internal.h:72
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:158
desc
const char * desc
Definition: libsvtav1.c:83
find_inliers
static int find_inliers(MotionVector *point_pairs, const int num_point_pairs, const double *model, float *err, double thresh)
Definition: vf_deshake_opencl.c:460
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
deshake_opencl_options
static const AVOption deshake_opencl_options[]
Definition: vf_deshake_opencl.c:2129
mem.h
PointPair::p1
cl_float2 p1
Definition: vf_deshake_opencl.c:98
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes of an image.
Definition: pixdesc.h:69
transforms
static const struct @91 transforms[18]
ff_opencl_get_event_time
cl_ulong ff_opencl_get_event_time(cl_event event)
Gets the command start and end times for the given event and returns the difference (the time that the event took).
Definition: opencl.c:339
smooth
static float smooth(DeshakeOpenCLContext *deshake_ctx, float *gauss_kernel, int length, float max_val, AVFifo *values)
Definition: vf_deshake_opencl.c:888
DeshakeOpenCLContext::kernel_grayscale
cl_kernel kernel_grayscale
Definition: vf_deshake_opencl.c:237
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:244
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:193
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
FFMAX3
#define FFMAX3(a, b, c)
Definition: macros.h:48
DebugMatches
Definition: vf_deshake_opencl.c:122
d
d
Definition: ffmpeg_filter.c:368
CL_CREATE_BUFFER_FLAGS
#define CL_CREATE_BUFFER_FLAGS(ctx, buffer_name, flags, size, host_ptr)
Create a buffer with the given information.
Definition: opencl.h:197
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
CL_FAIL_ON_ERROR
#define CL_FAIL_ON_ERROR(errcode,...)
A helper macro to handle OpenCL errors.
Definition: opencl.h:74
av_fifo_freep2
void av_fifo_freep2(AVFifo **f)
Free an AVFifo and reset pointer to NULL.
Definition: fifo.c:286
AV_PIX_FMT_GBRAP10BE
@ AV_PIX_FMT_GBRAP10BE
planar GBR 4:4:4:4 40bpp, big-endian
Definition: pixfmt.h:310
uninit
static av_cold int uninit(AVCodecContext *avctx)
Definition: crystalhd.c:285
DeshakeOpenCLContext::descriptors
cl_mem descriptors
Definition: vf_deshake_opencl.c:258
DeshakeOpenCLContext::matches
cl_mem matches
Definition: vf_deshake_opencl.c:262
int
int
Definition: ffmpeg_filter.c:368
MotionVector::p
PointPair p
Definition: vf_deshake_opencl.c:104
DeshakeOpenCLContext::brief_descriptors_time
unsigned long long brief_descriptors_time
Definition: vf_deshake_opencl.c:294
DeshakeOpenCLContext::command_queue
cl_command_queue command_queue
Definition: vf_deshake_opencl.c:236
PointPair
Definition: vf_deshake_opencl.c:96
BREIFN
#define BREIFN
Definition: vf_deshake_opencl.c:86
DeshakeOpenCLContext::sw_format
int sw_format
Definition: vf_deshake_opencl.c:228
DeshakeOpenCLContext::gauss_kernel
float * gauss_kernel
Definition: vf_deshake_opencl.c:215
DeshakeOpenCLContext::refine_features
cl_int refine_features
Definition: vf_deshake_opencl.c:276
SimilarityMatrix
Definition: vf_deshake_opencl.c:172
ringbuf_float_at
static void ringbuf_float_at(DeshakeOpenCLContext *deshake_ctx, AVFifo *values, float *val, int offset)
Definition: vf_deshake_opencl.c:845
optimize_model
static void optimize_model(DeshakeOpenCLContext *deshake_ctx, MotionVector *best_pairs, MotionVector *inliers, const int num_inliers, float best_err, double *model_out)
Definition: vf_deshake_opencl.c:601
ff_vf_deshake_opencl
const AVFilter ff_vf_deshake_opencl
Definition: vf_deshake_opencl.c:2160
DeshakeOpenCLContext::crop_y
CropInfo crop_y
Definition: vf_deshake_opencl.c:221
av_clipd
av_clipd
Definition: af_crystalizer.c:131
CropInfo::top_left
cl_float2 top_left
Definition: vf_deshake_opencl.c:179
decompose_transform
static FrameDelta decompose_transform(double *model)
Definition: vf_deshake_opencl.c:738
AVFilterContext::outputs
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:409
F1
#define F1(l, r, i)
Definition: cast5.c:51