FFmpeg
vf_deshake_opencl.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  *
18  * Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
19  * Copyright (C) 2009, Willow Garage Inc., all rights reserved.
20  * Copyright (C) 2013, OpenCV Foundation, all rights reserved.
21  * Third party copyrights are property of their respective owners.
22  *
23  * Redistribution and use in source and binary forms, with or without modification,
24  * are permitted provided that the following conditions are met:
25  *
26  * * Redistribution's of source code must retain the above copyright notice,
27  * this list of conditions and the following disclaimer.
28  *
29  * * Redistribution's in binary form must reproduce the above copyright notice,
30  * this list of conditions and the following disclaimer in the documentation
31  * and/or other materials provided with the distribution.
32  *
33  * * The name of the copyright holders may not be used to endorse or promote products
34  * derived from this software without specific prior written permission.
35  *
36  * This software is provided by the copyright holders and contributors "as is" and
37  * any express or implied warranties, including, but not limited to, the implied
38  * warranties of merchantability and fitness for a particular purpose are disclaimed.
39  * In no event shall the Intel Corporation or contributors be liable for any direct,
40  * indirect, incidental, special, exemplary, or consequential damages
41  * (including, but not limited to, procurement of substitute goods or services;
42  * loss of use, data, or profits; or business interruption) however caused
43  * and on any theory of liability, whether in contract, strict liability,
44  * or tort (including negligence or otherwise) arising in any way out of
45  * the use of this software, even if advised of the possibility of such damage.
46  */
47 
48 #include <float.h>
49 #include <libavutil/lfg.h>
50 #include "libavutil/opt.h"
51 #include "libavutil/mem.h"
52 #include "libavutil/fifo.h"
53 #include "libavutil/common.h"
54 #include "libavutil/avassert.h"
55 #include "libavutil/pixdesc.h"
56 #include "libavutil/pixfmt.h"
57 #include "avfilter.h"
58 #include "framequeue.h"
59 #include "filters.h"
60 #include "transform.h"
61 #include "opencl.h"
62 #include "opencl_source.h"
63 #include "video.h"
64 
65 /*
66 This filter matches feature points between frames (dealing with outliers) and then
67 uses the matches to estimate an affine transform between frames. This transform is
68 decomposed into various values (translation, scale, rotation) and the values are
69 summed relative to the start of the video to obtain an absolute camera position
70 for each frame. This "camera path" is then smoothed via a gaussian filter, resulting
71 in a new path that is turned back into an affine transform and applied to each
72 frame to render it.
73 
74 High-level overview:
75 
76 All of the work to extract motion data from frames occurs in queue_frame. Motion data
77 is buffered in a smoothing window, so queue_frame simply computes the absolute camera
78 positions and places them in ringbuffers.
79 
80 filter_frame is responsible for looking at the absolute camera positions currently
81 in the ringbuffers, applying the gaussian filter, and then transforming the frames.
82 */
83 
84 // Number of bits for BRIEF descriptors
85 #define BREIFN 512
86 // Size of the patch from which a BRIEF descriptor is extracted
87 // This is the size used in OpenCV
88 #define BRIEF_PATCH_SIZE 31
89 #define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2)
90 
91 #define MATCHES_CONTIG_SIZE 2000
92 
// Integer division of a by b, rounded up (intended for non-negative a and
// positive b).
//
// Both arguments are fully parenthesized so that expression arguments such as
// `n + 1` expand with the intended precedence. b is still evaluated twice, so
// do not pass arguments with side effects.
#define ROUNDED_UP_DIV(a, b) (((a) + ((b) - 1)) / (b))
94 
// The positions of one matched feature point in two frames.
// Element 0 of each cl_float2 is used as x and element 1 as y elsewhere in
// this file.
typedef struct PointPair {
    // Previous frame
    cl_float2 p1;
    // Current frame
    cl_float2 p2;
} PointPair;
101 
102 typedef struct MotionVector {
104  // Used to mark vectors as potential outliers
106 } MotionVector;
107 
108 // Denotes the indices for the different types of motion in the ringbuffers array
115 
116  // Should always be last
118 };
119 
120 // Struct that holds data for drawing point match debug data
121 typedef struct DebugMatches {
123  // The points used to calculate the affine transform for a frame
125 
127  // For cases where we couldn't calculate a model
129 } DebugMatches;
130 
131 // Groups together the ringbuffers that store absolute distortion / position values
132 // for each frame
133 typedef struct AbsoluteFrameMotion {
134  // Array with the various ringbuffers, indexed via the RingbufferIndices enum
136 
137  // Offset to get to the current frame being processed
138  // (not in bytes)
140  // Keeps track of where the start and end of contiguous motion data is (to
141  // deal with cases where no motion data is found between two frames)
144 
147 
148 // Takes care of freeing the arrays within the DebugMatches inside of the
149 // debug_matches ringbuffer and then freeing the buffer itself.
151  DebugMatches dm;
152 
153  if (!afm->debug_matches) {
154  return;
155  }
156 
157  while (av_fifo_read(afm->debug_matches, &dm, 1) >= 0)
158  av_freep(&dm.matches);
159 
161 }
162 
// Stores the translation, scale, rotation, and skew deltas between two frames
// (the result of decompose_transform()).
typedef struct FrameDelta {
    // Translation along x (element 0) and y (element 1)
    cl_float2 translation;
    // Rotation angle in radians (derived from acos() in decompose_transform())
    float rotation;
    // Scale factors; element 0 / element 1 are filled per axis
    cl_float2 scale;
    // Skew angles in radians (derived from atan()); only one element is
    // non-zero for a given decomposition
    cl_float2 skew;
} FrameDelta;
170 
171 typedef struct SimilarityMatrix {
172  // The 2x3 similarity matrix
173  double matrix[6];
175 
// Bounding box of the region of a plane that remains valid after smoothing.
// Coordinates are in the same units as the frame width / height handed to
// update_needed_crop(); element 0 is x and element 1 is y.
typedef struct CropInfo {
    // The top left corner of the bounding box for the crop
    cl_float2 top_left;
    // The bottom right corner of the bounding box for the crop
    // (an x value of 0 is treated by update_needed_crop() as "not yet set")
    cl_float2 bottom_right;
} CropInfo;
182 
// Returned from function that determines start and end values for iteration
// around the current frame in a ringbuffer
//
// The range is half-open: callers iterate while index < end. Both values may
// lie outside the valid ringbuffer range and must be clipped by the caller.
typedef struct IterIndices {
    // First index of the window (inclusive)
    int start;
    // One past the last index of the window (exclusive)
    int end;
} IterIndices;
189 
190 typedef struct DeshakeOpenCLContext {
192  // Whether or not the above `OpenCLFilterContext` has been initialized
194 
195  // These variables are used in the activate callback
197  int eof;
198 
199  // State for random number generation
201 
202  // FIFO frame queue used to buffer future frames for processing
204  // Ringbuffers for frame positions
206 
207  // The number of frames' motion to consider before and after the frame we are
208  // smoothing
210  // The number of the frame we are currently processing
212 
213  // Stores a 1d array of normalised gaussian kernel values for convolution
214  float *gauss_kernel;
215 
216  // Buffer for error values used in RANSAC code
217  float *ransac_err;
218 
219  // Information regarding how to crop the smoothed luminance (or RGB) planes
221  // Information regarding how to crop the smoothed chroma planes
223 
224  // Whether or not we are processing YUV input (as opposed to RGB)
225  int is_yuv;
226  // The underlying format of the hardware surfaces
228 
229  // Buffer to copy `matches` into for the CPU to work with
232 
234 
235  cl_command_queue command_queue;
236  cl_kernel kernel_grayscale;
241  cl_kernel kernel_transform;
243 
244  // Stores a frame converted to grayscale
245  cl_mem grayscale;
246  // Stores the harris response for a frame (measure of "cornerness" for each pixel)
247  cl_mem harris_buf;
248 
249  // Detected features after non-maximum suppression and sub-pixel refinement
251  // Saved from the previous frame
253 
254  // BRIEF sampling pattern that is randomly initialized
256  // Feature point descriptors for the current frame
257  cl_mem descriptors;
258  // Feature point descriptors for the previous frame
260  // Vectors between points in current and previous frame
261  cl_mem matches;
263  // Holds the matrix to transform luminance (or RGB) with
264  cl_mem transform_y;
265  // Holds the matrix to transform chroma with
266  cl_mem transform_uv;
267 
268  // Configurable options
269 
271  int debug_on;
273 
274  // Whether or not feature points should be refined at a sub-pixel level
276  // If the user sets a value other than the default, 0, this percentage is
277  // translated into a sigma value ranging from 0.5 to 40.0
279  // This number is multiplied by the video frame rate to determine the size
280  // of the smooth window
282 
283  // Debug stuff
284 
288 
289  // These store the total time spent executing the different kernels in nanoseconds
290  unsigned long long grayscale_time;
291  unsigned long long harris_response_time;
292  unsigned long long refine_features_time;
293  unsigned long long brief_descriptors_time;
294  unsigned long long match_descriptors_time;
295  unsigned long long transform_time;
296  unsigned long long crop_upscale_time;
297 
298  // Time spent copying matched features from the device to the host
299  unsigned long long read_buf_time;
301 
302 // Returns a random uniformly-distributed number in [low, high]
303 static int rand_in(int low, int high, AVLFG *alfg) {
304  return (av_lfg_get(alfg) % (high - low)) + low;
305 }
306 
// Converts an accumulated duration (total_time, in nanoseconds) into the mean
// duration per frame, expressed in milliseconds.
//
// num_frames is the number of frames the total was accumulated over.
static double averaged_event_time_ms(unsigned long long total_time, int num_frames) {
    const double ns_per_frame = (double)total_time / (double)num_frames;

    // 1 ms == 1,000,000 ns
    return ns_per_frame / 1000000.0;
}
312 
313 // The following code is loosely ported from OpenCV
314 
315 // Estimates affine transform from 3 point pairs
316 // model is a 2x3 matrix:
317 // a b c
318 // d e f
319 static void run_estimate_kernel(const MotionVector *point_pairs, double *model)
320 {
321  // src points
322  double x1 = point_pairs[0].p.p1.s[0];
323  double y1 = point_pairs[0].p.p1.s[1];
324  double x2 = point_pairs[1].p.p1.s[0];
325  double y2 = point_pairs[1].p.p1.s[1];
326  double x3 = point_pairs[2].p.p1.s[0];
327  double y3 = point_pairs[2].p.p1.s[1];
328 
329  // dest points
330  double X1 = point_pairs[0].p.p2.s[0];
331  double Y1 = point_pairs[0].p.p2.s[1];
332  double X2 = point_pairs[1].p.p2.s[0];
333  double Y2 = point_pairs[1].p.p2.s[1];
334  double X3 = point_pairs[2].p.p2.s[0];
335  double Y3 = point_pairs[2].p.p2.s[1];
336 
337  double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );
338 
339  model[0] = d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );
340  model[1] = d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );
341  model[2] = d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );
342 
343  model[3] = d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );
344  model[4] = d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );
345  model[5] = d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );
346 }
347 
348 // Checks that the 3 points in the given array are not collinear
349 static int points_not_collinear(const cl_float2 **points)
350 {
351  int j, k, i = 2;
352 
353  for (j = 0; j < i; j++) {
354  double dx1 = points[j]->s[0] - points[i]->s[0];
355  double dy1 = points[j]->s[1] - points[i]->s[1];
356 
357  for (k = 0; k < j; k++) {
358  double dx2 = points[k]->s[0] - points[i]->s[0];
359  double dy2 = points[k]->s[1] - points[i]->s[1];
360 
361  // Assuming a 3840 x 2160 video with a point at (0, 0) and one at
362  // (3839, 2159), this prevents a third point from being within roughly
363  // 0.5 of a pixel of the line connecting the two on both axes
364  if (fabs(dx2*dy1 - dy2*dx1) <= 1.0) {
365  return 0;
366  }
367  }
368  }
369 
370  return 1;
371 }
372 
373 // Checks a subset of 3 point pairs to make sure that the points are not collinear
374 // and not too close to each other
375 static int check_subset(const MotionVector *pairs_subset)
376 {
377  const cl_float2 *prev_points[] = {
378  &pairs_subset[0].p.p1,
379  &pairs_subset[1].p.p1,
380  &pairs_subset[2].p.p1
381  };
382 
383  const cl_float2 *curr_points[] = {
384  &pairs_subset[0].p.p2,
385  &pairs_subset[1].p.p2,
386  &pairs_subset[2].p.p2
387  };
388 
389  return points_not_collinear(prev_points) && points_not_collinear(curr_points);
390 }
391 
392 // Selects a random subset of 3 points from point_pairs and places them in pairs_subset
393 static int get_subset(
394  AVLFG *alfg,
395  const MotionVector *point_pairs,
396  const int num_point_pairs,
397  MotionVector *pairs_subset,
398  int max_attempts
399 ) {
400  int idx[3];
401  int i = 0, j, iters = 0;
402 
403  for (; iters < max_attempts; iters++) {
404  for (i = 0; i < 3 && iters < max_attempts;) {
405  int idx_i = 0;
406 
407  for (;;) {
408  idx_i = idx[i] = rand_in(0, num_point_pairs, alfg);
409 
410  for (j = 0; j < i; j++) {
411  if (idx_i == idx[j]) {
412  break;
413  }
414  }
415 
416  if (j == i) {
417  break;
418  }
419  }
420 
421  pairs_subset[i] = point_pairs[idx[i]];
422  i++;
423  }
424 
425  if (i == 3 && !check_subset(pairs_subset)) {
426  continue;
427  }
428  break;
429  }
430 
431  return i == 3 && iters < max_attempts;
432 }
433 
434 // Computes the error for each of the given points based on the given model.
435 static void compute_error(
436  const MotionVector *point_pairs,
437  const int num_point_pairs,
438  const double *model,
439  float *err
440 ) {
441  double F0 = model[0], F1 = model[1], F2 = model[2];
442  double F3 = model[3], F4 = model[4], F5 = model[5];
443 
444  for (int i = 0; i < num_point_pairs; i++) {
445  const cl_float2 *f = &point_pairs[i].p.p1;
446  const cl_float2 *t = &point_pairs[i].p.p2;
447 
448  double a = F0*f->s[0] + F1*f->s[1] + F2 - t->s[0];
449  double b = F3*f->s[0] + F4*f->s[1] + F5 - t->s[1];
450 
451  err[i] = a*a + b*b;
452  }
453 }
454 
455 // Determines which of the given point matches are inliers for the given model
456 // based on the specified threshold.
457 //
458 // err must be an array of num_point_pairs length
459 static int find_inliers(
460  MotionVector *point_pairs,
461  const int num_point_pairs,
462  const double *model,
463  float *err,
464  double thresh
465 ) {
466  float t = (float)(thresh * thresh);
467  int i, n = num_point_pairs, num_inliers = 0;
468 
469  compute_error(point_pairs, num_point_pairs, model, err);
470 
471  for (i = 0; i < n; i++) {
472  if (err[i] <= t) {
473  // This is an inlier
474  point_pairs[i].should_consider = 1;
475  num_inliers += 1;
476  } else {
477  point_pairs[i].should_consider = 0;
478  }
479  }
480 
481  return num_inliers;
482 }
483 
// Determines the number of iterations required to achieve the desired confidence level.
//
// The equation used to determine the number of iterations to do is:
// 1 - confidence = (1 - inlier_probability^num_points)^num_iters
//
// Solving for num_iters:
//
// num_iters = log(1 - confidence) / log(1 - inlier_probability^num_points)
//
// A more in-depth explanation can be found at https://en.wikipedia.org/wiki/Random_sample_consensus
// under the 'Parameters' heading
//
// confidence   - desired probability of success, clipped to [0, 1]
// num_outliers - fraction of matches that are outliers, clipped to [0, 1]
// max_iters    - upper bound for the returned value
//
// num_points is fixed at 3 here (the size of a sample subset).
static int ransac_update_num_iters(double confidence, double num_outliers, int max_iters)
{
    const double conf = av_clipd(confidence, 0.0, 1.0);
    const double outlier_ratio = av_clipd(num_outliers, 0.0, 1.0);

    // avoid inf's & nan's
    const double miss_prob = FFMAX(1.0 - conf, DBL_MIN);
    // Probability that a sample of 3 contains at least one outlier
    const double bad_sample_prob = 1.0 - pow(1.0 - outlier_ratio, 3);
    double log_num, log_denom;

    if (bad_sample_prob < DBL_MIN) {
        return 0;
    }

    log_num = log(miss_prob);
    log_denom = log(bad_sample_prob);

    // Fall back to the cap when the estimate is undefined or would exceed it
    if (log_denom >= 0 || -log_num >= max_iters * (-log_denom)) {
        return max_iters;
    }

    return (int)round(log_num / log_denom);
}
514 
515 // Estimates an affine transform between the given pairs of points using RANdom
516 // SAmple Consensus
518  DeshakeOpenCLContext *deshake_ctx,
519  MotionVector *point_pairs,
520  DebugMatches *debug_matches,
521  const int num_point_pairs,
522  double *model_out,
523  const double threshold,
524  const int max_iters,
525  const double confidence
526 ) {
527  int result = 0;
528  double best_model[6], model[6];
529  MotionVector pairs_subset[3], best_pairs[3];
530 
531  int iter, niters = FFMAX(max_iters, 1);
532  int good_count, max_good_count = 0;
533 
534  // We need at least 3 points to build a model from
535  if (num_point_pairs < 3) {
536  return 0;
537  } else if (num_point_pairs == 3) {
538  // There are only 3 points, so RANSAC doesn't apply here
539  run_estimate_kernel(point_pairs, model_out);
540 
541  for (int i = 0; i < 3; ++i) {
542  point_pairs[i].should_consider = 1;
543  }
544 
545  return 1;
546  }
547 
548  for (iter = 0; iter < niters; ++iter) {
549  int found = get_subset(&deshake_ctx->alfg, point_pairs, num_point_pairs, pairs_subset, 10000);
550 
551  if (!found) {
552  if (iter == 0) {
553  return 0;
554  }
555 
556  break;
557  }
558 
559  run_estimate_kernel(pairs_subset, model);
560  good_count = find_inliers(point_pairs, num_point_pairs, model, deshake_ctx->ransac_err, threshold);
561 
562  if (good_count > FFMAX(max_good_count, 2)) {
563  for (int mi = 0; mi < 6; ++mi) {
564  best_model[mi] = model[mi];
565  }
566 
567  for (int pi = 0; pi < 3; pi++) {
568  best_pairs[pi] = pairs_subset[pi];
569  }
570 
571  max_good_count = good_count;
572  niters = ransac_update_num_iters(
573  confidence,
574  (double)(num_point_pairs - good_count) / num_point_pairs,
575  niters
576  );
577  }
578  }
579 
580  if (max_good_count > 0) {
581  for (int mi = 0; mi < 6; ++mi) {
582  model_out[mi] = best_model[mi];
583  }
584 
585  for (int pi = 0; pi < 3; ++pi) {
586  debug_matches->model_matches[pi] = best_pairs[pi];
587  }
588  debug_matches->num_model_matches = 3;
589 
590  // Find the inliers again for the best model for debugging
591  find_inliers(point_pairs, num_point_pairs, best_model, deshake_ctx->ransac_err, threshold);
592  result = 1;
593  }
594 
595  return result;
596 }
597 
598 // "Wiggles" the first point in best_pairs around a tiny bit in order to decrease the
599 // total error
600 static void optimize_model(
601  DeshakeOpenCLContext *deshake_ctx,
602  MotionVector *best_pairs,
603  MotionVector *inliers,
604  const int num_inliers,
605  float best_err,
606  double *model_out
607 ) {
608  float move_x_val = 0.01;
609  float move_y_val = 0.01;
610  int move_x = 1;
611  float old_move_x_val = 0;
612  double model[6];
613  int last_changed = 0;
614 
615  for (int iters = 0; iters < 200; iters++) {
616  float total_err = 0;
617 
618  if (move_x) {
619  best_pairs[0].p.p2.s[0] += move_x_val;
620  } else {
621  best_pairs[0].p.p2.s[0] += move_y_val;
622  }
623 
624  run_estimate_kernel(best_pairs, model);
625  compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);
626 
627  for (int j = 0; j < num_inliers; j++) {
628  total_err += deshake_ctx->ransac_err[j];
629  }
630 
631  if (total_err < best_err) {
632  for (int mi = 0; mi < 6; ++mi) {
633  model_out[mi] = model[mi];
634  }
635 
636  best_err = total_err;
637  last_changed = iters;
638  } else {
639  // Undo the change
640  if (move_x) {
641  best_pairs[0].p.p2.s[0] -= move_x_val;
642  } else {
643  best_pairs[0].p.p2.s[0] -= move_y_val;
644  }
645 
646  if (iters - last_changed > 4) {
647  // We've already improved the model as much as we can
648  break;
649  }
650 
651  old_move_x_val = move_x_val;
652 
653  if (move_x) {
654  move_x_val *= -1;
655  } else {
656  move_y_val *= -1;
657  }
658 
659  if (old_move_x_val < 0) {
660  move_x = 0;
661  } else {
662  move_x = 1;
663  }
664  }
665  }
666 }
667 
668 // Uses a process similar to that of RANSAC to find a transform that minimizes
669 // the total error for a set of point matches determined to be inliers
670 //
671 // (Pick random subsets, compute model, find total error, iterate until error
672 // is minimized.)
673 static int minimize_error(
674  DeshakeOpenCLContext *deshake_ctx,
675  MotionVector *inliers,
676  DebugMatches *debug_matches,
677  const int num_inliers,
678  double *model_out,
679  const int max_iters
680 ) {
681  int result = 0;
682  float best_err = FLT_MAX;
683  double best_model[6], model[6];
684  MotionVector pairs_subset[3], best_pairs[3];
685 
686  for (int i = 0; i < max_iters; i++) {
687  float total_err = 0;
688  int found = get_subset(&deshake_ctx->alfg, inliers, num_inliers, pairs_subset, 10000);
689 
690  if (!found) {
691  if (i == 0) {
692  return 0;
693  }
694 
695  break;
696  }
697 
698  run_estimate_kernel(pairs_subset, model);
699  compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);
700 
701  for (int j = 0; j < num_inliers; j++) {
702  total_err += deshake_ctx->ransac_err[j];
703  }
704 
705  if (i == 0 || total_err < best_err) {
706  for (int mi = 0; mi < 6; ++mi) {
707  best_model[mi] = model[mi];
708  }
709 
710  for (int pi = 0; pi < 3; pi++) {
711  best_pairs[pi] = pairs_subset[pi];
712  }
713 
714  best_err = total_err;
715  }
716  }
717 
718  for (int mi = 0; mi < 6; ++mi) {
719  model_out[mi] = best_model[mi];
720  }
721 
722  for (int pi = 0; pi < 3; ++pi) {
723  debug_matches->model_matches[pi] = best_pairs[pi];
724  }
725  debug_matches->num_model_matches = 3;
726  result = 1;
727 
728  optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);
729  return result;
730 }
731 
732 // End code from OpenCV
733 
734 // Decomposes a similarity matrix into translation, rotation, scale, and skew
735 //
736 // See http://frederic-wang.fr/decomposition-of-2d-transform-matrices.html
737 static FrameDelta decompose_transform(double *model)
738 {
739  FrameDelta ret;
740 
741  double a = model[0];
742  double c = model[1];
743  double e = model[2];
744  double b = model[3];
745  double d = model[4];
746  double f = model[5];
747  double delta = a * d - b * c;
748 
749  memset(&ret, 0, sizeof(ret));
750 
751  ret.translation.s[0] = e;
752  ret.translation.s[1] = f;
753 
754  // This is the QR method
755  if (a != 0 || b != 0) {
756  double r = hypot(a, b);
757 
758  ret.rotation = FFSIGN(b) * acos(a / r);
759  ret.scale.s[0] = r;
760  ret.scale.s[1] = delta / r;
761  ret.skew.s[0] = atan((a * c + b * d) / (r * r));
762  ret.skew.s[1] = 0;
763  } else if (c != 0 || d != 0) {
764  double s = sqrt(c * c + d * d);
765 
766  ret.rotation = M_PI / 2 - FFSIGN(d) * acos(-c / s);
767  ret.scale.s[0] = delta / s;
768  ret.scale.s[1] = s;
769  ret.skew.s[0] = 0;
770  ret.skew.s[1] = atan((a * c + b * d) / (s * s));
771  } // otherwise there is only translation
772 
773  return ret;
774 }
775 
776 // Move valid vectors from the 2d buffer into a 1d buffer where they are contiguous
778  DeshakeOpenCLContext *deshake_ctx,
779  int size_y,
780  int size_x
781 ) {
782  int num_vectors = 0;
783 
784  for (int i = 0; i < size_y; ++i) {
785  for (int j = 0; j < size_x; ++j) {
786  MotionVector v = deshake_ctx->matches_host[j + i * size_x];
787 
788  if (v.should_consider) {
789  deshake_ctx->matches_contig_host[num_vectors] = v;
790  ++num_vectors;
791  }
792 
793  // Make sure we do not exceed the amount of space we allocated for these vectors
794  if (num_vectors == MATCHES_CONTIG_SIZE - 1) {
795  return num_vectors;
796  }
797  }
798  }
799  return num_vectors;
800 }
801 
802 // Returns the gaussian kernel value for the given x coordinate and sigma value
803 static float gaussian_for(int x, float sigma) {
804  return 1.0f / expf(((float)x * (float)x) / (2.0f * sigma * sigma));
805 }
806 
// Fills gauss_kernel with a gaussian of the given sigma, centered at index
// length / 2, and scales it so that the entries sum to 1.
//
// gauss_kernel must have room for length values.
static void make_gauss_kernel(float *gauss_kernel, float length, float sigma)
{
    const int center = length / 2;
    float total = 0;
    int n;

    // First pass: raw gaussian samples and their sum
    for (n = 0; n < length; ++n) {
        const float sample = gaussian_for(n - center, sigma);

        total += sample;
        gauss_kernel[n] = sample;
    }

    // Second pass: normalize the gaussian values
    for (n = 0; n < length; ++n) {
        gauss_kernel[n] /= total;
    }
}
826 
827 // Returns indices to start and end iteration at in order to iterate over a window
828 // of length size centered at the current frame in a ringbuffer
829 //
830 // Always returns numbers that result in a window of length size, even if that
831 // means specifying negative indices or indices past the end of the values in the
832 // ringbuffers. Make sure you clip indices appropriately within your loop.
833 static IterIndices start_end_for(DeshakeOpenCLContext *deshake_ctx, int length) {
834  IterIndices indices;
835 
836  indices.start = deshake_ctx->abs_motion.curr_frame_offset - (length / 2);
837  indices.end = deshake_ctx->abs_motion.curr_frame_offset + (length / 2) + (length % 2);
838 
839  return indices;
840 }
841 
842 // Sets val to the value in the given ringbuffer at the given offset, taking care of
843 // clipping the offset into the appropriate range
844 static void ringbuf_float_at(
845  DeshakeOpenCLContext *deshake_ctx,
846  AVFifo *values,
847  float *val,
848  int offset
849 ) {
850  int clip_start, clip_end, offset_clipped;
851  if (deshake_ctx->abs_motion.data_end_offset != -1) {
852  clip_end = deshake_ctx->abs_motion.data_end_offset;
853  } else {
854  // This expression represents the last valid index in the buffer,
855  // which we use repeatedly at the end of the video.
856  clip_end = deshake_ctx->smooth_window - av_fifo_can_write(values) - 1;
857  }
858 
859  if (deshake_ctx->abs_motion.data_start_offset != -1) {
860  clip_start = deshake_ctx->abs_motion.data_start_offset;
861  } else {
862  // Negative indices will occur at the start of the video, and we want
863  // them to be clipped to 0 in order to repeatedly use the position of
864  // the first frame.
865  clip_start = 0;
866  }
867 
868  offset_clipped = av_clip(
869  offset,
870  clip_start,
871  clip_end
872  );
873 
874  av_fifo_peek(values, val, 1, offset_clipped);
875 }
876 
877 // Returns smoothed current frame value of the given buffer of floats based on the
878 // given Gaussian kernel and its length (also the window length, centered around the
879 // current frame) and the "maximum value" of the motion.
880 //
881 // This "maximum value" should be the width / height of the image in the case of
882 // translation and an empirically chosen constant for rotation / scale.
883 //
884 // The sigma chosen to generate the final gaussian kernel with used to smooth the
885 // camera path is either hardcoded (set by user, deshake_ctx->smooth_percent) or
886 // adaptively chosen.
887 static float smooth(
888  DeshakeOpenCLContext *deshake_ctx,
889  float *gauss_kernel,
890  int length,
891  float max_val,
892  AVFifo *values
893 ) {
894  float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,
895  percent_of_max, inverted_percent;
896  IterIndices indices = start_end_for(deshake_ctx, length);
897  float large_sigma = 40.0f;
898  float small_sigma = 2.0f;
899  float best_sigma;
900 
901  if (deshake_ctx->smooth_percent) {
902  best_sigma = (large_sigma - 0.5f) * deshake_ctx->smooth_percent + 0.5f;
903  } else {
904  // Strategy to adaptively smooth trajectory:
905  //
906  // 1. Smooth path with large and small sigma values
907  // 2. Take the absolute value of the difference between them
908  // 3. Get a percentage by putting the difference over the "max value"
909  // 4, Invert the percentage
910  // 5. Calculate a new sigma value weighted towards the larger sigma value
911  // 6. Determine final smoothed trajectory value using that sigma
912 
913  make_gauss_kernel(gauss_kernel, length, large_sigma);
914  for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
915  ringbuf_float_at(deshake_ctx, values, &old, i);
916  new_large_s += old * gauss_kernel[j];
917  }
918 
919  make_gauss_kernel(gauss_kernel, length, small_sigma);
920  for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
921  ringbuf_float_at(deshake_ctx, values, &old, i);
922  new_small_s += old * gauss_kernel[j];
923  }
924 
925  diff_between = fabsf(new_large_s - new_small_s);
926  percent_of_max = diff_between / max_val;
927  inverted_percent = 1 - percent_of_max;
928  best_sigma = large_sigma * powf(inverted_percent, 40);
929  }
930 
931  make_gauss_kernel(gauss_kernel, length, best_sigma);
932  for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
933  ringbuf_float_at(deshake_ctx, values, &old, i);
934  new_best += old * gauss_kernel[j];
935  }
936 
937  return new_best;
938 }
939 
940 // Returns the position of the given point after the transform is applied
941 static cl_float2 transformed_point(float x, float y, float *transform) {
942  cl_float2 ret;
943 
944  ret.s[0] = x * transform[0] + y * transform[1] + transform[2];
945  ret.s[1] = x * transform[3] + y * transform[4] + transform[5];
946 
947  return ret;
948 }
949 
950 // Creates an affine transform that scales from the center of a frame
952  float x_shift,
953  float y_shift,
954  float angle,
955  float scale_x,
956  float scale_y,
957  float center_w,
958  float center_h,
959  float *matrix
960 ) {
961  cl_float2 center_s;
962  float center_s_w, center_s_h;
963 
965  0,
966  0,
967  0,
968  scale_x,
969  scale_y,
970  matrix
971  );
972 
973  center_s = transformed_point(center_w, center_h, matrix);
974  center_s_w = center_w - center_s.s[0];
975  center_s_h = center_h - center_s.s[1];
976 
978  x_shift + center_s_w,
979  y_shift + center_s_h,
980  angle,
981  scale_x,
982  scale_y,
983  matrix
984  );
985 }
986 
987 // Determines the crop necessary to eliminate black borders from a smoothed frame
988 // and updates target crop accordingly
989 static void update_needed_crop(
990  CropInfo* crop,
991  float *transform,
992  float frame_width,
993  float frame_height
994 ) {
995  float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;
996 
997  cl_float2 top_left = transformed_point(0, 0, transform);
998  cl_float2 top_right = transformed_point(frame_width, 0, transform);
999  cl_float2 bottom_left = transformed_point(0, frame_height, transform);
1000  cl_float2 bottom_right = transformed_point(frame_width, frame_height, transform);
1001  float ar_h = frame_height / frame_width;
1002  float ar_w = frame_width / frame_height;
1003 
1004  if (crop->bottom_right.s[0] == 0) {
1005  // The crop hasn't been set to the original size of the plane
1006  crop->bottom_right.s[0] = frame_width;
1007  crop->bottom_right.s[1] = frame_height;
1008  }
1009 
1010  crop->top_left.s[0] = FFMAX3(
1011  crop->top_left.s[0],
1012  top_left.s[0],
1013  bottom_left.s[0]
1014  );
1015 
1016  crop->top_left.s[1] = FFMAX3(
1017  crop->top_left.s[1],
1018  top_left.s[1],
1019  top_right.s[1]
1020  );
1021 
1022  crop->bottom_right.s[0] = FFMIN3(
1023  crop->bottom_right.s[0],
1024  bottom_right.s[0],
1025  top_right.s[0]
1026  );
1027 
1028  crop->bottom_right.s[1] = FFMIN3(
1029  crop->bottom_right.s[1],
1030  bottom_right.s[1],
1031  bottom_left.s[1]
1032  );
1033 
1034  // Make sure our potentially new bounding box has the same aspect ratio
1035  new_height = crop->bottom_right.s[1] - crop->top_left.s[1];
1036  new_width = crop->bottom_right.s[0] - crop->top_left.s[0];
1037 
1038  adjusted_width = new_height * ar_w;
1039  adjusted_x = crop->bottom_right.s[0] - adjusted_width;
1040 
1041  if (adjusted_x >= crop->top_left.s[0]) {
1042  crop->top_left.s[0] = adjusted_x;
1043  } else {
1044  adjusted_height = new_width * ar_h;
1045  adjusted_y = crop->bottom_right.s[1] - adjusted_height;
1046  crop->top_left.s[1] = adjusted_y;
1047  }
1048 }
1049 
1051 {
 // Teardown of the filter's private context: frees the motion ringbuffers and
 // the host-side scratch arrays, empties the frame queue, releases the OpenCL
 // kernels, command queue and memory objects, and finally hands over to
 // ff_opencl_filter_uninit().
 // NOTE(review): the signature of this function is missing from this listing.
1052  DeshakeOpenCLContext *ctx = avctx->priv;
 // Not referenced directly below; presumably written by the CL_RELEASE_*
 // macros -- confirm against their definitions.
1053  cl_int cle;
1054 
 // One ringbuffer exists per tracked motion component
1055  for (int i = 0; i < RingbufCount; i++)
1056  av_fifo_freep2(&ctx->abs_motion.ringbuffers[i]);
1057 
 // Debug match data is only allocated when debugging is enabled
1058  if (ctx->debug_on)
1059  free_debug_matches(&ctx->abs_motion);
1060 
 // Host-side arrays allocated during initialization
1061  if (ctx->gauss_kernel)
1062  av_freep(&ctx->gauss_kernel);
1063 
1064  if (ctx->ransac_err)
1065  av_freep(&ctx->ransac_err);
1066 
1067  if (ctx->matches_host)
1068  av_freep(&ctx->matches_host);
1069 
1070  if (ctx->matches_contig_host)
1071  av_freep(&ctx->matches_contig_host);
1072 
1073  if (ctx->inliers)
1074  av_freep(&ctx->inliers);
1075 
 // Drop any frames still waiting in the queue
1076  ff_framequeue_free(&ctx->fq);
1077 
1078  CL_RELEASE_KERNEL(ctx->kernel_grayscale);
1079  CL_RELEASE_KERNEL(ctx->kernel_harris_response);
1080  CL_RELEASE_KERNEL(ctx->kernel_refine_features);
1081  CL_RELEASE_KERNEL(ctx->kernel_brief_descriptors);
1082  CL_RELEASE_KERNEL(ctx->kernel_match_descriptors);
1083  CL_RELEASE_KERNEL(ctx->kernel_crop_upscale);
 // The debug drawing kernel is only created when debugging is enabled
1084  if (ctx->debug_on)
1085  CL_RELEASE_KERNEL(ctx->kernel_draw_debug_info);
1086 
1087  CL_RELEASE_QUEUE(ctx->command_queue);
1088 
 // For YUV input ctx->grayscale is pointed at plane 0 of the frame being
 // queued (see queue_frame) rather than at an image created by this filter,
 // so it is only released for non-YUV input
1089  if (!ctx->is_yuv)
1090  CL_RELEASE_MEMORY(ctx->grayscale);
1091  CL_RELEASE_MEMORY(ctx->harris_buf);
1092  CL_RELEASE_MEMORY(ctx->refined_features);
1093  CL_RELEASE_MEMORY(ctx->prev_refined_features);
1094  CL_RELEASE_MEMORY(ctx->brief_pattern);
1095  CL_RELEASE_MEMORY(ctx->descriptors);
1096  CL_RELEASE_MEMORY(ctx->prev_descriptors);
1097  CL_RELEASE_MEMORY(ctx->matches);
1098  CL_RELEASE_MEMORY(ctx->matches_contig);
1099  CL_RELEASE_MEMORY(ctx->transform_y);
1100  CL_RELEASE_MEMORY(ctx->transform_uv);
 // These two buffers are only created when debugging is enabled
1101  if (ctx->debug_on) {
1102  CL_RELEASE_MEMORY(ctx->debug_matches);
1103  CL_RELEASE_MEMORY(ctx->debug_model_matches);
1104  }
1105 
1106  ff_opencl_filter_uninit(avctx);
1107 }
1108 
1110 {
 // Sets up the filter's private context: allocates the host-side buffers and
 // motion ringbuffers, builds the random point-pair pattern used for the
 // descriptors, then creates the OpenCL command queue, kernels and buffers
 // and marks the context as initialized.
 //
 // Returns 0 on success. On failure control jumps to `fail`, which frees
 // pattern_host and returns err; everything stored in ctx is left in place,
 // presumably for the teardown function to release -- confirm.
 //
 // NOTE(review): several lines of this function are missing from this
 // listing, including its signature, the declaration of `inl`, the contents
 // of disallowed_formats and the statement that sets err just before the
 // "if (err < 0)" check below.
1111  DeshakeOpenCLContext *ctx = avctx->priv;
1112  AVFilterLink *outlink = avctx->outputs[0];
1113  AVFilterLink *inlink = avctx->inputs[0];
1115  // Pointer to the host-side pattern buffer to be initialized and then copied
1116  // to the GPU
1117  PointPair *pattern_host = NULL;
1118  cl_int cle;
1119  int err;
1120  cl_ulong8 zeroed_ulong8;
1121  FFFrameQueueGlobal fqg;
1122  cl_image_format grayscale_format;
1123  cl_image_desc grayscale_desc;
1124  cl_command_queue_properties queue_props;
1125 
1126  const enum AVPixelFormat disallowed_formats[14] = {
1141  };
1142 
1143  // Number of elements for an array
 // (one element per 32x32 block of the output frame, rounding partial blocks up)
1144  const int image_grid_32 = ROUNDED_UP_DIV(outlink->h, 32) * ROUNDED_UP_DIV(outlink->w, 32);
1145 
 // Byte sizes of the per-block descriptor and feature-point buffers
1146  const int descriptor_buf_size = image_grid_32 * (BREIFN / 8);
1147  const int features_buf_size = image_grid_32 * sizeof(cl_float2);
1148 
 // NOTE(review): hw_frames_ctx is dereferenced on the next line, before the
 // av_assert0(hw_frames_ctx) below has had a chance to fire.
1149  const AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inl->hw_frames_ctx->data;
1150  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hw_frames_ctx->sw_format);
1151 
1152  av_assert0(hw_frames_ctx);
1153  av_assert0(desc);
1154 
1156  ff_framequeue_init(&ctx->fq, &fqg);
1157  ctx->eof = 0;
 // Smoothing window length in frames: the input frame rate times the
 // user-supplied multiplier, truncated to an integer
1158  ctx->smooth_window = (int)(av_q2d(inl->frame_rate) * ctx->smooth_window_multiplier);
1159  ctx->curr_frame = 0;
1160 
1161  memset(&zeroed_ulong8, 0, sizeof(cl_ulong8));
1162 
 // One float per frame in the smoothing window
1163  ctx->gauss_kernel = av_malloc_array(ctx->smooth_window, sizeof(float));
1164  if (!ctx->gauss_kernel) {
1165  err = AVERROR(ENOMEM);
1166  goto fail;
1167  }
1168 
1169  ctx->ransac_err = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(float));
1170  if (!ctx->ransac_err) {
1171  err = AVERROR(ENOMEM);
1172  goto fail;
1173  }
1174 
 // One float ringbuffer per motion component, each holding a full smoothing
 // window of values
1175  for (int i = 0; i < RingbufCount; i++) {
1176  ctx->abs_motion.ringbuffers[i] = av_fifo_alloc2(ctx->smooth_window,
1177  sizeof(float), 0);
1178 
1179  if (!ctx->abs_motion.ringbuffers[i]) {
1180  err = AVERROR(ENOMEM);
1181  goto fail;
1182  }
1183  }
1184 
 // The debug match fifo holds half a smoothing window of entries, matching
 // the number of frames that are queued before one is processed
1185  if (ctx->debug_on) {
1186  ctx->abs_motion.debug_matches = av_fifo_alloc2(
1187  ctx->smooth_window / 2,
1188  sizeof(DebugMatches), 0
1189  );
1190 
1191  if (!ctx->abs_motion.debug_matches) {
1192  err = AVERROR(ENOMEM);
1193  goto fail;
1194  }
1195  }
1196 
 // -1 is the "not set" value for the start/end offsets (see the bookkeeping
 // at the end of filter_frame)
1197  ctx->abs_motion.curr_frame_offset = 0;
1198  ctx->abs_motion.data_start_offset = -1;
1199  ctx->abs_motion.data_end_offset = -1;
1200 
1201  pattern_host = av_malloc_array(BREIFN, sizeof(PointPair));
1202  if (!pattern_host) {
1203  err = AVERROR(ENOMEM);
1204  goto fail;
1205  }
1206 
1207  ctx->matches_host = av_malloc_array(image_grid_32, sizeof(MotionVector));
1208  if (!ctx->matches_host) {
1209  err = AVERROR(ENOMEM);
1210  goto fail;
1211  }
1212 
1213  ctx->matches_contig_host = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));
1214  if (!ctx->matches_contig_host) {
1215  err = AVERROR(ENOMEM);
1216  goto fail;
1217  }
1218 
1219  ctx->inliers = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));
1220  if (!ctx->inliers) {
1221  err = AVERROR(ENOMEM);
1222  goto fail;
1223  }
1224 
1225  // Initializing the patch pattern for building BREIF descriptors with
 // The generator is seeded with a fixed constant, so the same pattern is
 // produced on every run
1226  av_lfg_init(&ctx->alfg, 234342424);
1227  for (int i = 0; i < BREIFN; ++i) {
1228  PointPair pair;
1229 
 // Both coordinates of both points come from rand_in() with the bounds
 // -BRIEF_PATCH_SIZE_HALF and BRIEF_PATCH_SIZE_HALF + 1
1230  for (int j = 0; j < 2; ++j) {
1231  pair.p1.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);
1232  pair.p2.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);
1233  }
1234 
1235  pattern_host[i] = pair;
1236  }
1237 
 // NOTE(review): ctx->sw_format is compared here but is only assigned from
 // hw_frames_ctx->sw_format further below, so within the visible code this
 // check runs against whatever value the context held beforehand -- confirm
 // whether the assignment should come first.
1238  for (int i = 0; i < 14; i++) {
1239  if (ctx->sw_format == disallowed_formats[i]) {
1240  av_log(avctx, AV_LOG_ERROR, "unsupported format in deshake_opencl.\n");
1241  err = AVERROR(ENOSYS);
1242  goto fail;
1243  }
1244  }
1245 
 // Anything not flagged as RGB is treated as YUV
1246  if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
1247  ctx->is_yuv = 0;
1248  } else {
1249  ctx->is_yuv = 1;
1250  }
1251  ctx->sw_format = hw_frames_ctx->sw_format;
1252 
1254  if (err < 0)
1255  goto fail;
1256 
 // Profiling is only enabled on the queue in debug mode, where the event
 // timings are read back
1257  if (ctx->debug_on) {
1258  queue_props = CL_QUEUE_PROFILING_ENABLE;
1259  } else {
1260  queue_props = 0;
1261  }
1262  ctx->command_queue = clCreateCommandQueue(
1263  ctx->ocf.hwctx->context,
1264  ctx->ocf.hwctx->device_id,
1265  queue_props,
1266  &cle
1267  );
1268  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL command queue %d.\n", cle);
1269 
1270  CL_CREATE_KERNEL(ctx, grayscale);
1271  CL_CREATE_KERNEL(ctx, harris_response);
1272  CL_CREATE_KERNEL(ctx, refine_features);
1273  CL_CREATE_KERNEL(ctx, brief_descriptors);
1274  CL_CREATE_KERNEL(ctx, match_descriptors);
1276  CL_CREATE_KERNEL(ctx, crop_upscale);
1277  if (ctx->debug_on)
1278  CL_CREATE_KERNEL(ctx, draw_debug_info);
1279 
 // Non-YUV input needs a separate single-channel float image, sized like the
 // output frame, to hold the grayscale version of each frame
1280  if (!ctx->is_yuv) {
1281  grayscale_format.image_channel_order = CL_R;
1282  grayscale_format.image_channel_data_type = CL_FLOAT;
1283 
1284  grayscale_desc = (cl_image_desc) {
1285  .image_type = CL_MEM_OBJECT_IMAGE2D,
1286  .image_width = outlink->w,
1287  .image_height = outlink->h,
1288  .image_depth = 0,
1289  .image_array_size = 0,
1290  .image_row_pitch = 0,
1291  .image_slice_pitch = 0,
1292  .num_mip_levels = 0,
1293  .num_samples = 0,
1294  .buffer = NULL,
1295  };
1296 
1297  ctx->grayscale = clCreateImage(
1298  ctx->ocf.hwctx->context,
1299  0,
1300  &grayscale_format,
1301  &grayscale_desc,
1302  NULL,
1303  &cle
1304  );
1305  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create grayscale image: %d.\n", cle);
1306  }
1307 
1308  CL_CREATE_BUFFER(ctx, harris_buf, outlink->h * outlink->w * sizeof(float));
1309  CL_CREATE_BUFFER(ctx, refined_features, features_buf_size);
1310  CL_CREATE_BUFFER(ctx, prev_refined_features, features_buf_size);
 // NOTE(review): the line opening the next call is missing from this listing;
 // its arguments create brief_pattern from pattern_host using
 // CL_MEM_COPY_HOST_PTR.
1312  ctx,
1313  brief_pattern,
1314  CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1315  BREIFN * sizeof(PointPair),
1316  pattern_host
1317  );
1318  CL_CREATE_BUFFER(ctx, descriptors, descriptor_buf_size);
1319  CL_CREATE_BUFFER(ctx, prev_descriptors, descriptor_buf_size);
1320  CL_CREATE_BUFFER(ctx, matches, image_grid_32 * sizeof(MotionVector));
1321  CL_CREATE_BUFFER(ctx, matches_contig, MATCHES_CONTIG_SIZE * sizeof(MotionVector));
 // Each transform buffer holds 9 floats
1322  CL_CREATE_BUFFER(ctx, transform_y, 9 * sizeof(float));
1323  CL_CREATE_BUFFER(ctx, transform_uv, 9 * sizeof(float));
1324  if (ctx->debug_on) {
1325  CL_CREATE_BUFFER(ctx, debug_matches, MATCHES_CONTIG_SIZE * sizeof(MotionVector));
1326  CL_CREATE_BUFFER(ctx, debug_model_matches, 3 * sizeof(MotionVector));
1327  }
1328 
1329  ctx->initialized = 1;
 // The host copy of the pattern is no longer needed once the buffers exist
1330  av_freep(&pattern_host);
1331 
1332  return 0;
1333 
1334 fail:
1335  av_freep(&pattern_host);
1336  return err;
1337 }
1338 
1339 // Logs debug information about the transform data
1340 static void transform_debug(AVFilterContext *avctx, float *new_vals, float *old_vals, int curr_frame) {
1341  av_log(avctx, AV_LOG_VERBOSE,
1342  "Frame %d:\n"
1343  "\tframe moved from: %f x, %f y\n"
1344  "\t to: %f x, %f y\n"
1345  "\t rotated from: %f degrees\n"
1346  "\t to: %f degrees\n"
1347  "\t scaled from: %f x, %f y\n"
1348  "\t to: %f x, %f y\n"
1349  "\n"
1350  "\tframe moved by: %f x, %f y\n"
1351  "\t rotated by: %f degrees\n"
1352  "\t scaled by: %f x, %f y\n",
1353  curr_frame,
1354  old_vals[RingbufX], old_vals[RingbufY],
1355  new_vals[RingbufX], new_vals[RingbufY],
1356  old_vals[RingbufRot] * (180.0 / M_PI),
1357  new_vals[RingbufRot] * (180.0 / M_PI),
1358  old_vals[RingbufScaleX], old_vals[RingbufScaleY],
1359  new_vals[RingbufScaleX], new_vals[RingbufScaleY],
1360  old_vals[RingbufX] - new_vals[RingbufX], old_vals[RingbufY] - new_vals[RingbufY],
1361  old_vals[RingbufRot] * (180.0 / M_PI) - new_vals[RingbufRot] * (180.0 / M_PI),
1362  new_vals[RingbufScaleX] / old_vals[RingbufScaleX], new_vals[RingbufScaleY] / old_vals[RingbufScaleY]
1363  );
1364 }
1365 
1366 // Uses the buffered motion information to determine a transform that smooths the
1367 // given frame and applies it
//
// link:        input link; only its destination filter context is used here
// input_frame: frame to stabilize; it is freed on every visible exit path
//
// Returns the result of ff_filter_frame() for the produced frame, or a
// negative error code after jumping to `fail`.
//
// NOTE(review): this listing is missing several lines of the function: the
// declarations of `transforms` and `crops`, and the lines that open most of
// the multi-line calls (only their argument lists are visible).
1368 static int filter_frame(AVFilterLink *link, AVFrame *input_frame)
1369 {
1370  AVFilterContext *avctx = link->dst;
1371  AVFilterLink *outlink = avctx->outputs[0];
1372  FilterLink *outl = ff_filter_link(outlink);
1373  DeshakeOpenCLContext *deshake_ctx = avctx->priv;
1374  AVFrame *cropped_frame = NULL, *transformed_frame = NULL;
1375  int err;
1376  cl_int cle;
 // new_vals holds the target (smoothed) motion for this frame, old_vals the
 // motion recorded for it in the ringbuffers
1377  float new_vals[RingbufCount];
1378  float old_vals[RingbufCount];
1379  // Luma (in the case of YUV) transform, or just the transform in the case of RGB
1380  float transform_y[9];
1381  // Chroma transform
1382  float transform_uv[9];
1383  // Luma crop transform (or RGB)
1384  float transform_crop_y[9];
1385  // Chroma crop transform
1386  float transform_crop_uv[9];
1387  float transform_debug_rgb[9];
1388  size_t global_work[2];
1389  int64_t duration;
1390  cl_mem src, transformed, dst;
1393  cl_event transform_event, crop_upscale_event;
1394  DebugMatches debug_matches;
1395  cl_int num_model_matches;
1396 
 // Half the frame dimensions, passed on as the centre for the luma transforms
1397  const float center_w = (float)input_frame->width / 2;
1398  const float center_h = (float)input_frame->height / 2;
1399 
 // Chroma plane size, derived from the luma size and the format's subsampling
1400  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(deshake_ctx->sw_format);
1401  const int chroma_width = AV_CEIL_RSHIFT(input_frame->width, desc->log2_chroma_w);
1402  const int chroma_height = AV_CEIL_RSHIFT(input_frame->height, desc->log2_chroma_h);
1403 
1404  const float center_w_chroma = (float)chroma_width / 2;
1405  const float center_h_chroma = (float)chroma_height / 2;
1406 
 // Ratios used to scale luma translations down to chroma resolution
1407  const float luma_w_over_chroma_w = ((float)input_frame->width / (float)chroma_width);
1408  const float luma_h_over_chroma_h = ((float)input_frame->height / (float)chroma_height);
1409 
 // In debug mode, pop the match data that queue_frame stored for this frame;
 // its `matches` array is freed again before this function returns
1410  if (deshake_ctx->debug_on) {
1411  av_fifo_read(
1412  deshake_ctx->abs_motion.debug_matches,
1413  &debug_matches, 1);
1414  }
1415 
 // Fall back to one frame at the output frame rate when the frame carries no
 // duration of its own
1416  if (input_frame->duration) {
1417  duration = input_frame->duration;
1418  } else {
1419  duration = av_rescale_q(1, av_inv_q(outl->frame_rate), outlink->time_base);
1420  }
 // Despite its name this field stores the end timestamp of the frame
 // (pts + duration)
1421  deshake_ctx->duration = input_frame->pts + duration;
1422 
1423  // Get the absolute transform data for this frame
1424  for (int i = 0; i < RingbufCount; i++) {
1425  av_fifo_peek(deshake_ctx->abs_motion.ringbuffers[i],
1426  &old_vals[i], 1,
1427  deshake_ctx->abs_motion.curr_frame_offset);
1428  }
1429 
1430  if (deshake_ctx->tripod_mode) {
1431  // If tripod mode is turned on we simply undo all motion relative to the
1432  // first frame
1433 
1434  new_vals[RingbufX] = 0.0f;
1435  new_vals[RingbufY] = 0.0f;
1436  new_vals[RingbufRot] = 0.0f;
1437  new_vals[RingbufScaleX] = 1.0f;
1438  new_vals[RingbufScaleY] = 1.0f;
1439  } else {
1440  // Tripod mode is off and we need to smooth a moving camera
1441 
 // Each component is smoothed over its own ringbuffer; the fourth argument
 // differs per component (frame width/height, pi/4, 2.0) -- its exact
 // meaning is defined by smooth(), which is not visible here
1442  new_vals[RingbufX] = smooth(
1443  deshake_ctx,
1444  deshake_ctx->gauss_kernel,
1445  deshake_ctx->smooth_window,
1446  input_frame->width,
1447  deshake_ctx->abs_motion.ringbuffers[RingbufX]
1448  );
1449  new_vals[RingbufY] = smooth(
1450  deshake_ctx,
1451  deshake_ctx->gauss_kernel,
1452  deshake_ctx->smooth_window,
1453  input_frame->height,
1454  deshake_ctx->abs_motion.ringbuffers[RingbufY]
1455  );
1456  new_vals[RingbufRot] = smooth(
1457  deshake_ctx,
1458  deshake_ctx->gauss_kernel,
1459  deshake_ctx->smooth_window,
1460  M_PI / 4,
1461  deshake_ctx->abs_motion.ringbuffers[RingbufRot]
1462  );
1463  new_vals[RingbufScaleX] = smooth(
1464  deshake_ctx,
1465  deshake_ctx->gauss_kernel,
1466  deshake_ctx->smooth_window,
1467  2.0f,
1468  deshake_ctx->abs_motion.ringbuffers[RingbufScaleX]
1469  );
1470  new_vals[RingbufScaleY] = smooth(
1471  deshake_ctx,
1472  deshake_ctx->gauss_kernel,
1473  deshake_ctx->smooth_window,
1474  2.0f,
1475  deshake_ctx->abs_motion.ringbuffers[RingbufScaleY]
1476  );
1477  }
1478 
 // Luma transform from the difference between recorded and target motion.
 // NOTE(review): the line opening this call is missing from the listing.
1480  old_vals[RingbufX] - new_vals[RingbufX],
1481  old_vals[RingbufY] - new_vals[RingbufY],
1482  old_vals[RingbufRot] - new_vals[RingbufRot],
1483  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1484  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1485  center_w,
1486  center_h,
1487  transform_y
1488  );
1489 
 // Same for chroma, with the translation scaled down to chroma resolution.
 // NOTE(review): the line opening this call is missing from the listing.
1491  (old_vals[RingbufX] - new_vals[RingbufX]) / luma_w_over_chroma_w,
1492  (old_vals[RingbufY] - new_vals[RingbufY]) / luma_h_over_chroma_h,
1493  old_vals[RingbufRot] - new_vals[RingbufRot],
1494  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1495  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1496  center_w_chroma,
1497  center_h_chroma,
1498  transform_uv
1499  );
1500 
 // Upload both 3x3 (9-float) matrices to their device buffers
1501  CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_y, NULL);
1502  CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_uv, 9 * sizeof(float), transform_uv, NULL);
1503 
1504  if (deshake_ctx->debug_on)
1505  transform_debug(avctx, new_vals, old_vals, deshake_ctx->curr_frame);
1506 
 // Both output candidates are always allocated; the one that is not sent
 // downstream is freed at the end
1507  cropped_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
1508  if (!cropped_frame) {
1509  err = AVERROR(ENOMEM);
1510  goto fail;
1511  }
1512 
1513  transformed_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
1514  if (!transformed_frame) {
1515  err = AVERROR(ENOMEM);
1516  goto fail;
1517  }
1518 
 // Plane 0 uses the luma transform, planes 1 and 2 the chroma transform
1519  transforms[0] = deshake_ctx->transform_y;
1520  transforms[1] = transforms[2] = deshake_ctx->transform_uv;
1521 
1522  for (int p = 0; p < AV_VIDEO_MAX_PLANES; p++) {
1523  // Transform all of the planes appropriately
1524  src = (cl_mem)input_frame->data[p];
1525  transformed = (cl_mem)transformed_frame->data[p];
1526 
 // The first plane without data ends the loop
1527  if (!transformed)
1528  break;
1529 
1530  err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);
1531  if (err < 0)
1532  goto fail;
1533 
 // NOTE(review): the line opening this kernel launch is missing from the
 // listing. transform_event is passed for every plane, so the timing read
 // further below presumably reflects only the last plane -- confirm.
1535  deshake_ctx->command_queue,
1536  deshake_ctx->kernel_transform,
1537  global_work,
1538  NULL,
1539  &transform_event,
1540  { sizeof(cl_mem), &src },
1541  { sizeof(cl_mem), &transformed },
1542  { sizeof(cl_mem), &transforms[p] },
1543  );
1544  }
1545 
 // Debug overlay: only for non-YUV input and only when matches were recorded
1546  if (deshake_ctx->debug_on && !deshake_ctx->is_yuv && debug_matches.num_matches > 0) {
 // NOTE(review): the lines opening the next two calls are missing; their
 // arguments copy the host match arrays into the device debug buffers.
1548  deshake_ctx->command_queue,
1549  deshake_ctx->debug_matches,
1550  debug_matches.num_matches * sizeof(MotionVector),
1551  debug_matches.matches,
1552  NULL
1553  );
1554 
1556  deshake_ctx->command_queue,
1557  deshake_ctx->debug_model_matches,
1558  debug_matches.num_model_matches * sizeof(MotionVector),
1559  debug_matches.model_matches,
1560  NULL
1561  );
1562 
1563  num_model_matches = debug_matches.num_model_matches;
1564 
1565  // Invert the transform
 // (operands are swapped relative to the forward transform above; the line
 // opening this call is missing from the listing)
1567  new_vals[RingbufX] - old_vals[RingbufX],
1568  new_vals[RingbufY] - old_vals[RingbufY],
1569  new_vals[RingbufRot] - old_vals[RingbufRot],
1570  old_vals[RingbufScaleX] / new_vals[RingbufScaleX],
1571  old_vals[RingbufScaleY] / new_vals[RingbufScaleY],
1572  center_w,
1573  center_h,
1574  transform_debug_rgb
1575  );
1576 
 // The transform_y device buffer is reused to carry the inverted matrix to
 // the debug kernel; the plane transforms above have already been enqueued
1577  CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_debug_rgb, NULL);
1578 
1579  transformed = (cl_mem)transformed_frame->data[0];
 // One work-item per match (the line opening this launch is missing)
1581  deshake_ctx->command_queue,
1582  deshake_ctx->kernel_draw_debug_info,
1583  (size_t[]){ debug_matches.num_matches },
1584  NULL,
1585  NULL,
1586  { sizeof(cl_mem), &transformed },
1587  { sizeof(cl_mem), &deshake_ctx->debug_matches },
1588  { sizeof(cl_mem), &deshake_ctx->debug_model_matches },
1589  { sizeof(cl_int), &num_model_matches },
1590  { sizeof(cl_mem), &deshake_ctx->transform_y }
1591  );
1592  }
1593 
1594  if (deshake_ctx->should_crop) {
1595  // Generate transforms for cropping
 // Translation and rotation are divided by 5 here, while scale is used as is.
 // NOTE(review): the reason for the factor 5 is not stated in this code, and
 // the line opening this call is missing from the listing.
1597  (old_vals[RingbufX] - new_vals[RingbufX]) / 5,
1598  (old_vals[RingbufY] - new_vals[RingbufY]) / 5,
1599  (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
1600  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1601  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1602  center_w,
1603  center_h,
1604  transform_crop_y
1605  );
1606  update_needed_crop(&deshake_ctx->crop_y, transform_crop_y, input_frame->width, input_frame->height);
1607 
 // Chroma variant: translation additionally scaled to chroma resolution
 // (the line opening this call is missing from the listing)
1609  (old_vals[RingbufX] - new_vals[RingbufX]) / (5 * luma_w_over_chroma_w),
1610  (old_vals[RingbufY] - new_vals[RingbufY]) / (5 * luma_h_over_chroma_h),
1611  (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
1612  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1613  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1614  center_w_chroma,
1615  center_h_chroma,
1616  transform_crop_uv
1617  );
1618  update_needed_crop(&deshake_ctx->crop_uv, transform_crop_uv, chroma_width, chroma_height);
1619 
 // Plane 0 uses the luma crop, planes 1 and 2 the chroma crop
1620  crops[0] = deshake_ctx->crop_y;
1621  crops[1] = crops[2] = deshake_ctx->crop_uv;
1622 
1623  for (int p = 0; p < AV_VIDEO_MAX_PLANES; p++) {
1624  // Crop all of the planes appropriately
1625  dst = (cl_mem)cropped_frame->data[p];
1626  transformed = (cl_mem)transformed_frame->data[p];
1627 
1628  if (!dst)
1629  break;
1630 
1631  err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);
1632  if (err < 0)
1633  goto fail;
1634 
 // Reads the transformed plane and writes the cropped plane (the line
 // opening this launch is missing from the listing)
1636  deshake_ctx->command_queue,
1637  deshake_ctx->kernel_crop_upscale,
1638  global_work,
1639  NULL,
1640  &crop_upscale_event,
1641  { sizeof(cl_mem), &transformed },
1642  { sizeof(cl_mem), &dst },
1643  { sizeof(cl_float2), &crops[p].top_left },
1644  { sizeof(cl_float2), &crops[p].bottom_right },
1645  );
1646  }
1647  }
1648 
1649  if (deshake_ctx->curr_frame < deshake_ctx->smooth_window / 2) {
1650  // This means we are somewhere at the start of the video. We need to
1651  // increment the current frame offset until it reaches the center of
1652  // the ringbuffers (as the current frame will be located there for
1653  // the rest of the video).
1654  //
1655  // The end of the video is taken care of by draining motion data
1656  // one-by-one out of the buffer, causing the (at that point fixed)
1657  // offset to move towards later frames' data.
1658  ++deshake_ctx->abs_motion.curr_frame_offset;
1659  }
1660 
1661  if (deshake_ctx->abs_motion.data_end_offset != -1) {
1662  // Keep the end offset in sync with the frame it's supposed to be
1663  // positioned at
1664  --deshake_ctx->abs_motion.data_end_offset;
1665 
1666  if (deshake_ctx->abs_motion.data_end_offset == deshake_ctx->abs_motion.curr_frame_offset - 1) {
1667  // The end offset would be the start of the new video sequence; flip to
1668  // start offset
1669  deshake_ctx->abs_motion.data_end_offset = -1;
1670  deshake_ctx->abs_motion.data_start_offset = deshake_ctx->abs_motion.curr_frame_offset;
1671  }
1672  } else if (deshake_ctx->abs_motion.data_start_offset != -1) {
1673  // Keep the start offset in sync with the frame it's supposed to be
1674  // positioned at
1675  --deshake_ctx->abs_motion.data_start_offset;
1676  }
1677 
 // Accumulate kernel timings for the averages reported in debug mode
1678  if (deshake_ctx->debug_on) {
1679  deshake_ctx->transform_time += ff_opencl_get_event_time(transform_event);
1680  if (deshake_ctx->should_crop) {
1681  deshake_ctx->crop_upscale_time += ff_opencl_get_event_time(crop_upscale_event);
1682  }
1683  }
1684 
1685  ++deshake_ctx->curr_frame;
1686 
 // Release the match array popped from the debug fifo at the top
1687  if (deshake_ctx->debug_on)
1688  av_freep(&debug_matches.matches);
1689 
 // Send the cropped frame when cropping is enabled, otherwise the transformed
 // one; the unused frame and the input frame are freed either way
1690  if (deshake_ctx->should_crop) {
1691  err = av_frame_copy_props(cropped_frame, input_frame);
1692  if (err < 0)
1693  goto fail;
1694 
1695  av_frame_free(&transformed_frame);
1696  av_frame_free(&input_frame);
1697  return ff_filter_frame(outlink, cropped_frame);
1698 
1699  } else {
1700  err = av_frame_copy_props(transformed_frame, input_frame);
1701  if (err < 0)
1702  goto fail;
1703 
1704  av_frame_free(&cropped_frame);
1705  av_frame_free(&input_frame);
1706  return ff_filter_frame(outlink, transformed_frame);
1707  }
1708 
1709 fail:
 // Wait for everything enqueued so far before freeing the frames
1710  clFinish(deshake_ctx->command_queue);
1711 
1712  if (deshake_ctx->debug_on)
1713  if (debug_matches.matches)
1714  av_freep(&debug_matches.matches);
1715 
1716  av_frame_free(&input_frame);
1717  av_frame_free(&transformed_frame);
1718  av_frame_free(&cropped_frame);
1719  return err;
1720 }
1721 
1722 // Add the given frame to the frame queue to eventually be processed.
1723 //
1724 // Also determines the motion from the previous frame and updates the stored
1725 // motion information accordingly.
//
// link:        input link; only its destination filter context is used here
// input_frame: frame to analyse; on success it is handed to the frame queue,
//              on failure it is freed
//
// Returns the result of ff_framequeue_add(), or a negative error code after
// jumping to `fail`.
//
// NOTE(review): the lines that open the five kernel launches are missing from
// this listing; only their argument lists are visible.
1726 static int queue_frame(AVFilterLink *link, AVFrame *input_frame)
1727 {
1728  AVFilterContext *avctx = link->dst;
1729  DeshakeOpenCLContext *deshake_ctx = avctx->priv;
1730  int err;
1731  int num_vectors;
1732  int num_inliers = 0;
1733  cl_int cle;
1734  FrameDelta relative;
1735  SimilarityMatrix model;
1736  size_t global_work[2];
1737  size_t harris_global_work[2];
1738  size_t grid_32_global_work[2];
1739  int grid_32_h, grid_32_w;
1740  size_t local_work[2];
1741  cl_mem src, temp;
 // NOTE(review): these are sized 5 but indexed up to RingbufCount below;
 // presumably RingbufCount == 5 -- confirm.
1742  float prev_vals[5];
1743  float new_vals[5];
1744  cl_event grayscale_event, harris_response_event, refine_features_event,
1745  brief_event, match_descriptors_event, read_buf_event;
1746  DebugMatches debug_matches;
1747 
1748  num_vectors = 0;
1749 
 // 8x8 local work size, used for the harris response kernel only
1750  local_work[0] = 8;
1751  local_work[1] = 8;
1752 
 // Three global work sizes for plane 0, requested with a last argument of
 // 0, 8 and 32 respectively
1753  err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, 0, 0);
1754  if (err < 0)
1755  goto fail;
1756 
1757  err = ff_opencl_filter_work_size_from_image(avctx, harris_global_work, input_frame, 0, 8);
1758  if (err < 0)
1759  goto fail;
1760 
1761  err = ff_opencl_filter_work_size_from_image(avctx, grid_32_global_work, input_frame, 0, 32);
1762  if (err < 0)
1763  goto fail;
1764 
1765  // We want a single work-item for each 32x32 block of pixels in the input frame
1766  grid_32_global_work[0] /= 32;
1767  grid_32_global_work[1] /= 32;
1768 
 // Block counts in each dimension, rounding partial blocks up
1769  grid_32_h = ROUNDED_UP_DIV(input_frame->height, 32);
1770  grid_32_w = ROUNDED_UP_DIV(input_frame->width, 32);
1771 
 // YUV input: plane 0 of the frame is used directly as the grayscale image.
 // Otherwise the grayscale kernel converts plane 0 into the context's own
 // grayscale image.
1772  if (deshake_ctx->is_yuv) {
1773  deshake_ctx->grayscale = (cl_mem)input_frame->data[0];
1774  } else {
1775  src = (cl_mem)input_frame->data[0];
1776 
1778  deshake_ctx->command_queue,
1779  deshake_ctx->kernel_grayscale,
1780  global_work,
1781  NULL,
1782  &grayscale_event,
1783  { sizeof(cl_mem), &src },
1784  { sizeof(cl_mem), &deshake_ctx->grayscale }
1785  );
1786  }
1787 
 // Harris response: grayscale image in, harris_buf out
1789  deshake_ctx->command_queue,
1790  deshake_ctx->kernel_harris_response,
1791  harris_global_work,
1792  local_work,
1793  &harris_response_event,
1794  { sizeof(cl_mem), &deshake_ctx->grayscale },
1795  { sizeof(cl_mem), &deshake_ctx->harris_buf }
1796  );
1797 
 // Feature refinement over the 32x32 block grid; the refine_features option
 // is passed through as an integer argument
1799  deshake_ctx->command_queue,
1800  deshake_ctx->kernel_refine_features,
1801  grid_32_global_work,
1802  NULL,
1803  &refine_features_event,
1804  { sizeof(cl_mem), &deshake_ctx->grayscale },
1805  { sizeof(cl_mem), &deshake_ctx->harris_buf },
1806  { sizeof(cl_mem), &deshake_ctx->refined_features },
1807  { sizeof(cl_int), &deshake_ctx->refine_features }
1808  );
1809 
 // Descriptor computation for the refined features using the sampling pattern
1811  deshake_ctx->command_queue,
1812  deshake_ctx->kernel_brief_descriptors,
1813  grid_32_global_work,
1814  NULL,
1815  &brief_event,
1816  { sizeof(cl_mem), &deshake_ctx->grayscale },
1817  { sizeof(cl_mem), &deshake_ctx->refined_features },
1818  { sizeof(cl_mem), &deshake_ctx->descriptors },
1819  { sizeof(cl_mem), &deshake_ctx->brief_pattern}
1820  );
1821 
1822  if (!av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[RingbufX])) {
1823  // This is the first frame we've been given to queue, meaning there is
1824  // no previous frame to match descriptors to
1825 
1826  goto no_motion_data;
1827  }
1828 
 // Match this frame's descriptors against the previous frame's
1830  deshake_ctx->command_queue,
1831  deshake_ctx->kernel_match_descriptors,
1832  grid_32_global_work,
1833  NULL,
1834  &match_descriptors_event,
1835  { sizeof(cl_mem), &deshake_ctx->prev_refined_features },
1836  { sizeof(cl_mem), &deshake_ctx->refined_features },
1837  { sizeof(cl_mem), &deshake_ctx->descriptors },
1838  { sizeof(cl_mem), &deshake_ctx->prev_descriptors },
1839  { sizeof(cl_mem), &deshake_ctx->matches }
1840  );
1841 
 // Blocking read (CL_TRUE) of one MotionVector per grid block into host memory
1842  cle = clEnqueueReadBuffer(
1843  deshake_ctx->command_queue,
1844  deshake_ctx->matches,
1845  CL_TRUE,
1846  0,
1847  grid_32_h * grid_32_w * sizeof(MotionVector),
1848  deshake_ctx->matches_host,
1849  0,
1850  NULL,
1851  &read_buf_event
1852  );
1853  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to read matches to host: %d.\n", cle);
1854 
 // Returns the number of vectors; judging by its name and the use of
 // matches_contig_host below it packs the matches into that array -- confirm
1855  num_vectors = make_vectors_contig(deshake_ctx, grid_32_h, grid_32_w);
1856 
1857  if (num_vectors < 10) {
1858  // Not enough matches to get reliable motion data for this frame
1859  //
1860  // From this point on all data is relative to this frame rather than the
1861  // original frame. We have to make sure that we don't mix values that were
1862  // relative to the original frame with the new values relative to this
1863  // frame when doing the gaussian smoothing. We keep track of where the old
1864  // values end using this data_end_offset field in order to accomplish
1865  // that goal.
1866  //
1867  // If no motion data is present for multiple frames in a short window of
1868  // time, we leave the end where it was to avoid mixing 0s in with the
1869  // old data (and just treat them all as part of the new values)
1870  if (deshake_ctx->abs_motion.data_end_offset == -1) {
1871  deshake_ctx->abs_motion.data_end_offset =
1872  av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[RingbufX]) - 1;
1873  }
1874 
1875  goto no_motion_data;
1876  }
1877 
 // Fit a model to the matches; a zero return is treated as "no motion data".
 // The meaning of the three numeric arguments is defined by
 // estimate_affine_2d(), which is not visible here.
1878  if (!estimate_affine_2d(
1879  deshake_ctx,
1880  deshake_ctx->matches_contig_host,
1881  &debug_matches,
1882  num_vectors,
1883  model.matrix,
1884  10.0,
1885  3000,
1886  0.999999999999
1887  )) {
1888  goto no_motion_data;
1889  }
1890 
 // Collect the vectors still flagged should_consider into the inliers array
1891  for (int i = 0; i < num_vectors; i++) {
1892  if (deshake_ctx->matches_contig_host[i].should_consider) {
1893  deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[i];
1894  num_inliers++;
1895  }
1896  }
1897 
 // Second pass on model.matrix using only the inliers; a zero return is again
 // treated as "no motion data"
1898  if (!minimize_error(
1899  deshake_ctx,
1900  deshake_ctx->inliers,
1901  &debug_matches,
1902  num_inliers,
1903  model.matrix,
1904  400
1905  )) {
1906  goto no_motion_data;
1907  }
1908 
1909 
 // Split the model into translation, rotation and scale relative to the
 // previous frame
1910  relative = decompose_transform(model.matrix);
1911 
1912  // Get the absolute transform data for the previous frame
 // (the most recently written entry of each ringbuffer)
1913  for (int i = 0; i < RingbufCount; i++) {
1914  av_fifo_peek(
1915  deshake_ctx->abs_motion.ringbuffers[i],
1916  &prev_vals[i], 1,
1917  av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[i]) - 1);
1918  }
1919 
 // Accumulate: translation and rotation are added to the previous absolute
 // values, the previous scale is divided by the relative scale
1920  new_vals[RingbufX] = prev_vals[RingbufX] + relative.translation.s[0];
1921  new_vals[RingbufY] = prev_vals[RingbufY] + relative.translation.s[1];
1922  new_vals[RingbufRot] = prev_vals[RingbufRot] + relative.rotation;
1923  new_vals[RingbufScaleX] = prev_vals[RingbufScaleX] / relative.scale.s[0];
1924  new_vals[RingbufScaleY] = prev_vals[RingbufScaleY] / relative.scale.s[1];
1925 
 // NOTE(review): timings are only accumulated on this success path; frames
 // that take the no_motion_data path below do not contribute to them.
1926  if (deshake_ctx->debug_on) {
1927  if (!deshake_ctx->is_yuv) {
1928  deshake_ctx->grayscale_time += ff_opencl_get_event_time(grayscale_event);
1929  }
1930  deshake_ctx->harris_response_time += ff_opencl_get_event_time(harris_response_event);
1931  deshake_ctx->refine_features_time += ff_opencl_get_event_time(refine_features_event);
1932  deshake_ctx->brief_descriptors_time += ff_opencl_get_event_time(brief_event);
1933  deshake_ctx->match_descriptors_time += ff_opencl_get_event_time(match_descriptors_event);
1934  deshake_ctx->read_buf_time += ff_opencl_get_event_time(read_buf_event);
1935  }
1936 
1937  goto end;
1938 
1939 no_motion_data:
 // Record "no motion": zero translation and rotation, unit scale
1940  new_vals[RingbufX] = 0.0f;
1941  new_vals[RingbufY] = 0.0f;
1942  new_vals[RingbufRot] = 0.0f;
1943  new_vals[RingbufScaleX] = 1.0f;
1944  new_vals[RingbufScaleY] = 1.0f;
1945 
 // Clear the should_consider flag on every match and report no model matches
1946  for (int i = 0; i < num_vectors; i++) {
1947  deshake_ctx->matches_contig_host[i].should_consider = 0;
1948  }
1949  debug_matches.num_model_matches = 0;
1950 
1951  if (deshake_ctx->debug_on) {
1952  av_log(avctx, AV_LOG_VERBOSE,
1953  "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n"
1954  );
1955  }
1956 
1957  goto end;
1958 
1959 end:
1960  // Swap the descriptor buffers (we don't need the previous frame's descriptors
1961  // again so we will use that space for the next frame's descriptors)
1962  temp = deshake_ctx->prev_descriptors;
1963  deshake_ctx->prev_descriptors = deshake_ctx->descriptors;
1964  deshake_ctx->descriptors = temp;
1965 
1966  // Same for the refined features
1967  temp = deshake_ctx->prev_refined_features;
1968  deshake_ctx->prev_refined_features = deshake_ctx->refined_features;
1969  deshake_ctx->refined_features = temp;
1970 
 // In debug mode, store a heap copy of this frame's matches in the debug
 // fifo; filter_frame reads the entry back and frees the copy
1971  if (deshake_ctx->debug_on) {
1972  if (num_vectors == 0) {
1973  debug_matches.matches = NULL;
1974  } else {
1975  debug_matches.matches = av_malloc_array(num_vectors, sizeof(MotionVector));
1976 
1977  if (!debug_matches.matches) {
1978  err = AVERROR(ENOMEM);
1979  goto fail;
1980  }
1981  }
1982 
1983  for (int i = 0; i < num_vectors; i++) {
1984  debug_matches.matches[i] = deshake_ctx->matches_contig_host[i];
1985  }
1986  debug_matches.num_matches = num_vectors;
1987 
1988  av_fifo_write(
1989  deshake_ctx->abs_motion.debug_matches,
1990  &debug_matches, 1);
1991  }
1992 
 // Append this frame's absolute motion to every ringbuffer
1993  for (int i = 0; i < RingbufCount; i++) {
1994  av_fifo_write(deshake_ctx->abs_motion.ringbuffers[i], &new_vals[i], 1);
1995  }
1996 
1997  return ff_framequeue_add(&deshake_ctx->fq, input_frame);
1998 
1999 fail:
 // Wait for everything enqueued so far before freeing the frame
2000  clFinish(deshake_ctx->command_queue);
2001  av_frame_free(&input_frame);
2002  return err;
2003 }
2004 
// activate(): body of the filter's activate callback. Per this page's
// cross-reference index the definition is
// `static int activate(AVFilterContext *ctx)`; the signature line itself
// (embedded line 2005) is absent from this listing.
//
// Visible flow:
//   1. While EOF has not been seen, handle one input frame: validate it,
//      make room in the motion ringbuffers, and, once enough frames are
//      queued, run filter_frame() on the oldest queued frame.
//   2. Acknowledge an input status change; AVERROR_EOF sets deshake_ctx->eof.
//   3. On EOF, flush every frame still in the queue through filter_frame(),
//      optionally report averaged timings, and set EOF on the output link.
//   4. Otherwise forward the output's frame request to the input.
//
// Returns a negative error code from a failed step, the result of
// filter_frame() when a frame is processed in step 1, 0 once EOF has been set
// on the output, and FFERROR_NOT_READY when nothing else applied.
2006 {
2007  AVFilterLink *inlink = ctx->inputs[0];
2008  AVFilterLink *outlink = ctx->outputs[0];
2009  DeshakeOpenCLContext *deshake_ctx = ctx->priv;
2010  AVFrame *frame = NULL;
2011  int ret, status;
2012  int64_t pts;
2013 
// NOTE(review): embedded line 2014 is missing from this listing (presumably
// the FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink) call that the index
// lists) - confirm against the real file.
2015 
2016  if (!deshake_ctx->eof) {
// NOTE(review): embedded line 2017 is missing; `ret` and `frame` are evidently
// assigned there (presumably by ff_inlink_consume_frame(), listed in the
// index). As shown, `ret` would be read uninitialized - confirm.
2018  if (ret < 0)
2019  return ret;
2020  if (ret > 0) {
// Only frames carrying a hardware frames context are accepted.
// NOTE(review): no av_frame_free() is visible on this return or on the error
// returns below, so `frame` looks leaked on these paths - confirm.
2021  if (!frame->hw_frames_ctx)
2022  return AVERROR(EINVAL);
2023 
// One-time setup, guarded by the `initialized` flag.
// NOTE(review): embedded line 2025 (the call that sets `ret`, presumably
// deshake_opencl_init()) is missing from this listing - confirm.
2024  if (!deshake_ctx->initialized) {
2026  if (ret < 0)
2027  return ret;
2028  }
2029 
2030  // If there is no more space in the ringbuffers, remove the oldest
2031  // values to make room for the new ones
// Only the RingbufX fifo is tested for free space; one element is then
// discarded from every ringbuffer.
2032  if (!av_fifo_can_write(deshake_ctx->abs_motion.ringbuffers[RingbufX])) {
2033  for (int i = 0; i < RingbufCount; i++) {
2034  av_fifo_drain2(deshake_ctx->abs_motion.ringbuffers[i], 1);
2035  }
2036  }
// NOTE(review): embedded line 2037 is missing (presumably
// `ret = queue_frame(inlink, frame);`, which the index lists) - confirm.
2038  if (ret < 0)
2039  return ret;
// This test is always true here: the ret < 0 case has just returned.
2040  if (ret >= 0) {
2041  // See if we have enough buffered frames to process one
2042  //
2043  // "enough" is half the smooth window of queued frames into the future
2044  if (ff_framequeue_queued_frames(&deshake_ctx->fq) >= deshake_ctx->smooth_window / 2) {
2045  return filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));
2046  }
2047  }
2048  }
2049  }
2050 
// Record end of input when the input link reports AVERROR_EOF; any other
// acknowledged status is ignored here.
2051  if (!deshake_ctx->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
2052  if (status == AVERROR_EOF) {
2053  deshake_ctx->eof = 1;
2054  }
2055  }
2056 
2057  if (deshake_ctx->eof) {
2058  // Finish processing the rest of the frames in the queue.
// One element is dropped from every ringbuffer before each flushed frame.
2059  while(ff_framequeue_queued_frames(&deshake_ctx->fq) != 0) {
2060  for (int i = 0; i < RingbufCount; i++) {
2061  av_fifo_drain2(deshake_ctx->abs_motion.ringbuffers[i], 1);
2062  }
2063 
2064  ret = filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));
2065  if (ret < 0) {
2066  return ret;
2067  }
2068  }
2069 
// Debug mode: report the accumulated timings, each passed through
// averaged_event_time_ms() together with deshake_ctx->curr_frame.
// NOTE(review): embedded line 2071 (the opening of the logging call,
// presumably av_log(ctx, AV_LOG_VERBOSE, ...)) is missing - confirm.
2070  if (deshake_ctx->debug_on) {
2072  "Average kernel execution times:\n"
2073  "\t grayscale: %0.3f ms\n"
2074  "\t harris_response: %0.3f ms\n"
2075  "\t refine_features: %0.3f ms\n"
2076  "\tbrief_descriptors: %0.3f ms\n"
2077  "\tmatch_descriptors: %0.3f ms\n"
2078  "\t transform: %0.3f ms\n"
2079  "\t crop_upscale: %0.3f ms\n"
2080  "Average buffer read times:\n"
2081  "\t features buf: %0.3f ms\n",
2082  averaged_event_time_ms(deshake_ctx->grayscale_time, deshake_ctx->curr_frame),
2083  averaged_event_time_ms(deshake_ctx->harris_response_time, deshake_ctx->curr_frame),
2084  averaged_event_time_ms(deshake_ctx->refine_features_time, deshake_ctx->curr_frame),
2085  averaged_event_time_ms(deshake_ctx->brief_descriptors_time, deshake_ctx->curr_frame),
2086  averaged_event_time_ms(deshake_ctx->match_descriptors_time, deshake_ctx->curr_frame),
2087  averaged_event_time_ms(deshake_ctx->transform_time, deshake_ctx->curr_frame),
2088  averaged_event_time_ms(deshake_ctx->crop_upscale_time, deshake_ctx->curr_frame),
2089  averaged_event_time_ms(deshake_ctx->read_buf_time, deshake_ctx->curr_frame)
2090  );
2091  }
2092 
// Signal EOF downstream, passing deshake_ctx->duration as the pts argument.
2093  ff_outlink_set_status(outlink, AVERROR_EOF, deshake_ctx->duration);
2094  return 0;
2095  }
2096 
// The eof branch above always returns, so eof is necessarily 0 here and this
// guard is always taken.
2097  if (!deshake_ctx->eof) {
2098  FF_FILTER_FORWARD_WANTED(outlink, inlink);
2099  }
2100 
2101  return FFERROR_NOT_READY;
2102 }
2103 
// Sole entry of the input pad array. Per this page's index the array is
// `static const AVFilterPad deshake_opencl_inputs[]` (embedded line 2104);
// that declaration line is absent from this listing.
// The pad is a video pad named "default" whose config_props callback is
// ff_opencl_filter_config_input.
2105  {
2106  .name = "default",
2107  .type = AVMEDIA_TYPE_VIDEO,
2108  .config_props = &ff_opencl_filter_config_input,
2109  },
2110 };
2111 
// Sole entry of the output pad array. Per this page's index the array is
// `static const AVFilterPad deshake_opencl_outputs[]` (embedded line 2112);
// that declaration line is absent from this listing.
// The pad is a video pad named "default" whose config_props callback is
// ff_opencl_filter_config_output.
2113  {
2114  .name = "default",
2115  .type = AVMEDIA_TYPE_VIDEO,
2116  .config_props = &ff_opencl_filter_config_output,
2117  },
2118 };
2119 
// OFFSET(x): byte offset of member x inside DeshakeOpenCLContext, used by the
// option table below to say where each option value is stored.
2120 #define OFFSET(x) offsetof(DeshakeOpenCLContext, x)
// FLAGS: flag set applied to every option in the table below.
// NOTE(review): the expansion is not parenthesized; that is harmless where it
// is used as a standalone initializer, but it would bind unexpectedly inside a
// larger expression.
2121 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
2122 
// Entries of the filter's option table. The table's declaration line
// (embedded line 2123) is absent from this listing.
// Each entry gives: option name, help text, storage offset in
// DeshakeOpenCLContext, value type, then three positional values that follow
// the AVOption layout (default, minimum, maximum) and the option flags.
// "tripod": boolean, default 0, stored in tripod_mode.
2124  {
2125  "tripod", "simulates a tripod by preventing any camera movement whatsoever "
2126  "from the original frame",
2127  OFFSET(tripod_mode), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS
2128  },
// "debug": boolean, default 0, stored in debug_on.
2129  {
2130  "debug", "turn on additional debugging information",
2131  OFFSET(debug_on), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS
2132  },
// "adaptive_crop": boolean, default 1, stored in should_crop.
2133  {
2134  "adaptive_crop", "attempt to subtly crop borders to reduce mirrored content",
2135  OFFSET(should_crop), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS
2136  },
// "refine_features": boolean, default 1, stored in refine_features.
2137  {
2138  "refine_features", "refine feature point locations at a sub-pixel level",
2139  OFFSET(refine_features), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS
2140  },
// "smooth_strength": float in [0, 1], default 0, stored in smooth_percent.
// Per the help text, 0 asks the filter to pick the strength adaptively.
2141  {
2142  "smooth_strength", "smoothing strength (0 attempts to adaptively determine optimal strength)",
2143  OFFSET(smooth_percent), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, 0.0f, 1.0f, FLAGS
2144  },
// "smooth_window_multiplier": float in [0.1, 10], default 2, stored in
// smooth_window_multiplier.
2145  {
2146  "smooth_window_multiplier", "multiplier for number of frames to buffer for motion data",
2147  OFFSET(smooth_window_multiplier), AV_OPT_TYPE_FLOAT, {.dbl = 2.0}, 0.1, 10.0, FLAGS
2148  },
// All-NULL sentinel entry terminating the table.
2149  { NULL }
2150 };
2151 
// Defines deshake_opencl_class, which the filter definition further down
// references through .priv_class.
2152 AVFILTER_DEFINE_CLASS(deshake_opencl);
2153 
// Initializers of the filter definition that registers "deshake_opencl".
// NOTE(review): this listing omits the declaration line (embedded line 2154)
// and the initializers on embedded lines 2159-2160 and 2162-2164, so the
// entries below are not the complete definition - confirm against the real
// file.
2155  .name = "deshake_opencl",
2156  .description = NULL_IF_CONFIG_SMALL("Feature-point based video stabilization filter"),
// Private context is a DeshakeOpenCLContext; its options come from the class
// generated by AVFILTER_DEFINE_CLASS(deshake_opencl).
2157  .priv_size = sizeof(DeshakeOpenCLContext),
2158  .priv_class = &deshake_opencl_class,
// The filter is driven through the activate callback.
2161  .activate = activate,
2165  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
2166  .flags = AVFILTER_FLAG_HWDEVICE,
2167 };
deshake_opencl_inputs
static const AVFilterPad deshake_opencl_inputs[]
Definition: vf_deshake_opencl.c:2104
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(deshake_opencl)
MATCHES_CONTIG_SIZE
#define MATCHES_CONTIG_SIZE
Definition: vf_deshake_opencl.c:91
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:116
CL_RUN_KERNEL_WITH_ARGS
#define CL_RUN_KERNEL_WITH_ARGS(queue, kernel, global_work_size, local_work_size, event,...)
Uses the above macro to enqueue the given kernel and then additionally runs it to completion via clFinish.
Definition: opencl.h:180
av_fifo_drain2
void av_fifo_drain2(AVFifo *f, size_t size)
Discard the specified amount of data from an AVFifo.
Definition: fifo.c:266
F2
#define F2(l, r, i)
Definition: cast5.c:44
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
av_fifo_can_write
size_t av_fifo_can_write(const AVFifo *f)
Definition: fifo.c:94
free_debug_matches
static void free_debug_matches(AbsoluteFrameMotion *afm)
Definition: vf_deshake_opencl.c:150
FrameDelta::translation
cl_float2 translation
Definition: vf_deshake_opencl.c:165
av_clip
#define av_clip
Definition: common.h:100
IterIndices
Definition: vf_deshake_opencl.c:185
r
const char * r
Definition: vf_curves.c:127
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
DeshakeOpenCLContext::brief_pattern
cl_mem brief_pattern
Definition: vf_deshake_opencl.c:255
deshake_opencl_init
static int deshake_opencl_init(AVFilterContext *avctx)
Definition: vf_deshake_opencl.c:1109
estimate_affine_2d
static int estimate_affine_2d(DeshakeOpenCLContext *deshake_ctx, MotionVector *point_pairs, DebugMatches *debug_matches, const int num_point_pairs, double *model_out, const double threshold, const int max_iters, const double confidence)
Definition: vf_deshake_opencl.c:517
minimize_error
static int minimize_error(DeshakeOpenCLContext *deshake_ctx, MotionVector *inliers, DebugMatches *debug_matches, const int num_inliers, double *model_out, const int max_iters)
Definition: vf_deshake_opencl.c:673
OFFSET
#define OFFSET(x)
Definition: vf_deshake_opencl.c:2120
av_lfg_init
av_cold void av_lfg_init(AVLFG *c, unsigned int seed)
Definition: lfg.c:32
AV_PIX_FMT_GBRP16BE
@ AV_PIX_FMT_GBRP16BE
planar GBR 4:4:4 48bpp, big-endian
Definition: pixfmt.h:171
DeshakeOpenCLContext::refine_features_time
unsigned long long refine_features_time
Definition: vf_deshake_opencl.c:292
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1062
AbsoluteFrameMotion
Definition: vf_deshake_opencl.c:133
AVFrame::duration
int64_t duration
Duration of the frame, in the same units as pts.
Definition: frame.h:795
AV_PIX_FMT_GBRP10BE
@ AV_PIX_FMT_GBRP10BE
planar GBR 4:4:4 30bpp, big-endian
Definition: pixfmt.h:169
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3170
AVERROR_EOF
#define AVERROR_EOF
End of file.
Definition: error.h:57
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:90
FFERROR_NOT_READY
return FFERROR_NOT_READY
Definition: filter_design.txt:204
DeshakeOpenCLContext::kernel_draw_debug_info
cl_kernel kernel_draw_debug_info
Definition: vf_deshake_opencl.c:285
PointPair::p2
cl_float2 p2
Definition: vf_deshake_opencl.c:99
matrix
Definition: vc1dsp.c:43
DeshakeOpenCLContext::transform_uv
cl_mem transform_uv
Definition: vf_deshake_opencl.c:266
int64_t
long long int64_t
Definition: coverity.c:34
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
AV_VIDEO_MAX_PLANES
#define AV_VIDEO_MAX_PLANES
Maximum number of planes in any pixel format.
Definition: pixfmt.h:40
normalize.log
log
Definition: normalize.py:21
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:162
DeshakeOpenCLContext::should_crop
int should_crop
Definition: vf_deshake_opencl.c:272
av_fifo_peek
int av_fifo_peek(const AVFifo *f, void *buf, size_t nb_elems, size_t offset)
Read data from a FIFO without modifying FIFO state.
Definition: fifo.c:255
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: filters.h:262
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:389
pixdesc.h
AVFrame::pts
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:501
AVFrame::width
int width
Definition: frame.h:461
ff_source_deshake_cl
const char * ff_source_deshake_cl
DeshakeOpenCLContext::prev_descriptors
cl_mem prev_descriptors
Definition: vf_deshake_opencl.c:259
opencl.h
AVOption
AVOption.
Definition: opt.h:429
b
#define b
Definition: input.c:41
RingbufX
@ RingbufX
Definition: vf_deshake_opencl.c:110
RingbufRot
@ RingbufRot
Definition: vf_deshake_opencl.c:112
DeshakeOpenCLContext::abs_motion
AbsoluteFrameMotion abs_motion
Definition: vf_deshake_opencl.c:205
expf
#define expf(x)
Definition: libm.h:283
high
int high
Definition: dovi_rpuenc.c:38
AbsoluteFrameMotion::curr_frame_offset
int curr_frame_offset
Definition: vf_deshake_opencl.c:139
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:225
float.h
ff_opencl_filter_load_program
int ff_opencl_filter_load_program(AVFilterContext *avctx, const char **program_source_array, int nb_strings)
Load a new OpenCL program from strings in memory.
Definition: opencl.c:159
update_needed_crop
static void update_needed_crop(CropInfo *crop, float *transform, float frame_width, float frame_height)
Definition: vf_deshake_opencl.c:989
ff_framequeue_init
void ff_framequeue_init(FFFrameQueue *fq, FFFrameQueueGlobal *fqg)
Init a frame queue and attach it to a global structure.
Definition: framequeue.c:48
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:205
points_not_collinear
static int points_not_collinear(const cl_float2 **points)
Definition: vf_deshake_opencl.c:349
video.h
FF_FILTER_FORWARD_STATUS_BACK
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
Definition: filters.h:434
CL_CREATE_KERNEL
#define CL_CREATE_KERNEL(ctx, kernel_name)
Create a kernel with the given name.
Definition: opencl.h:93
CL_CREATE_BUFFER
#define CL_CREATE_BUFFER(ctx, buffer_name, size)
Create a buffer with the given information.
Definition: opencl.h:237
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:410
BRIEF_PATCH_SIZE_HALF
#define BRIEF_PATCH_SIZE_HALF
Definition: vf_deshake_opencl.c:89
DeshakeOpenCLContext::harris_buf
cl_mem harris_buf
Definition: vf_deshake_opencl.c:247
DeshakeOpenCLContext::smooth_window_multiplier
float smooth_window_multiplier
Definition: vf_deshake_opencl.c:281
AV_PIX_FMT_GBRAP12LE
@ AV_PIX_FMT_GBRAP12LE
planar GBR 4:4:4:4 48bpp, little-endian
Definition: pixfmt.h:311
ff_inlink_consume_frame
int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)
Take a frame from the link's FIFO and update the link's stats.
Definition: avfilter.c:1491
ff_opencl_filter_work_size_from_image
int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, size_t *work_size, AVFrame *frame, int plane, int block_alignment)
Find the work size needed for a given plane of an image.
Definition: opencl.c:266
fifo.h
DeshakeOpenCLContext::debug_matches
cl_mem debug_matches
Definition: vf_deshake_opencl.c:286
AbsoluteFrameMotion::data_end_offset
int data_end_offset
Definition: vf_deshake_opencl.c:143
AV_PIX_FMT_GBRAP
@ AV_PIX_FMT_GBRAP
planar GBRA 4:4:4:4 32bpp
Definition: pixfmt.h:212
AVFilterContext::priv
void * priv
private data for use by the filter
Definition: avfilter.h:472
IterIndices::start
int start
Definition: vf_deshake_opencl.c:186
fail
#define fail()
Definition: checkasm.h:189
av_fifo_write
int av_fifo_write(AVFifo *f, const void *buf, size_t nb_elems)
Write data into a FIFO.
Definition: fifo.c:188
FFSIGN
#define FFSIGN(a)
Definition: common.h:75
DeshakeOpenCLContext::curr_frame
int curr_frame
Definition: vf_deshake_opencl.c:211
DeshakeOpenCLContext::kernel_transform
cl_kernel kernel_transform
Definition: vf_deshake_opencl.c:241
AbsoluteFrameMotion::ringbuffers
AVFifo * ringbuffers[RingbufCount]
Definition: vf_deshake_opencl.c:135
RingbufScaleY
@ RingbufScaleY
Definition: vf_deshake_opencl.c:114
IterIndices::end
int end
Definition: vf_deshake_opencl.c:187
val
static double val(void *priv, double ch)
Definition: aeval.c:77
deshake_opencl_outputs
static const AVFilterPad deshake_opencl_outputs[]
Definition: vf_deshake_opencl.c:2112
pts
static int64_t pts
Definition: transcode_aac.c:644
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
DeshakeOpenCLContext::kernel_crop_upscale
cl_kernel kernel_crop_upscale
Definition: vf_deshake_opencl.c:242
DeshakeOpenCLContext::initialized
int initialized
Definition: vf_deshake_opencl.c:193
DeshakeOpenCLContext::smooth_window
int smooth_window
Definition: vf_deshake_opencl.c:209
FrameDelta::skew
cl_float2 skew
Definition: vf_deshake_opencl.c:168
RingbufScaleX
@ RingbufScaleX
Definition: vf_deshake_opencl.c:113
ff_opencl_filter_config_output
int ff_opencl_filter_config_output(AVFilterLink *outlink)
Create a suitable hardware frames context for the output.
Definition: opencl.c:83
AVFilterPad
A filter pad used for either input or output.
Definition: filters.h:38
DeshakeOpenCLContext::eof
int eof
Definition: vf_deshake_opencl.c:197
avassert.h
DeshakeOpenCLContext::read_buf_time
unsigned long long read_buf_time
Definition: vf_deshake_opencl.c:299
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:209
av_cold
#define av_cold
Definition: attributes.h:90
av_fifo_read
int av_fifo_read(AVFifo *f, void *buf, size_t nb_elems)
Read data from a FIFO.
Definition: fifo.c:240
DeshakeOpenCLContext::debug_model_matches
cl_mem debug_model_matches
Definition: vf_deshake_opencl.c:287
DebugMatches::num_model_matches
int num_model_matches
Definition: vf_deshake_opencl.c:128
duration
int64_t duration
Definition: movenc.c:65
float
float
Definition: af_crystalizer.c:122
AV_PIX_FMT_GBRAP16BE
@ AV_PIX_FMT_GBRAP16BE
planar GBRA 4:4:4:4 64bpp, big-endian
Definition: pixfmt.h:213
ff_outlink_set_status
static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)
Set the status field of a link from the source filter.
Definition: filters.h:424
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_PIX_FMT_GBRP16LE
@ AV_PIX_FMT_GBRP16LE
planar GBR 4:4:4 48bpp, little-endian
Definition: pixfmt.h:172
RingbufY
@ RingbufY
Definition: vf_deshake_opencl.c:111
av_lfg_get
static unsigned int av_lfg_get(AVLFG *c)
Get the next random unsigned 32-bit number using an ALFG.
Definition: lfg.h:53
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
DeshakeOpenCLContext::smooth_percent
float smooth_percent
Definition: vf_deshake_opencl.c:278
FFFrameQueueGlobal
Structure to hold global options and statistics for frame queues.
Definition: framequeue.h:46
mi
#define mi
Definition: vf_colormatrix.c:106
DeshakeOpenCLContext::grayscale_time
unsigned long long grayscale_time
Definition: vf_deshake_opencl.c:290
DeshakeOpenCLContext::kernel_brief_descriptors
cl_kernel kernel_brief_descriptors
Definition: vf_deshake_opencl.c:239
transform
static const int8_t transform[32][32]
Definition: dsp.c:27
av_q2d
static double av_q2d(AVRational a)
Convert an AVRational to a double.
Definition: rational.h:104
lfg.h
deshake_opencl_uninit
static av_cold void deshake_opencl_uninit(AVFilterContext *avctx)
Definition: vf_deshake_opencl.c:1050
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
filters.h
DeshakeOpenCLContext::kernel_match_descriptors
cl_kernel kernel_match_descriptors
Definition: vf_deshake_opencl.c:240
ctx
AVFormatContext * ctx
Definition: movenc.c:49
av_rescale_q
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
Definition: mathematics.c:142
AV_PIX_FMT_GBRP10LE
@ AV_PIX_FMT_GBRP10LE
planar GBR 4:4:4 30bpp, little-endian
Definition: pixfmt.h:170
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: filters.h:263
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
CL_RELEASE_KERNEL
#define CL_RELEASE_KERNEL(k)
release an OpenCL Kernel
Definition: opencl.h:101
ff_framequeue_take
AVFrame * ff_framequeue_take(FFFrameQueue *fq)
Take the first frame in the queue.
Definition: framequeue.c:98
RingbufferIndices
RingbufferIndices
Definition: vf_deshake_opencl.c:109
rand_in
static int rand_in(int low, int high, AVLFG *alfg)
Definition: vf_deshake_opencl.c:303
averaged_event_time_ms
static double averaged_event_time_ms(unsigned long long total_time, int num_frames)
Definition: vf_deshake_opencl.c:309
AbsoluteFrameMotion::debug_matches
AVFifo * debug_matches
Definition: vf_deshake_opencl.c:145
filter_frame
static int filter_frame(AVFilterLink *link, AVFrame *input_frame)
Definition: vf_deshake_opencl.c:1368
result
and forward the result(frame or status change) to the corresponding input. If nothing is possible
AV_PIX_FMT_GBRAP12BE
@ AV_PIX_FMT_GBRAP12BE
planar GBR 4:4:4:4 48bpp, big-endian
Definition: pixfmt.h:310
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
NULL
#define NULL
Definition: coverity.c:32
DeshakeOpenCLContext::alfg
AVLFG alfg
Definition: vf_deshake_opencl.c:200
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:725
transform_center_scale
static void transform_center_scale(float x_shift, float y_shift, float angle, float scale_x, float scale_y, float center_w, float center_h, float *matrix)
Definition: vf_deshake_opencl.c:951
ff_framequeue_add
int ff_framequeue_add(FFFrameQueue *fq, AVFrame *frame)
Add a frame.
Definition: framequeue.c:64
ff_framequeue_free
void ff_framequeue_free(FFFrameQueue *fq)
Free the queue and all queued frames.
Definition: framequeue.c:54
DeshakeOpenCLContext::ransac_err
float * ransac_err
Definition: vf_deshake_opencl.c:217
DeshakeOpenCLContext::kernel_harris_response
cl_kernel kernel_harris_response
Definition: vf_deshake_opencl.c:237
framequeue.h
AVFilterContext::inputs
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:465
start_end_for
static IterIndices start_end_for(DeshakeOpenCLContext *deshake_ctx, int length)
Definition: vf_deshake_opencl.c:833
av_fifo_can_read
size_t av_fifo_can_read(const AVFifo *f)
Definition: fifo.c:87
double
double
Definition: af_crystalizer.c:132
AV_PIX_FMT_OPENCL
@ AV_PIX_FMT_OPENCL
Hardware surfaces for OpenCL.
Definition: pixfmt.h:358
DeshakeOpenCLContext::matches_contig
cl_mem matches_contig
Definition: vf_deshake_opencl.c:262
DebugMatches::model_matches
MotionVector model_matches[3]
Definition: vf_deshake_opencl.c:124
DeshakeOpenCLContext::duration
int64_t duration
Definition: vf_deshake_opencl.c:196
DeshakeOpenCLContext
Definition: vf_deshake_opencl.c:190
ff_inlink_acknowledge_status
int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)
Test and acknowledge the change of status on the link.
Definition: avfilter.c:1438
DeshakeOpenCLContext::crop_uv
CropInfo crop_uv
Definition: vf_deshake_opencl.c:222
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
transformed_point
static cl_float2 transformed_point(float x, float y, float *transform)
Definition: vf_deshake_opencl.c:941
DeshakeOpenCLContext::harris_response_time
unsigned long long harris_response_time
Definition: vf_deshake_opencl.c:291
make_gauss_kernel
static void make_gauss_kernel(float *gauss_kernel, float length, float sigma)
Definition: vf_deshake_opencl.c:809
ff_filter_link
static FilterLink * ff_filter_link(AVFilterLink *link)
Definition: filters.h:197
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically propagated through it.
Definition: filters.h:206
FFFrameQueue
Queue of AVFrame pointers.
Definition: framequeue.h:53
DeshakeOpenCLContext::crop_upscale_time
unsigned long long crop_upscale_time
Definition: vf_deshake_opencl.c:296
AVLFG
Context structure for the Lagged Fibonacci PRNG.
Definition: lfg.h:33
f
f
Definition: af_crystalizer.c:122
F3
#define F3(l, r, i)
Definition: cast5.c:37
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:368
AVFifo
Definition: fifo.c:35
DeshakeOpenCLContext::matches_contig_host
MotionVector * matches_contig_host
Definition: vf_deshake_opencl.c:231
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
powf
#define powf(x, y)
Definition: libm.h:50
CL_RELEASE_MEMORY
#define CL_RELEASE_MEMORY(m)
release an OpenCL Memory Object
Definition: opencl.h:114
AV_PIX_FMT_FLAG_RGB
#define AV_PIX_FMT_FLAG_RGB
The pixel format contains RGB-like data (as opposed to YUV/grayscale).
Definition: pixdesc.h:136
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
AVFILTER_FLAG_HWDEVICE
#define AVFILTER_FLAG_HWDEVICE
The filter can create hardware frames using AVFilterContext.hw_device_ctx.
Definition: avfilter.h:173
hypot
static av_const double hypot(double x, double y)
Definition: libm.h:366
CropInfo::bottom_right
cl_float2 bottom_right
Definition: vf_deshake_opencl.c:180
AV_PIX_FMT_GBRP9BE
@ AV_PIX_FMT_GBRP9BE
planar GBR 4:4:4 27bpp, big-endian
Definition: pixfmt.h:167
get_subset
static int get_subset(AVLFG *alfg, const MotionVector *point_pairs, const int num_point_pairs, MotionVector *pairs_subset, int max_attempts)
Definition: vf_deshake_opencl.c:393
ROUNDED_UP_DIV
#define ROUNDED_UP_DIV(a, b)
Definition: vf_deshake_opencl.c:93
AV_PIX_FMT_GBRP9LE
@ AV_PIX_FMT_GBRP9LE
planar GBR 4:4:4 27bpp, little-endian
Definition: pixfmt.h:168
FLAGS
#define FLAGS
Definition: vf_deshake_opencl.c:2121
MotionVector::should_consider
cl_int should_consider
Definition: vf_deshake_opencl.c:105
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
AV_PIX_FMT_GBRAP10LE
@ AV_PIX_FMT_GBRAP10LE
planar GBR 4:4:4:4 40bpp, little-endian
Definition: pixfmt.h:314
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
FF_FILTER_FORWARD_WANTED
FF_FILTER_FORWARD_WANTED(outlink, inlink)
MotionVector
Definition: agm.c:39
opencl_source.h
M_PI
#define M_PI
Definition: mathematics.h:67
ff_opencl_filter_config_input
int ff_opencl_filter_config_input(AVFilterLink *inlink)
Check that the input link contains a suitable hardware frames context and extract the device from it.
Definition: opencl.c:46
AV_OPT_TYPE_FLOAT
@ AV_OPT_TYPE_FLOAT
Underlying C type is float.
Definition: opt.h:271
DeshakeOpenCLContext::inliers
MotionVector * inliers
Definition: vf_deshake_opencl.c:233
FrameDelta::rotation
float rotation
Definition: vf_deshake_opencl.c:166
uninit
static void uninit(AVBSFContext *ctx)
Definition: pcm_rechunk.c:68
ff_get_matrix
void ff_get_matrix(float x_shift, float y_shift, float angle, float scale_x, float scale_y, float *matrix)
Get an affine transformation matrix from given translation, rotation, and zoom factors.
Definition: transform.c:106
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
queue_frame
static int queue_frame(AVFilterLink *link, AVFrame *input_frame)
Definition: vf_deshake_opencl.c:1726
round
static av_always_inline av_const double round(double x)
Definition: libm.h:444
DeshakeOpenCLContext::transform_y
cl_mem transform_y
Definition: vf_deshake_opencl.c:264
FFMIN3
#define FFMIN3(a, b, c)
Definition: macros.h:50
transform_debug
static void transform_debug(AVFilterContext *avctx, float *new_vals, float *old_vals, int curr_frame)
Definition: vf_deshake_opencl.c:1340
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:31
common.h
make_vectors_contig
static int make_vectors_contig(DeshakeOpenCLContext *deshake_ctx, int size_y, int size_x)
Definition: vf_deshake_opencl.c:777
delta
float delta
Definition: vorbis_enc_data.h:430
activate
static int activate(AVFilterContext *ctx)
Definition: vf_deshake_opencl.c:2005
DeshakeOpenCLContext::is_yuv
int is_yuv
Definition: vf_deshake_opencl.c:225
DeshakeOpenCLContext::fq
FFFrameQueue fq
Definition: vf_deshake_opencl.c:203
av_inv_q
static av_always_inline AVRational av_inv_q(AVRational q)
Invert a rational.
Definition: rational.h:159
DeshakeOpenCLContext::grayscale
cl_mem grayscale
Definition: vf_deshake_opencl.c:245
AVFilterPad::name
const char * name
Pad name.
Definition: filters.h:44
DeshakeOpenCLContext::transform_time
unsigned long long transform_time
Definition: vf_deshake_opencl.c:295
RingbufCount
@ RingbufCount
Definition: vf_deshake_opencl.c:117
AVFilter
Filter definition.
Definition: avfilter.h:201
AVHWFramesContext
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:115
DeshakeOpenCLContext::ocf
OpenCLFilterContext ocf
Definition: vf_deshake_opencl.c:191
AV_PIX_FMT_GBRAP16LE
@ AV_PIX_FMT_GBRAP16LE
planar GBRA 4:4:4:4 64bpp, little-endian
Definition: pixfmt.h:214
ff_opencl_filter_init
int ff_opencl_filter_init(AVFilterContext *avctx)
Initialise an OpenCL filter context.
Definition: opencl.c:135
CL_BLOCKING_WRITE_BUFFER
#define CL_BLOCKING_WRITE_BUFFER(queue, buffer, size, host_ptr, event)
Perform a blocking write to a buffer.
Definition: opencl.h:214
ret
ret
Definition: filter_design.txt:187
FrameDelta
Definition: vf_deshake_opencl.c:164
DeshakeOpenCLContext::matches_host
MotionVector * matches_host
Definition: vf_deshake_opencl.c:230
compute_error
static void compute_error(const MotionVector *point_pairs, const int num_point_pairs, const double *model, float *err)
Definition: vf_deshake_opencl.c:435
ransac_update_num_iters
static int ransac_update_num_iters(double confidence, double num_outliers, int max_iters)
Definition: vf_deshake_opencl.c:495
pixfmt.h
frame
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
Definition: filter_design.txt:264
DeshakeOpenCLContext::prev_refined_features
cl_mem prev_refined_features
Definition: vf_deshake_opencl.c:252
FrameDelta::scale
cl_float2 scale
Definition: vf_deshake_opencl.c:167
av_fifo_alloc2
AVFifo * av_fifo_alloc2(size_t nb_elems, size_t elem_size, unsigned int flags)
Allocate and initialize an AVFifo with a given element size.
Definition: fifo.c:47
AbsoluteFrameMotion::data_start_offset
int data_start_offset
Definition: vf_deshake_opencl.c:142
DeshakeOpenCLContext::refined_features
cl_mem refined_features
Definition: vf_deshake_opencl.c:250
transform.h
check_subset
static int check_subset(const MotionVector *pairs_subset)
Definition: vf_deshake_opencl.c:375
ff_framequeue_queued_frames
static size_t ff_framequeue_queued_frames(const FFFrameQueue *fq)
Get the number of queued frames.
Definition: framequeue.h:146
run_estimate_kernel
static void run_estimate_kernel(const MotionVector *point_pairs, double *model)
Definition: vf_deshake_opencl.c:319
CropInfo
Definition: vf_deshake_opencl.c:176
AVFrame::height
int height
Definition: frame.h:461
status
ov_status_e status
Definition: dnn_backend_openvino.c:100
DebugMatches::matches
MotionVector * matches
Definition: vf_deshake_opencl.c:122
DeshakeOpenCLContext::tripod_mode
int tripod_mode
Definition: vf_deshake_opencl.c:270
DeshakeOpenCLContext::match_descriptors_time
unsigned long long match_descriptors_time
Definition: vf_deshake_opencl.c:294
ff_framequeue_global_init
void ff_framequeue_global_init(FFFrameQueueGlobal *fqg)
Init a global structure.
Definition: framequeue.c:31
avfilter.h
gaussian_for
static float gaussian_for(int x, float sigma)
Definition: vf_deshake_opencl.c:803
values
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return values
Definition: filter_design.txt:263
temp
else temp
Definition: vf_mcdeint.c:263
OpenCLFilterContext
Definition: opencl.h:36
ff_opencl_filter_uninit
void ff_opencl_filter_uninit(AVFilterContext *avctx)
Uninitialise an OpenCL filter context.
Definition: opencl.c:144
DeshakeOpenCLContext::debug_on
int debug_on
Definition: vf_deshake_opencl.c:271
DebugMatches::num_matches
int num_matches
Definition: vf_deshake_opencl.c:126
CL_RELEASE_QUEUE
#define CL_RELEASE_QUEUE(q)
release an OpenCL Command Queue
Definition: opencl.h:127
DeshakeOpenCLContext::kernel_refine_features
cl_kernel kernel_refine_features
Definition: vf_deshake_opencl.c:238
transforms
static const struct @144 transforms[18]
AVFilterContext
An instance of a filter.
Definition: avfilter.h:457
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:165
desc
const char * desc
Definition: libsvtav1.c:79
find_inliers
static int find_inliers(MotionVector *point_pairs, const int num_point_pairs, const double *model, float *err, double thresh)
Definition: vf_deshake_opencl.c:459
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
deshake_opencl_options
static const AVOption deshake_opencl_options[]
Definition: vf_deshake_opencl.c:2123
mem.h
PointPair::p1
cl_float2 p1
Definition: vf_deshake_opencl.c:97
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
ff_opencl_get_event_time
cl_ulong ff_opencl_get_event_time(cl_event event)
Gets the command start and end times for the given event and returns the difference (the time that th...
Definition: opencl.c:342
smooth
static float smooth(DeshakeOpenCLContext *deshake_ctx, float *gauss_kernel, int length, float max_val, AVFifo *values)
Definition: vf_deshake_opencl.c:887
DeshakeOpenCLContext::kernel_grayscale
cl_kernel kernel_grayscale
Definition: vf_deshake_opencl.c:236
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Underlying C type is int.
Definition: opt.h:327
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
FFMAX3
#define FFMAX3(a, b, c)
Definition: macros.h:48
DebugMatches
Definition: vf_deshake_opencl.c:121
CL_CREATE_BUFFER_FLAGS
#define CL_CREATE_BUFFER_FLAGS(ctx, buffer_name, flags, size, host_ptr)
Create a buffer with the given information.
Definition: opencl.h:197
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
CL_FAIL_ON_ERROR
#define CL_FAIL_ON_ERROR(errcode,...)
A helper macro to handle OpenCL errors.
Definition: opencl.h:74
av_fifo_freep2
void av_fifo_freep2(AVFifo **f)
Free an AVFifo and reset pointer to NULL.
Definition: fifo.c:286
AV_PIX_FMT_GBRAP10BE
@ AV_PIX_FMT_GBRAP10BE
planar GBR 4:4:4:4 40bpp, big-endian
Definition: pixfmt.h:313
DeshakeOpenCLContext::descriptors
cl_mem descriptors
Definition: vf_deshake_opencl.c:257
DeshakeOpenCLContext::matches
cl_mem matches
Definition: vf_deshake_opencl.c:261
MotionVector::p
PointPair p
Definition: vf_deshake_opencl.c:103
DeshakeOpenCLContext::brief_descriptors_time
unsigned long long brief_descriptors_time
Definition: vf_deshake_opencl.c:293
DeshakeOpenCLContext::command_queue
cl_command_queue command_queue
Definition: vf_deshake_opencl.c:235
PointPair
Definition: vf_deshake_opencl.c:95
FILTER_SINGLE_PIXFMT
#define FILTER_SINGLE_PIXFMT(pix_fmt_)
Definition: filters.h:252
BREIFN
#define BREIFN
Definition: vf_deshake_opencl.c:85
DeshakeOpenCLContext::sw_format
int sw_format
Definition: vf_deshake_opencl.c:227
DeshakeOpenCLContext::gauss_kernel
float * gauss_kernel
Definition: vf_deshake_opencl.c:214
DeshakeOpenCLContext::refine_features
cl_int refine_features
Definition: vf_deshake_opencl.c:275
SimilarityMatrix
Definition: vf_deshake_opencl.c:171
ringbuf_float_at
static void ringbuf_float_at(DeshakeOpenCLContext *deshake_ctx, AVFifo *values, float *val, int offset)
Definition: vf_deshake_opencl.c:844
optimize_model
static void optimize_model(DeshakeOpenCLContext *deshake_ctx, MotionVector *best_pairs, MotionVector *inliers, const int num_inliers, float best_err, double *model_out)
Definition: vf_deshake_opencl.c:600
ff_vf_deshake_opencl
const AVFilter ff_vf_deshake_opencl
Definition: vf_deshake_opencl.c:2154
src
#define src
Definition: vp8dsp.c:248
DeshakeOpenCLContext::crop_y
CropInfo crop_y
Definition: vf_deshake_opencl.c:220
av_clipd
av_clipd
Definition: af_crystalizer.c:132
CropInfo::top_left
cl_float2 top_left
Definition: vf_deshake_opencl.c:178
decompose_transform
static FrameDelta decompose_transform(double *model)
Definition: vf_deshake_opencl.c:737
AVFilterContext::outputs
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:469
F1
#define F1(l, r, i)
Definition: cast5.c:51