FFmpeg
vf_deshake_opencl.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  *
18  * Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
19  * Copyright (C) 2009, Willow Garage Inc., all rights reserved.
20  * Copyright (C) 2013, OpenCV Foundation, all rights reserved.
21  * Third party copyrights are property of their respective owners.
22  *
23  * Redistribution and use in source and binary forms, with or without modification,
24  * are permitted provided that the following conditions are met:
25  *
26  * * Redistribution's of source code must retain the above copyright notice,
27  * this list of conditions and the following disclaimer.
28  *
29  * * Redistribution's in binary form must reproduce the above copyright notice,
30  * this list of conditions and the following disclaimer in the documentation
31  * and/or other materials provided with the distribution.
32  *
33  * * The name of the copyright holders may not be used to endorse or promote products
34  * derived from this software without specific prior written permission.
35  *
36  * This software is provided by the copyright holders and contributors "as is" and
37  * any express or implied warranties, including, but not limited to, the implied
38  * warranties of merchantability and fitness for a particular purpose are disclaimed.
39  * In no event shall the Intel Corporation or contributors be liable for any direct,
40  * indirect, incidental, special, exemplary, or consequential damages
41  * (including, but not limited to, procurement of substitute goods or services;
42  * loss of use, data, or profits; or business interruption) however caused
43  * and on any theory of liability, whether in contract, strict liability,
44  * or tort (including negligence or otherwise) arising in any way out of
45  * the use of this software, even if advised of the possibility of such damage.
46  */
47 
48 #include <float.h>
49 #include <libavutil/lfg.h>
50 #include "libavutil/opt.h"
51 #include "libavutil/imgutils.h"
52 #include "libavutil/mem.h"
53 #include "libavutil/fifo.h"
54 #include "libavutil/common.h"
55 #include "libavutil/avassert.h"
56 #include "libavutil/pixfmt.h"
57 #include "avfilter.h"
58 #include "framequeue.h"
59 #include "filters.h"
60 #include "transform.h"
61 #include "formats.h"
62 #include "internal.h"
63 #include "opencl.h"
64 #include "opencl_source.h"
65 #include "video.h"
66 
67 /*
68 This filter matches feature points between frames (dealing with outliers) and then
69 uses the matches to estimate an affine transform between frames. This transform is
70 decomposed into various values (translation, scale, rotation) and the values are
71 summed relative to the start of the video to obtain an absolute camera position
72 for each frame. This "camera path" is then smoothed via a gaussian filter, resulting
73 in a new path that is turned back into an affine transform and applied to each
74 frame to render it.
75 
76 High-level overview:
77 
78 All of the work to extract motion data from frames occurs in queue_frame. Motion data
79 is buffered in a smoothing window, so queue_frame simply computes the absolute camera
80 positions and places them in ringbuffers.
81 
82 filter_frame is responsible for looking at the absolute camera positions currently
83 in the ringbuffers, applying the gaussian filter, and then transforming the frames.
84 */
85 
86 // Number of bits for BRIEF descriptors
87 #define BREIFN 512
88 // Size of the patch from which a BRIEF descriptor is extracted
89 // This is the size used in OpenCV
90 #define BRIEF_PATCH_SIZE 31
91 #define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2)
92 
93 #define MATCHES_CONTIG_SIZE 2000
94 
// Integer division of a by b, rounded up (for non-negative a and positive b).
// Every argument use is parenthesized so that expressions such as `n + 1` can
// be passed without operator-precedence surprises. Note that b is still
// evaluated twice, so it must not have side effects.
#define ROUNDED_UP_DIV(a, b) (((a) + ((b) - 1)) / (b))
96 
97 typedef struct PointPair {
98  // Previous frame
99  cl_float2 p1;
100  // Current frame
101  cl_float2 p2;
102 } PointPair;
103 
104 typedef struct MotionVector {
106  // Used to mark vectors as potential outliers
108 } MotionVector;
109 
110 // Denotes the indices for the different types of motion in the ringbuffers array
117 
118  // Should always be last
120 };
121 
122 // Struct that holds data for drawing point match debug data
123 typedef struct DebugMatches {
125  // The points used to calculate the affine transform for a frame
127 
129  // For cases where we couldn't calculate a model
131 } DebugMatches;
132 
133 // Groups together the ringbuffers that store absolute distortion / position values
134 // for each frame
135 typedef struct AbsoluteFrameMotion {
136  // Array with the various ringbuffers, indexed via the RingbufferIndices enum
138 
139  // Offset to get to the current frame being processed
140  // (not in bytes)
142  // Keeps track of where the start and end of contiguous motion data is (to
143  // deal with cases where no motion data is found between two frames)
146 
149 
150 // Takes care of freeing the arrays within the DebugMatches inside of the
151 // debug_matches ringbuffer and then freeing the buffer itself.
153  DebugMatches dm;
154 
155  if (!afm->debug_matches) {
156  return;
157  }
158 
159  while (av_fifo_read(afm->debug_matches, &dm, 1) >= 0)
160  av_freep(&dm.matches);
161 
163 }
164 
165 // Stores the translation, scale, rotation, and skew deltas between two frames
166 typedef struct FrameDelta {
167  cl_float2 translation;
168  float rotation;
169  cl_float2 scale;
170  cl_float2 skew;
171 } FrameDelta;
172 
173 typedef struct SimilarityMatrix {
174  // The 2x3 similarity matrix
175  double matrix[6];
177 
178 typedef struct CropInfo {
179  // The top left corner of the bounding box for the crop
180  cl_float2 top_left;
181  // The bottom right corner of the bounding box for the crop
182  cl_float2 bottom_right;
183 } CropInfo;
184 
// Start and end values for iterating over a window around the current frame
// in a ringbuffer; produced by the function that computes that window
typedef struct IterIndices {
    int start; // First index of the window
    int end;   // Loop bound: iteration runs while index < end
} IterIndices;
191 
192 typedef struct DeshakeOpenCLContext {
194  // Whether or not the above `OpenCLFilterContext` has been initialized
196 
197  // These variables are used in the activate callback
198  int64_t duration;
199  int eof;
200 
201  // State for random number generation
203 
204  // FIFO frame queue used to buffer future frames for processing
206  // Ringbuffers for frame positions
208 
209  // The number of frames' motion to consider before and after the frame we are
210  // smoothing
212  // The number of the frame we are currently processing
214 
215  // Stores a 1d array of normalised gaussian kernel values for convolution
216  float *gauss_kernel;
217 
218  // Buffer for error values used in RANSAC code
219  float *ransac_err;
220 
221  // Information regarding how to crop the smoothed luminance (or RGB) planes
223  // Information regarding how to crop the smoothed chroma planes
225 
226  // Whether or not we are processing YUV input (as opposed to RGB)
227  int is_yuv;
228  // The underlying format of the hardware surfaces
230 
231  // Buffer to copy `matches` into for the CPU to work with
234 
236 
237  cl_command_queue command_queue;
238  cl_kernel kernel_grayscale;
243  cl_kernel kernel_transform;
245 
246  // Stores a frame converted to grayscale
247  cl_mem grayscale;
248  // Stores the harris response for a frame (measure of "cornerness" for each pixel)
249  cl_mem harris_buf;
250 
251  // Detected features after non-maximum suppression and sub-pixel refinement
253  // Saved from the previous frame
255 
256  // BRIEF sampling pattern that is randomly initialized
258  // Feature point descriptors for the current frame
259  cl_mem descriptors;
260  // Feature point descriptors for the previous frame
262  // Vectors between points in current and previous frame
263  cl_mem matches;
265  // Holds the matrix to transform luminance (or RGB) with
266  cl_mem transform_y;
267  // Holds the matrix to transform chroma with
268  cl_mem transform_uv;
269 
270  // Configurable options
271 
273  int debug_on;
275 
276  // Whether or not feature points should be refined at a sub-pixel level
278  // If the user sets a value other than the default, 0, this percentage is
279  // translated into a sigma value ranging from 0.5 to 40.0
281  // This number is multiplied by the video frame rate to determine the size
282  // of the smooth window
284 
285  // Debug stuff
286 
290 
291  // These store the total time spent executing the different kernels in nanoseconds
292  unsigned long long grayscale_time;
293  unsigned long long harris_response_time;
294  unsigned long long refine_features_time;
295  unsigned long long brief_descriptors_time;
296  unsigned long long match_descriptors_time;
297  unsigned long long transform_time;
298  unsigned long long crop_upscale_time;
299 
300  // Time spent copying matched features from the device to the host
301  unsigned long long read_buf_time;
303 
304 // Returns a random uniformly-distributed number in [low, high]
305 static int rand_in(int low, int high, AVLFG *alfg) {
306  return (av_lfg_get(alfg) % (high - low)) + low;
307 }
308 
// Converts an accumulated event time into a per-frame average.
//
// total_time is divided by num_frames and then by 1,000,000 (i.e. a total in
// nanoseconds becomes an average in milliseconds). num_frames is not checked
// against zero.
static double averaged_event_time_ms(unsigned long long total_time, int num_frames) {
    const double per_frame = (double)total_time / (double)num_frames;

    return per_frame / 1000000.0;
}
314 
315 // The following code is loosely ported from OpenCV
316 
317 // Estimates affine transform from 3 point pairs
318 // model is a 2x3 matrix:
319 // a b c
320 // d e f
321 static void run_estimate_kernel(const MotionVector *point_pairs, double *model)
322 {
323  // src points
324  double x1 = point_pairs[0].p.p1.s[0];
325  double y1 = point_pairs[0].p.p1.s[1];
326  double x2 = point_pairs[1].p.p1.s[0];
327  double y2 = point_pairs[1].p.p1.s[1];
328  double x3 = point_pairs[2].p.p1.s[0];
329  double y3 = point_pairs[2].p.p1.s[1];
330 
331  // dest points
332  double X1 = point_pairs[0].p.p2.s[0];
333  double Y1 = point_pairs[0].p.p2.s[1];
334  double X2 = point_pairs[1].p.p2.s[0];
335  double Y2 = point_pairs[1].p.p2.s[1];
336  double X3 = point_pairs[2].p.p2.s[0];
337  double Y3 = point_pairs[2].p.p2.s[1];
338 
339  double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );
340 
341  model[0] = d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );
342  model[1] = d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );
343  model[2] = d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );
344 
345  model[3] = d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );
346  model[4] = d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );
347  model[5] = d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );
348 }
349 
350 // Checks that the 3 points in the given array are not collinear
351 static int points_not_collinear(const cl_float2 **points)
352 {
353  int j, k, i = 2;
354 
355  for (j = 0; j < i; j++) {
356  double dx1 = points[j]->s[0] - points[i]->s[0];
357  double dy1 = points[j]->s[1] - points[i]->s[1];
358 
359  for (k = 0; k < j; k++) {
360  double dx2 = points[k]->s[0] - points[i]->s[0];
361  double dy2 = points[k]->s[1] - points[i]->s[1];
362 
363  // Assuming a 3840 x 2160 video with a point at (0, 0) and one at
364  // (3839, 2159), this prevents a third point from being within roughly
365  // 0.5 of a pixel of the line connecting the two on both axes
366  if (fabs(dx2*dy1 - dy2*dx1) <= 1.0) {
367  return 0;
368  }
369  }
370  }
371 
372  return 1;
373 }
374 
375 // Checks a subset of 3 point pairs to make sure that the points are not collinear
376 // and not too close to each other
377 static int check_subset(const MotionVector *pairs_subset)
378 {
379  const cl_float2 *prev_points[] = {
380  &pairs_subset[0].p.p1,
381  &pairs_subset[1].p.p1,
382  &pairs_subset[2].p.p1
383  };
384 
385  const cl_float2 *curr_points[] = {
386  &pairs_subset[0].p.p2,
387  &pairs_subset[1].p.p2,
388  &pairs_subset[2].p.p2
389  };
390 
391  return points_not_collinear(prev_points) && points_not_collinear(curr_points);
392 }
393 
394 // Selects a random subset of 3 points from point_pairs and places them in pairs_subset
395 static int get_subset(
396  AVLFG *alfg,
397  const MotionVector *point_pairs,
398  const int num_point_pairs,
399  MotionVector *pairs_subset,
400  int max_attempts
401 ) {
402  int idx[3];
403  int i = 0, j, iters = 0;
404 
405  for (; iters < max_attempts; iters++) {
406  for (i = 0; i < 3 && iters < max_attempts;) {
407  int idx_i = 0;
408 
409  for (;;) {
410  idx_i = idx[i] = rand_in(0, num_point_pairs, alfg);
411 
412  for (j = 0; j < i; j++) {
413  if (idx_i == idx[j]) {
414  break;
415  }
416  }
417 
418  if (j == i) {
419  break;
420  }
421  }
422 
423  pairs_subset[i] = point_pairs[idx[i]];
424  i++;
425  }
426 
427  if (i == 3 && !check_subset(pairs_subset)) {
428  continue;
429  }
430  break;
431  }
432 
433  return i == 3 && iters < max_attempts;
434 }
435 
436 // Computes the error for each of the given points based on the given model.
437 static void compute_error(
438  const MotionVector *point_pairs,
439  const int num_point_pairs,
440  const double *model,
441  float *err
442 ) {
443  double F0 = model[0], F1 = model[1], F2 = model[2];
444  double F3 = model[3], F4 = model[4], F5 = model[5];
445 
446  for (int i = 0; i < num_point_pairs; i++) {
447  const cl_float2 *f = &point_pairs[i].p.p1;
448  const cl_float2 *t = &point_pairs[i].p.p2;
449 
450  double a = F0*f->s[0] + F1*f->s[1] + F2 - t->s[0];
451  double b = F3*f->s[0] + F4*f->s[1] + F5 - t->s[1];
452 
453  err[i] = a*a + b*b;
454  }
455 }
456 
457 // Determines which of the given point matches are inliers for the given model
458 // based on the specified threshold.
459 //
460 // err must be an array of num_point_pairs length
461 static int find_inliers(
462  MotionVector *point_pairs,
463  const int num_point_pairs,
464  const double *model,
465  float *err,
466  double thresh
467 ) {
468  float t = (float)(thresh * thresh);
469  int i, n = num_point_pairs, num_inliers = 0;
470 
471  compute_error(point_pairs, num_point_pairs, model, err);
472 
473  for (i = 0; i < n; i++) {
474  if (err[i] <= t) {
475  // This is an inlier
476  point_pairs[i].should_consider = 1;
477  num_inliers += 1;
478  } else {
479  point_pairs[i].should_consider = 0;
480  }
481  }
482 
483  return num_inliers;
484 }
485 
// Determines the number of iterations required to achieve the desired
// confidence level, capped at max_iters.
//
// num_outliers is the fraction of outliers (it is clipped to [0, 1], as is
// confidence). The equation being solved is:
// 1 - confidence = (1 - inlier_probability^num_points)^num_iters
//
// which gives:
//
// num_iters = log(1 - confidence) / log(1 - inlier_probability^num_points)
//
// with inlier_probability = 1 - num_outliers and num_points = 3.
//
// A more in-depth explanation can be found at https://en.wikipedia.org/wiki/Random_sample_consensus
// under the 'Parameters' heading
static int ransac_update_num_iters(double confidence, double num_outliers, int max_iters)
{
    const double clipped_confidence = av_clipd(confidence, 0.0, 1.0);
    const double outlier_ratio = av_clipd(num_outliers, 0.0, 1.0);

    // Clamped away from zero to avoid inf's & nan's in the logarithm
    const double miss_prob = FFMAX(1.0 - clipped_confidence, DBL_MIN);
    // Probability that a 3-point sample contains at least one outlier
    const double bad_sample_prob = 1.0 - pow(1.0 - outlier_ratio, 3);
    double log_num, log_denom;

    // (Practically) no outliers: no further iterations are needed
    if (bad_sample_prob < DBL_MIN) {
        return 0;
    }

    log_num = log(miss_prob);
    log_denom = log(bad_sample_prob);

    if (log_denom >= 0 || -log_num >= max_iters * (-log_denom)) {
        return max_iters;
    }

    return (int)round(log_num / log_denom);
}
516 
517 // Estimates an affine transform between the given pairs of points using RANdom
518 // SAmple Consensus
520  DeshakeOpenCLContext *deshake_ctx,
521  MotionVector *point_pairs,
522  DebugMatches *debug_matches,
523  const int num_point_pairs,
524  double *model_out,
525  const double threshold,
526  const int max_iters,
527  const double confidence
528 ) {
529  int result = 0;
530  double best_model[6], model[6];
531  MotionVector pairs_subset[3], best_pairs[3];
532 
533  int iter, niters = FFMAX(max_iters, 1);
534  int good_count, max_good_count = 0;
535 
536  // We need at least 3 points to build a model from
537  if (num_point_pairs < 3) {
538  return 0;
539  } else if (num_point_pairs == 3) {
540  // There are only 3 points, so RANSAC doesn't apply here
541  run_estimate_kernel(point_pairs, model_out);
542 
543  for (int i = 0; i < 3; ++i) {
544  point_pairs[i].should_consider = 1;
545  }
546 
547  return 1;
548  }
549 
550  for (iter = 0; iter < niters; ++iter) {
551  int found = get_subset(&deshake_ctx->alfg, point_pairs, num_point_pairs, pairs_subset, 10000);
552 
553  if (!found) {
554  if (iter == 0) {
555  return 0;
556  }
557 
558  break;
559  }
560 
561  run_estimate_kernel(pairs_subset, model);
562  good_count = find_inliers(point_pairs, num_point_pairs, model, deshake_ctx->ransac_err, threshold);
563 
564  if (good_count > FFMAX(max_good_count, 2)) {
565  for (int mi = 0; mi < 6; ++mi) {
566  best_model[mi] = model[mi];
567  }
568 
569  for (int pi = 0; pi < 3; pi++) {
570  best_pairs[pi] = pairs_subset[pi];
571  }
572 
573  max_good_count = good_count;
574  niters = ransac_update_num_iters(
575  confidence,
576  (double)(num_point_pairs - good_count) / num_point_pairs,
577  niters
578  );
579  }
580  }
581 
582  if (max_good_count > 0) {
583  for (int mi = 0; mi < 6; ++mi) {
584  model_out[mi] = best_model[mi];
585  }
586 
587  for (int pi = 0; pi < 3; ++pi) {
588  debug_matches->model_matches[pi] = best_pairs[pi];
589  }
590  debug_matches->num_model_matches = 3;
591 
592  // Find the inliers again for the best model for debugging
593  find_inliers(point_pairs, num_point_pairs, best_model, deshake_ctx->ransac_err, threshold);
594  result = 1;
595  }
596 
597  return result;
598 }
599 
600 // "Wiggles" the first point in best_pairs around a tiny bit in order to decrease the
601 // total error
602 static void optimize_model(
603  DeshakeOpenCLContext *deshake_ctx,
604  MotionVector *best_pairs,
605  MotionVector *inliers,
606  const int num_inliers,
607  float best_err,
608  double *model_out
609 ) {
610  float move_x_val = 0.01;
611  float move_y_val = 0.01;
612  int move_x = 1;
613  float old_move_x_val = 0;
614  double model[6];
615  int last_changed = 0;
616 
617  for (int iters = 0; iters < 200; iters++) {
618  float total_err = 0;
619 
620  if (move_x) {
621  best_pairs[0].p.p2.s[0] += move_x_val;
622  } else {
623  best_pairs[0].p.p2.s[0] += move_y_val;
624  }
625 
626  run_estimate_kernel(best_pairs, model);
627  compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);
628 
629  for (int j = 0; j < num_inliers; j++) {
630  total_err += deshake_ctx->ransac_err[j];
631  }
632 
633  if (total_err < best_err) {
634  for (int mi = 0; mi < 6; ++mi) {
635  model_out[mi] = model[mi];
636  }
637 
638  best_err = total_err;
639  last_changed = iters;
640  } else {
641  // Undo the change
642  if (move_x) {
643  best_pairs[0].p.p2.s[0] -= move_x_val;
644  } else {
645  best_pairs[0].p.p2.s[0] -= move_y_val;
646  }
647 
648  if (iters - last_changed > 4) {
649  // We've already improved the model as much as we can
650  break;
651  }
652 
653  old_move_x_val = move_x_val;
654 
655  if (move_x) {
656  move_x_val *= -1;
657  } else {
658  move_y_val *= -1;
659  }
660 
661  if (old_move_x_val < 0) {
662  move_x = 0;
663  } else {
664  move_x = 1;
665  }
666  }
667  }
668 }
669 
670 // Uses a process similar to that of RANSAC to find a transform that minimizes
671 // the total error for a set of point matches determined to be inliers
672 //
673 // (Pick random subsets, compute model, find total error, iterate until error
674 // is minimized.)
675 static int minimize_error(
676  DeshakeOpenCLContext *deshake_ctx,
677  MotionVector *inliers,
678  DebugMatches *debug_matches,
679  const int num_inliers,
680  double *model_out,
681  const int max_iters
682 ) {
683  int result = 0;
684  float best_err = FLT_MAX;
685  double best_model[6], model[6];
686  MotionVector pairs_subset[3], best_pairs[3];
687 
688  for (int i = 0; i < max_iters; i++) {
689  float total_err = 0;
690  int found = get_subset(&deshake_ctx->alfg, inliers, num_inliers, pairs_subset, 10000);
691 
692  if (!found) {
693  if (i == 0) {
694  return 0;
695  }
696 
697  break;
698  }
699 
700  run_estimate_kernel(pairs_subset, model);
701  compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);
702 
703  for (int j = 0; j < num_inliers; j++) {
704  total_err += deshake_ctx->ransac_err[j];
705  }
706 
707  if (total_err < best_err) {
708  for (int mi = 0; mi < 6; ++mi) {
709  best_model[mi] = model[mi];
710  }
711 
712  for (int pi = 0; pi < 3; pi++) {
713  best_pairs[pi] = pairs_subset[pi];
714  }
715 
716  best_err = total_err;
717  }
718  }
719 
720  for (int mi = 0; mi < 6; ++mi) {
721  model_out[mi] = best_model[mi];
722  }
723 
724  for (int pi = 0; pi < 3; ++pi) {
725  debug_matches->model_matches[pi] = best_pairs[pi];
726  }
727  debug_matches->num_model_matches = 3;
728  result = 1;
729 
730  optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);
731  return result;
732 }
733 
734 // End code from OpenCV
735 
736 // Decomposes a similarity matrix into translation, rotation, scale, and skew
737 //
738 // See http://frederic-wang.fr/decomposition-of-2d-transform-matrices.html
739 static FrameDelta decompose_transform(double *model)
740 {
741  FrameDelta ret;
742 
743  double a = model[0];
744  double c = model[1];
745  double e = model[2];
746  double b = model[3];
747  double d = model[4];
748  double f = model[5];
749  double delta = a * d - b * c;
750 
751  memset(&ret, 0, sizeof(ret));
752 
753  ret.translation.s[0] = e;
754  ret.translation.s[1] = f;
755 
756  // This is the QR method
757  if (a != 0 || b != 0) {
758  double r = hypot(a, b);
759 
760  ret.rotation = FFSIGN(b) * acos(a / r);
761  ret.scale.s[0] = r;
762  ret.scale.s[1] = delta / r;
763  ret.skew.s[0] = atan((a * c + b * d) / (r * r));
764  ret.skew.s[1] = 0;
765  } else if (c != 0 || d != 0) {
766  double s = sqrt(c * c + d * d);
767 
768  ret.rotation = M_PI / 2 - FFSIGN(d) * acos(-c / s);
769  ret.scale.s[0] = delta / s;
770  ret.scale.s[1] = s;
771  ret.skew.s[0] = 0;
772  ret.skew.s[1] = atan((a * c + b * d) / (s * s));
773  } // otherwise there is only translation
774 
775  return ret;
776 }
777 
778 // Move valid vectors from the 2d buffer into a 1d buffer where they are contiguous
780  DeshakeOpenCLContext *deshake_ctx,
781  int size_y,
782  int size_x
783 ) {
784  int num_vectors = 0;
785 
786  for (int i = 0; i < size_y; ++i) {
787  for (int j = 0; j < size_x; ++j) {
788  MotionVector v = deshake_ctx->matches_host[j + i * size_x];
789 
790  if (v.should_consider) {
791  deshake_ctx->matches_contig_host[num_vectors] = v;
792  ++num_vectors;
793  }
794 
795  // Make sure we do not exceed the amount of space we allocated for these vectors
796  if (num_vectors == MATCHES_CONTIG_SIZE - 1) {
797  return num_vectors;
798  }
799  }
800  }
801  return num_vectors;
802 }
803 
804 // Returns the gaussian kernel value for the given x coordinate and sigma value
805 static float gaussian_for(int x, float sigma) {
806  return 1.0f / expf(((float)x * (float)x) / (2.0f * sigma * sigma));
807 }
808 
// Fills gauss_kernel with a gaussian of the given length and sigma, centered
// on index length / 2, and normalizes it so that its values sum to 1
//
// gauss_kernel must have room for length values.
static void make_gauss_kernel(float *gauss_kernel, float length, float sigma)
{
    const int center = length / 2;
    float total = 0;
    int i;

    // Sample the gaussian, accumulating the sum for normalization
    for (i = 0; i < length; ++i) {
        const float sample = gaussian_for(i - center, sigma);

        total += sample;
        gauss_kernel[i] = sample;
    }

    for (i = 0; i < length; ++i) {
        gauss_kernel[i] /= total;
    }
}
828 
829 // Returns indices to start and end iteration at in order to iterate over a window
830 // of length size centered at the current frame in a ringbuffer
831 //
832 // Always returns numbers that result in a window of length size, even if that
833 // means specifying negative indices or indices past the end of the values in the
834 // ringbuffers. Make sure you clip indices appropriately within your loop.
835 static IterIndices start_end_for(DeshakeOpenCLContext *deshake_ctx, int length) {
836  IterIndices indices;
837 
838  indices.start = deshake_ctx->abs_motion.curr_frame_offset - (length / 2);
839  indices.end = deshake_ctx->abs_motion.curr_frame_offset + (length / 2) + (length % 2);
840 
841  return indices;
842 }
843 
844 // Sets val to the value in the given ringbuffer at the given offset, taking care of
845 // clipping the offset into the appropriate range
846 static void ringbuf_float_at(
847  DeshakeOpenCLContext *deshake_ctx,
848  AVFifo *values,
849  float *val,
850  int offset
851 ) {
852  int clip_start, clip_end, offset_clipped;
853  if (deshake_ctx->abs_motion.data_end_offset != -1) {
854  clip_end = deshake_ctx->abs_motion.data_end_offset;
855  } else {
856  // This expression represents the last valid index in the buffer,
857  // which we use repeatedly at the end of the video.
858  clip_end = deshake_ctx->smooth_window - av_fifo_can_write(values) - 1;
859  }
860 
861  if (deshake_ctx->abs_motion.data_start_offset != -1) {
862  clip_start = deshake_ctx->abs_motion.data_start_offset;
863  } else {
864  // Negative indices will occur at the start of the video, and we want
865  // them to be clipped to 0 in order to repeatedly use the position of
866  // the first frame.
867  clip_start = 0;
868  }
869 
870  offset_clipped = av_clip(
871  offset,
872  clip_start,
873  clip_end
874  );
875 
876  av_fifo_peek(values, val, 1, offset_clipped);
877 }
878 
879 // Returns smoothed current frame value of the given buffer of floats based on the
880 // given Gaussian kernel and its length (also the window length, centered around the
881 // current frame) and the "maximum value" of the motion.
882 //
883 // This "maximum value" should be the width / height of the image in the case of
884 // translation and an empirically chosen constant for rotation / scale.
885 //
886 // The sigma chosen to generate the final gaussian kernel with used to smooth the
887 // camera path is either hardcoded (set by user, deshake_ctx->smooth_percent) or
888 // adaptively chosen.
889 static float smooth(
890  DeshakeOpenCLContext *deshake_ctx,
891  float *gauss_kernel,
892  int length,
893  float max_val,
894  AVFifo *values
895 ) {
896  float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,
897  percent_of_max, inverted_percent;
898  IterIndices indices = start_end_for(deshake_ctx, length);
899  float large_sigma = 40.0f;
900  float small_sigma = 2.0f;
901  float best_sigma;
902 
903  if (deshake_ctx->smooth_percent) {
904  best_sigma = (large_sigma - 0.5f) * deshake_ctx->smooth_percent + 0.5f;
905  } else {
906  // Strategy to adaptively smooth trajectory:
907  //
908  // 1. Smooth path with large and small sigma values
909  // 2. Take the absolute value of the difference between them
910  // 3. Get a percentage by putting the difference over the "max value"
911  // 4, Invert the percentage
912  // 5. Calculate a new sigma value weighted towards the larger sigma value
913  // 6. Determine final smoothed trajectory value using that sigma
914 
915  make_gauss_kernel(gauss_kernel, length, large_sigma);
916  for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
917  ringbuf_float_at(deshake_ctx, values, &old, i);
918  new_large_s += old * gauss_kernel[j];
919  }
920 
921  make_gauss_kernel(gauss_kernel, length, small_sigma);
922  for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
923  ringbuf_float_at(deshake_ctx, values, &old, i);
924  new_small_s += old * gauss_kernel[j];
925  }
926 
927  diff_between = fabsf(new_large_s - new_small_s);
928  percent_of_max = diff_between / max_val;
929  inverted_percent = 1 - percent_of_max;
930  best_sigma = large_sigma * powf(inverted_percent, 40);
931  }
932 
933  make_gauss_kernel(gauss_kernel, length, best_sigma);
934  for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
935  ringbuf_float_at(deshake_ctx, values, &old, i);
936  new_best += old * gauss_kernel[j];
937  }
938 
939  return new_best;
940 }
941 
942 // Returns the position of the given point after the transform is applied
943 static cl_float2 transformed_point(float x, float y, float *transform) {
944  cl_float2 ret;
945 
946  ret.s[0] = x * transform[0] + y * transform[1] + transform[2];
947  ret.s[1] = x * transform[3] + y * transform[4] + transform[5];
948 
949  return ret;
950 }
951 
952 // Creates an affine transform that scales from the center of a frame
954  float x_shift,
955  float y_shift,
956  float angle,
957  float scale_x,
958  float scale_y,
959  float center_w,
960  float center_h,
961  float *matrix
962 ) {
963  cl_float2 center_s;
964  float center_s_w, center_s_h;
965 
967  0,
968  0,
969  0,
970  scale_x,
971  scale_y,
972  matrix
973  );
974 
975  center_s = transformed_point(center_w, center_h, matrix);
976  center_s_w = center_w - center_s.s[0];
977  center_s_h = center_h - center_s.s[1];
978 
980  x_shift + center_s_w,
981  y_shift + center_s_h,
982  angle,
983  scale_x,
984  scale_y,
985  matrix
986  );
987 }
988 
989 // Determines the crop necessary to eliminate black borders from a smoothed frame
990 // and updates target crop accordingly
991 static void update_needed_crop(
992  CropInfo* crop,
993  float *transform,
994  float frame_width,
995  float frame_height
996 ) {
997  float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;
998 
999  cl_float2 top_left = transformed_point(0, 0, transform);
1000  cl_float2 top_right = transformed_point(frame_width, 0, transform);
1001  cl_float2 bottom_left = transformed_point(0, frame_height, transform);
1002  cl_float2 bottom_right = transformed_point(frame_width, frame_height, transform);
1003  float ar_h = frame_height / frame_width;
1004  float ar_w = frame_width / frame_height;
1005 
1006  if (crop->bottom_right.s[0] == 0) {
1007  // The crop hasn't been set to the original size of the plane
1008  crop->bottom_right.s[0] = frame_width;
1009  crop->bottom_right.s[1] = frame_height;
1010  }
1011 
1012  crop->top_left.s[0] = FFMAX3(
1013  crop->top_left.s[0],
1014  top_left.s[0],
1015  bottom_left.s[0]
1016  );
1017 
1018  crop->top_left.s[1] = FFMAX3(
1019  crop->top_left.s[1],
1020  top_left.s[1],
1021  top_right.s[1]
1022  );
1023 
1024  crop->bottom_right.s[0] = FFMIN3(
1025  crop->bottom_right.s[0],
1026  bottom_right.s[0],
1027  top_right.s[0]
1028  );
1029 
1030  crop->bottom_right.s[1] = FFMIN3(
1031  crop->bottom_right.s[1],
1032  bottom_right.s[1],
1033  bottom_left.s[1]
1034  );
1035 
1036  // Make sure our potentially new bounding box has the same aspect ratio
1037  new_height = crop->bottom_right.s[1] - crop->top_left.s[1];
1038  new_width = crop->bottom_right.s[0] - crop->top_left.s[0];
1039 
1040  adjusted_width = new_height * ar_w;
1041  adjusted_x = crop->bottom_right.s[0] - adjusted_width;
1042 
1043  if (adjusted_x >= crop->top_left.s[0]) {
1044  crop->top_left.s[0] = adjusted_x;
1045  } else {
1046  adjusted_height = new_width * ar_h;
1047  adjusted_y = crop->bottom_right.s[1] - adjusted_height;
1048  crop->top_left.s[1] = adjusted_y;
1049  }
1050 }
1051 
1053 {
1054  DeshakeOpenCLContext *ctx = avctx->priv;
1055  cl_int cle;
1056 
1057  for (int i = 0; i < RingbufCount; i++)
1058  av_fifo_freep2(&ctx->abs_motion.ringbuffers[i]);
1059 
1060  if (ctx->debug_on)
1061  free_debug_matches(&ctx->abs_motion);
1062 
1063  if (ctx->gauss_kernel)
1064  av_freep(&ctx->gauss_kernel);
1065 
1066  if (ctx->ransac_err)
1067  av_freep(&ctx->ransac_err);
1068 
1069  if (ctx->matches_host)
1070  av_freep(&ctx->matches_host);
1071 
1072  if (ctx->matches_contig_host)
1073  av_freep(&ctx->matches_contig_host);
1074 
1075  if (ctx->inliers)
1076  av_freep(&ctx->inliers);
1077 
1078  ff_framequeue_free(&ctx->fq);
1079 
1080  CL_RELEASE_KERNEL(ctx->kernel_grayscale);
1081  CL_RELEASE_KERNEL(ctx->kernel_harris_response);
1082  CL_RELEASE_KERNEL(ctx->kernel_refine_features);
1083  CL_RELEASE_KERNEL(ctx->kernel_brief_descriptors);
1084  CL_RELEASE_KERNEL(ctx->kernel_match_descriptors);
1085  CL_RELEASE_KERNEL(ctx->kernel_crop_upscale);
1086  if (ctx->debug_on)
1087  CL_RELEASE_KERNEL(ctx->kernel_draw_debug_info);
1088 
1089  CL_RELEASE_QUEUE(ctx->command_queue);
1090 
1091  if (!ctx->is_yuv)
1092  CL_RELEASE_MEMORY(ctx->grayscale);
1093  CL_RELEASE_MEMORY(ctx->harris_buf);
1094  CL_RELEASE_MEMORY(ctx->refined_features);
1095  CL_RELEASE_MEMORY(ctx->prev_refined_features);
1096  CL_RELEASE_MEMORY(ctx->brief_pattern);
1097  CL_RELEASE_MEMORY(ctx->descriptors);
1098  CL_RELEASE_MEMORY(ctx->prev_descriptors);
1099  CL_RELEASE_MEMORY(ctx->matches);
1100  CL_RELEASE_MEMORY(ctx->matches_contig);
1101  CL_RELEASE_MEMORY(ctx->transform_y);
1102  CL_RELEASE_MEMORY(ctx->transform_uv);
1103  if (ctx->debug_on) {
1104  CL_RELEASE_MEMORY(ctx->debug_matches);
1105  CL_RELEASE_MEMORY(ctx->debug_model_matches);
1106  }
1107 
1108  ff_opencl_filter_uninit(avctx);
1109 }
1110 
1112 {
// Filter setup: allocates the host-side buffers and motion ringbuffers, builds
// the BRIEF sampling pattern, then creates the OpenCL command queue, kernels,
// and device buffers. Returns 0 on success; on failure it frees pattern_host
// and returns err.
//
// NOTE(review): this listing has gaps in its embedded numbering (the entries of
// disallowed_formats, the statement that sets err before "if (err < 0)", the
// opener of the brief_pattern buffer creation, among others), so some
// statements of this function are not visible here.
1113  DeshakeOpenCLContext *ctx = avctx->priv;
1114  AVFilterLink *outlink = avctx->outputs[0];
1115  AVFilterLink *inlink = avctx->inputs[0];
1116  // Pointer to the host-side pattern buffer to be initialized and then copied
1117  // to the GPU
1118  PointPair *pattern_host = NULL;
1119  cl_int cle;
1120  int err;
// NOTE(review): zeroed_ulong8 is zero-filled below but never read in the
// visible code.
1121  cl_ulong8 zeroed_ulong8;
1122  FFFrameQueueGlobal fqg;
1123  cl_image_format grayscale_format;
1124  cl_image_desc grayscale_desc;
1125  cl_command_queue_properties queue_props;
1126 
1127  const enum AVPixelFormat disallowed_formats[14] = {
1142  };
1143 
1144  // Number of elements for an array
// i.e. one element per 32x32 tile of the output frame (ROUNDED_UP_DIV is
// presumably a ceiling division -- confirm)
1145  const int image_grid_32 = ROUNDED_UP_DIV(outlink->h, 32) * ROUNDED_UP_DIV(outlink->w, 32);
1146 
// Byte sizes of the per-tile descriptor and feature-point device buffers
1147  const int descriptor_buf_size = image_grid_32 * (BREIFN / 8);
1148  const int features_buf_size = image_grid_32 * sizeof(cl_float2);
1149 
// NOTE(review): hw_frames_ctx is dereferenced on the next line, before the
// av_assert0(hw_frames_ctx) below runs, so that assert cannot catch a NULL here.
1150  const AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
1151  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hw_frames_ctx->sw_format);
1152 
1153  av_assert0(hw_frames_ctx);
1154  av_assert0(desc);
1155 
1157  ff_framequeue_init(&ctx->fq, &fqg);
1158  ctx->eof = 0;
// Smoothing window length in frames: input frame rate times the configured
// multiplier, truncated to an int
1159  ctx->smooth_window = (int)(av_q2d(avctx->inputs[0]->frame_rate) * ctx->smooth_window_multiplier);
1160  ctx->curr_frame = 0;
1161 
1162  memset(&zeroed_ulong8, 0, sizeof(cl_ulong8));
1163 
1164  ctx->gauss_kernel = av_malloc_array(ctx->smooth_window, sizeof(float));
1165  if (!ctx->gauss_kernel) {
1166  err = AVERROR(ENOMEM);
1167  goto fail;
1168  }
1169 
1170  ctx->ransac_err = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(float));
1171  if (!ctx->ransac_err) {
1172  err = AVERROR(ENOMEM);
1173  goto fail;
1174  }
1175 
// One float FIFO per tracked motion component, each holding smooth_window
// entries
1176  for (int i = 0; i < RingbufCount; i++) {
1177  ctx->abs_motion.ringbuffers[i] = av_fifo_alloc2(ctx->smooth_window,
1178  sizeof(float), 0);
1179 
1180  if (!ctx->abs_motion.ringbuffers[i]) {
1181  err = AVERROR(ENOMEM);
1182  goto fail;
1183  }
1184  }
1185 
// The debug-match FIFO is only allocated in debug mode and holds half a
// smoothing window of entries
1186  if (ctx->debug_on) {
1187  ctx->abs_motion.debug_matches = av_fifo_alloc2(
1188  ctx->smooth_window / 2,
1189  sizeof(DebugMatches), 0
1190  );
1191 
1192  if (!ctx->abs_motion.debug_matches) {
1193  err = AVERROR(ENOMEM);
1194  goto fail;
1195  }
1196  }
1197 
// -1 is the "unset" value the rest of the file tests these offsets against
1198  ctx->abs_motion.curr_frame_offset = 0;
1199  ctx->abs_motion.data_start_offset = -1;
1200  ctx->abs_motion.data_end_offset = -1;
1201 
1202  pattern_host = av_malloc_array(BREIFN, sizeof(PointPair));
1203  if (!pattern_host) {
1204  err = AVERROR(ENOMEM);
1205  goto fail;
1206  }
1207 
1208  ctx->matches_host = av_malloc_array(image_grid_32, sizeof(MotionVector));
1209  if (!ctx->matches_host) {
1210  err = AVERROR(ENOMEM);
1211  goto fail;
1212  }
1213 
1214  ctx->matches_contig_host = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));
1215  if (!ctx->matches_contig_host) {
1216  err = AVERROR(ENOMEM);
1217  goto fail;
1218  }
1219 
1220  ctx->inliers = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));
1221  if (!ctx->inliers) {
1222  err = AVERROR(ENOMEM);
1223  goto fail;
1224  }
1225 
1226  // Initialize the patch pattern used to build BRIEF descriptors
// The generator is seeded with a fixed constant on every initialization
1227  av_lfg_init(&ctx->alfg, 234342424);
1228  for (int i = 0; i < BREIFN; ++i) {
1229  PointPair pair;
1230 
1231  for (int j = 0; j < 2; ++j) {
1232  pair.p1.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);
1233  pair.p2.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);
1234  }
1235 
1236  pattern_host[i] = pair;
1237  }
1238 
// NOTE(review): this loop compares ctx->sw_format, but ctx->sw_format is only
// assigned from hw_frames_ctx->sw_format further down, after the loop. As
// written, the check runs against whatever value the context held before this
// function ran -- it looks like the assignment should come first. Confirm and
// fix in the real source.
1239  for (int i = 0; i < 14; i++) {
1240  if (ctx->sw_format == disallowed_formats[i]) {
1241  av_log(avctx, AV_LOG_ERROR, "unsupported format in deshake_opencl.\n");
1242  err = AVERROR(ENOSYS);
1243  goto fail;
1244  }
1245  }
1246 
// Anything not flagged RGB by its pixel format descriptor is treated as YUV
1247  if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
1248  ctx->is_yuv = 0;
1249  } else {
1250  ctx->is_yuv = 1;
1251  }
1252  ctx->sw_format = hw_frames_ctx->sw_format;
1253 
1255  if (err < 0)
1256  goto fail;
1257 
// Queue profiling is only enabled in debug mode
1258  if (ctx->debug_on) {
1259  queue_props = CL_QUEUE_PROFILING_ENABLE;
1260  } else {
1261  queue_props = 0;
1262  }
1263  ctx->command_queue = clCreateCommandQueue(
1264  ctx->ocf.hwctx->context,
1265  ctx->ocf.hwctx->device_id,
1266  queue_props,
1267  &cle
1268  );
1269  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL command queue %d.\n", cle);
1270 
1271  CL_CREATE_KERNEL(ctx, grayscale);
1272  CL_CREATE_KERNEL(ctx, harris_response);
1273  CL_CREATE_KERNEL(ctx, refine_features);
1274  CL_CREATE_KERNEL(ctx, brief_descriptors);
1275  CL_CREATE_KERNEL(ctx, match_descriptors);
1277  CL_CREATE_KERNEL(ctx, crop_upscale);
1278  if (ctx->debug_on)
1279  CL_CREATE_KERNEL(ctx, draw_debug_info);
1280 
// A separate single-channel float image is only created for non-YUV input
1281  if (!ctx->is_yuv) {
1282  grayscale_format.image_channel_order = CL_R;
1283  grayscale_format.image_channel_data_type = CL_FLOAT;
1284 
1285  grayscale_desc = (cl_image_desc) {
1286  .image_type = CL_MEM_OBJECT_IMAGE2D,
1287  .image_width = outlink->w,
1288  .image_height = outlink->h,
1289  .image_depth = 0,
1290  .image_array_size = 0,
1291  .image_row_pitch = 0,
1292  .image_slice_pitch = 0,
1293  .num_mip_levels = 0,
1294  .num_samples = 0,
1295  .buffer = NULL,
1296  };
1297 
1298  ctx->grayscale = clCreateImage(
1299  ctx->ocf.hwctx->context,
1300  0,
1301  &grayscale_format,
1302  &grayscale_desc,
1303  NULL,
1304  &cle
1305  );
1306  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create grayscale image: %d.\n", cle);
1307  }
1308 
1309  CL_CREATE_BUFFER(ctx, harris_buf, outlink->h * outlink->w * sizeof(float));
1310  CL_CREATE_BUFFER(ctx, refined_features, features_buf_size);
1311  CL_CREATE_BUFFER(ctx, prev_refined_features, features_buf_size);
// The arguments below create brief_pattern from pattern_host
// (CL_MEM_COPY_HOST_PTR); the line naming the macro is missing from this listing
1313  ctx,
1314  brief_pattern,
1315  CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1316  BREIFN * sizeof(PointPair),
1317  pattern_host
1318  );
1319  CL_CREATE_BUFFER(ctx, descriptors, descriptor_buf_size);
1320  CL_CREATE_BUFFER(ctx, prev_descriptors, descriptor_buf_size);
1321  CL_CREATE_BUFFER(ctx, matches, image_grid_32 * sizeof(MotionVector));
1322  CL_CREATE_BUFFER(ctx, matches_contig, MATCHES_CONTIG_SIZE * sizeof(MotionVector));
1323  CL_CREATE_BUFFER(ctx, transform_y, 9 * sizeof(float));
1324  CL_CREATE_BUFFER(ctx, transform_uv, 9 * sizeof(float));
1325  if (ctx->debug_on) {
1326  CL_CREATE_BUFFER(ctx, debug_matches, MATCHES_CONTIG_SIZE * sizeof(MotionVector));
1327  CL_CREATE_BUFFER(ctx, debug_model_matches, 3 * sizeof(MotionVector));
1328  }
1329 
1330  ctx->initialized = 1;
1331  av_freep(&pattern_host);
1332 
1333  return 0;
1334 
// Only the temporary pattern buffer is freed here; the context-owned
// allocations are presumably left for the filter's uninit to release -- confirm.
1335 fail:
1336  av_freep(&pattern_host);
1337  return err;
1338 }
1339 
1340 // Logs debug information about the transform data
1341 static void transform_debug(AVFilterContext *avctx, float *new_vals, float *old_vals, int curr_frame) {
1342  av_log(avctx, AV_LOG_VERBOSE,
1343  "Frame %d:\n"
1344  "\tframe moved from: %f x, %f y\n"
1345  "\t to: %f x, %f y\n"
1346  "\t rotated from: %f degrees\n"
1347  "\t to: %f degrees\n"
1348  "\t scaled from: %f x, %f y\n"
1349  "\t to: %f x, %f y\n"
1350  "\n"
1351  "\tframe moved by: %f x, %f y\n"
1352  "\t rotated by: %f degrees\n"
1353  "\t scaled by: %f x, %f y\n",
1354  curr_frame,
1355  old_vals[RingbufX], old_vals[RingbufY],
1356  new_vals[RingbufX], new_vals[RingbufY],
1357  old_vals[RingbufRot] * (180.0 / M_PI),
1358  new_vals[RingbufRot] * (180.0 / M_PI),
1359  old_vals[RingbufScaleX], old_vals[RingbufScaleY],
1360  new_vals[RingbufScaleX], new_vals[RingbufScaleY],
1361  old_vals[RingbufX] - new_vals[RingbufX], old_vals[RingbufY] - new_vals[RingbufY],
1362  old_vals[RingbufRot] * (180.0 / M_PI) - new_vals[RingbufRot] * (180.0 / M_PI),
1363  new_vals[RingbufScaleX] / old_vals[RingbufScaleX], new_vals[RingbufScaleY] / old_vals[RingbufScaleY]
1364  );
1365 }
1366 
1367 // Uses the buffered motion information to determine a transform that smooths the
1368 // given frame and applies it
//
// link:        input link; the filter context is taken from link->dst
// input_frame: frame to stabilize; freed on every path visible in this function
//
// Returns the result of ff_filter_frame() for the produced frame, or err after
// jumping to the fail label.
//
// NOTE(review): this listing has gaps in its embedded numbering. Several
// argument lists below (those starting at old_vals[RingbufX] or
// deshake_ctx->command_queue) have lost the line naming the function or macro
// being called, so the callee is not visible here.
1369 static int filter_frame(AVFilterLink *link, AVFrame *input_frame)
1370 {
1371  AVFilterContext *avctx = link->dst;
1372  AVFilterLink *outlink = avctx->outputs[0];
1373  DeshakeOpenCLContext *deshake_ctx = avctx->priv;
1374  AVFrame *cropped_frame = NULL, *transformed_frame = NULL;
1375  int err;
1376  cl_int cle;
// new_vals: smoothed motion values; old_vals: buffered values for this frame
1377  float new_vals[RingbufCount];
1378  float old_vals[RingbufCount];
1379  // Luma (in the case of YUV) transform, or just the transform in the case of RGB
1380  float transform_y[9];
1381  // Chroma transform
1382  float transform_uv[9];
1383  // Luma crop transform (or RGB)
1384  float transform_crop_y[9];
1385  // Chroma crop transform
1386  float transform_crop_uv[9];
1387  float transform_debug_rgb[9];
1388  size_t global_work[2];
1389  int64_t duration;
1390  cl_mem src, transformed, dst;
// Per-plane lookup tables: index 0 is luma/RGB, indices 1 and 2 share the
// chroma entry
1391  cl_mem transforms[3];
1392  CropInfo crops[3];
1393  cl_event transform_event, crop_upscale_event;
1394  DebugMatches debug_matches;
1395  cl_int num_model_matches;
1396 
1397  const float center_w = (float)input_frame->width / 2;
1398  const float center_h = (float)input_frame->height / 2;
1399 
1400  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(deshake_ctx->sw_format);
1401  const int chroma_width = AV_CEIL_RSHIFT(input_frame->width, desc->log2_chroma_w);
1402  const int chroma_height = AV_CEIL_RSHIFT(input_frame->height, desc->log2_chroma_h);
1403 
1404  const float center_w_chroma = (float)chroma_width / 2;
1405  const float center_h_chroma = (float)chroma_height / 2;
1406 
1407  const float luma_w_over_chroma_w = ((float)input_frame->width / (float)chroma_width);
1408  const float luma_h_over_chroma_h = ((float)input_frame->height / (float)chroma_height);
1409 
// In debug mode, pop the oldest DebugMatches entry from the debug FIFO; its
// matches array is freed later in this function
1410  if (deshake_ctx->debug_on) {
1411  av_fifo_read(
1412  deshake_ctx->abs_motion.debug_matches,
1413  &debug_matches, 1);
1414  }
1415 
// Frame duration: pkt_duration if set (only when FF_API_PKT_DURATION is
// enabled), else the frame's own duration, else one frame interval derived
// from the output frame rate
1416 #if FF_API_PKT_DURATION
1418  if (input_frame->pkt_duration) {
1419  duration = input_frame->pkt_duration;
1420  } else
1422 #endif
1423  if (input_frame->duration) {
1424  duration = input_frame->duration;
1425  } else {
1426  duration = av_rescale_q(1, av_inv_q(outlink->frame_rate), outlink->time_base);
1427  }
// Despite its name, the context field stores pts + duration
1428  deshake_ctx->duration = input_frame->pts + duration;
1429 
1430  // Get the absolute transform data for this frame
1431  for (int i = 0; i < RingbufCount; i++) {
1432  av_fifo_peek(deshake_ctx->abs_motion.ringbuffers[i],
1433  &old_vals[i], 1,
1434  deshake_ctx->abs_motion.curr_frame_offset);
1435  }
1436 
1437  if (deshake_ctx->tripod_mode) {
1438  // If tripod mode is turned on we simply undo all motion relative to the
1439  // first frame
1440 
1441  new_vals[RingbufX] = 0.0f;
1442  new_vals[RingbufY] = 0.0f;
1443  new_vals[RingbufRot] = 0.0f;
1444  new_vals[RingbufScaleX] = 1.0f;
1445  new_vals[RingbufScaleY] = 1.0f;
1446  } else {
1447  // Tripod mode is off and we need to smooth a moving camera
1448 
// The fourth argument differs per component (frame width, frame height,
// M_PI / 4, 2.0f); presumably a per-component limit used by smooth() --
// confirm against its definition
1449  new_vals[RingbufX] = smooth(
1450  deshake_ctx,
1451  deshake_ctx->gauss_kernel,
1452  deshake_ctx->smooth_window,
1453  input_frame->width,
1454  deshake_ctx->abs_motion.ringbuffers[RingbufX]
1455  );
1456  new_vals[RingbufY] = smooth(
1457  deshake_ctx,
1458  deshake_ctx->gauss_kernel,
1459  deshake_ctx->smooth_window,
1460  input_frame->height,
1461  deshake_ctx->abs_motion.ringbuffers[RingbufY]
1462  );
1463  new_vals[RingbufRot] = smooth(
1464  deshake_ctx,
1465  deshake_ctx->gauss_kernel,
1466  deshake_ctx->smooth_window,
1467  M_PI / 4,
1468  deshake_ctx->abs_motion.ringbuffers[RingbufRot]
1469  );
1470  new_vals[RingbufScaleX] = smooth(
1471  deshake_ctx,
1472  deshake_ctx->gauss_kernel,
1473  deshake_ctx->smooth_window,
1474  2.0f,
1475  deshake_ctx->abs_motion.ringbuffers[RingbufScaleX]
1476  );
1477  new_vals[RingbufScaleY] = smooth(
1478  deshake_ctx,
1479  deshake_ctx->gauss_kernel,
1480  deshake_ctx->smooth_window,
1481  2.0f,
1482  deshake_ctx->abs_motion.ringbuffers[RingbufScaleY]
1483  );
1484  }
1485 
// Luma (or RGB) correction, built from the difference between buffered and
// smoothed values and written to transform_y (call opener missing from this
// listing)
1487  old_vals[RingbufX] - new_vals[RingbufX],
1488  old_vals[RingbufY] - new_vals[RingbufY],
1489  old_vals[RingbufRot] - new_vals[RingbufRot],
1490  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1491  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1492  center_w,
1493  center_h,
1494  transform_y
1495  );
1496 
// Chroma correction: same rotation and scale, with the translation divided by
// the luma/chroma size ratio and centred on the chroma plane
1498  (old_vals[RingbufX] - new_vals[RingbufX]) / luma_w_over_chroma_w,
1499  (old_vals[RingbufY] - new_vals[RingbufY]) / luma_h_over_chroma_h,
1500  old_vals[RingbufRot] - new_vals[RingbufRot],
1501  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1502  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1503  center_w_chroma,
1504  center_h_chroma,
1505  transform_uv
1506  );
1507 
1508  CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_y, NULL);
1509  CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_uv, 9 * sizeof(float), transform_uv, NULL);
1510 
1511  if (deshake_ctx->debug_on)
1512  transform_debug(avctx, new_vals, old_vals, deshake_ctx->curr_frame);
1513 
// Both output frames are always allocated; whichever one is not returned is
// freed at the end
1514  cropped_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
1515  if (!cropped_frame) {
1516  err = AVERROR(ENOMEM);
1517  goto fail;
1518  }
1519 
1520  transformed_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
1521  if (!transformed_frame) {
1522  err = AVERROR(ENOMEM);
1523  goto fail;
1524  }
1525 
1526  transforms[0] = deshake_ctx->transform_y;
1527  transforms[1] = transforms[2] = deshake_ctx->transform_uv;
1528 
// NOTE(review): the loop bound is the size of the frame's data[] array and the
// loop only stops early at the first NULL plane, while transforms[] has three
// entries. A frame with more than three planes would read transforms[3] out of
// bounds unless such formats are rejected elsewhere -- confirm.
1529  for (int p = 0; p < FF_ARRAY_ELEMS(transformed_frame->data); p++) {
1530  // Transform all of the planes appropriately
1531  src = (cl_mem)input_frame->data[p];
1532  transformed = (cl_mem)transformed_frame->data[p];
1533 
1534  if (!transformed)
1535  break;
1536 
1537  err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);
1538  if (err < 0)
1539  goto fail;
1540 
// Enqueue kernel_transform for this plane (call opener missing from this
// listing). transform_event is passed on every iteration, so after the loop it
// appears to refer only to the last plane enqueued.
1542  deshake_ctx->command_queue,
1543  deshake_ctx->kernel_transform,
1544  global_work,
1545  NULL,
1546  &transform_event,
1547  { sizeof(cl_mem), &src },
1548  { sizeof(cl_mem), &transformed },
1549  { sizeof(cl_mem), &transforms[p] },
1550  );
1551  }
1552 
// Debug overlay: only for non-YUV input and only when matches were recorded
// for this frame
1553  if (deshake_ctx->debug_on && !deshake_ctx->is_yuv && debug_matches.num_matches > 0) {
1555  deshake_ctx->command_queue,
1556  deshake_ctx->debug_matches,
1557  debug_matches.num_matches * sizeof(MotionVector),
1558  debug_matches.matches,
1559  NULL
1560  );
1561 
1563  deshake_ctx->command_queue,
1564  deshake_ctx->debug_model_matches,
1565  debug_matches.num_model_matches * sizeof(MotionVector),
1566  debug_matches.model_matches,
1567  NULL
1568  );
1569 
1570  num_model_matches = debug_matches.num_model_matches;
1571 
1572  // Invert the transform
1574  new_vals[RingbufX] - old_vals[RingbufX],
1575  new_vals[RingbufY] - old_vals[RingbufY],
1576  new_vals[RingbufRot] - old_vals[RingbufRot],
1577  old_vals[RingbufScaleX] / new_vals[RingbufScaleX],
1578  old_vals[RingbufScaleY] / new_vals[RingbufScaleY],
1579  center_w,
1580  center_h,
1581  transform_debug_rgb
1582  );
1583 
// The transform_y device buffer is overwritten with the inverted transform
// for the debug kernel
1584  CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_debug_rgb, NULL);
1585 
1586  transformed = (cl_mem)transformed_frame->data[0];
// One work-item per recorded match
1588  deshake_ctx->command_queue,
1589  deshake_ctx->kernel_draw_debug_info,
1590  (size_t[]){ debug_matches.num_matches },
1591  NULL,
1592  NULL,
1593  { sizeof(cl_mem), &transformed },
1594  { sizeof(cl_mem), &deshake_ctx->debug_matches },
1595  { sizeof(cl_mem), &deshake_ctx->debug_model_matches },
1596  { sizeof(cl_int), &num_model_matches },
1597  { sizeof(cl_mem), &deshake_ctx->transform_y }
1598  );
1599  }
1600 
1601  if (deshake_ctx->should_crop) {
1602  // Generate transforms for cropping
// The crop transforms use one fifth of the translation and rotation; scale is
// passed through unchanged
1604  (old_vals[RingbufX] - new_vals[RingbufX]) / 5,
1605  (old_vals[RingbufY] - new_vals[RingbufY]) / 5,
1606  (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
1607  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1608  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1609  center_w,
1610  center_h,
1611  transform_crop_y
1612  );
1613  update_needed_crop(&deshake_ctx->crop_y, transform_crop_y, input_frame->width, input_frame->height);
1614 
1616  (old_vals[RingbufX] - new_vals[RingbufX]) / (5 * luma_w_over_chroma_w),
1617  (old_vals[RingbufY] - new_vals[RingbufY]) / (5 * luma_h_over_chroma_h),
1618  (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
1619  new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1620  new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1621  center_w_chroma,
1622  center_h_chroma,
1623  transform_crop_uv
1624  );
1625  update_needed_crop(&deshake_ctx->crop_uv, transform_crop_uv, chroma_width, chroma_height);
1626 
1627  crops[0] = deshake_ctx->crop_y;
1628  crops[1] = crops[2] = deshake_ctx->crop_uv;
1629 
// NOTE(review): as with transforms[] above, crops[] has three entries while
// the loop bound is the size of data[] -- confirm that frames with more than
// three planes cannot reach this point.
1630  for (int p = 0; p < FF_ARRAY_ELEMS(cropped_frame->data); p++) {
1631  // Crop all of the planes appropriately
1632  dst = (cl_mem)cropped_frame->data[p];
1633  transformed = (cl_mem)transformed_frame->data[p];
1634 
1635  if (!dst)
1636  break;
1637 
1638  err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);
1639  if (err < 0)
1640  goto fail;
1641 
1643  deshake_ctx->command_queue,
1644  deshake_ctx->kernel_crop_upscale,
1645  global_work,
1646  NULL,
1647  &crop_upscale_event,
1648  { sizeof(cl_mem), &transformed },
1649  { sizeof(cl_mem), &dst },
1650  { sizeof(cl_float2), &crops[p].top_left },
1651  { sizeof(cl_float2), &crops[p].bottom_right },
1652  );
1653  }
1654  }
1655 
1656  if (deshake_ctx->curr_frame < deshake_ctx->smooth_window / 2) {
1657  // This means we are somewhere at the start of the video. We need to
1658  // increment the current frame offset until it reaches the center of
1659  // the ringbuffers (as the current frame will be located there for
1660  // the rest of the video).
1661  //
1662  // The end of the video is taken care of by draining motion data
1663  // one-by-one out of the buffer, causing the (at that point fixed)
1664  // offset to move towards later frames' data.
1665  ++deshake_ctx->abs_motion.curr_frame_offset;
1666  }
1667 
1668  if (deshake_ctx->abs_motion.data_end_offset != -1) {
1669  // Keep the end offset in sync with the frame it's supposed to be
1670  // positioned at
1671  --deshake_ctx->abs_motion.data_end_offset;
1672 
1673  if (deshake_ctx->abs_motion.data_end_offset == deshake_ctx->abs_motion.curr_frame_offset - 1) {
1674  // The end offset would be the start of the new video sequence; flip to
1675  // start offset
1676  deshake_ctx->abs_motion.data_end_offset = -1;
1677  deshake_ctx->abs_motion.data_start_offset = deshake_ctx->abs_motion.curr_frame_offset;
1678  }
1679  } else if (deshake_ctx->abs_motion.data_start_offset != -1) {
1680  // Keep the start offset in sync with the frame it's supposed to be
1681  // positioned at
1682  --deshake_ctx->abs_motion.data_start_offset;
1683  }
1684 
// Accumulate kernel timings for the debug summary (see the note on
// transform_event above: only the last plane's event is measured)
1685  if (deshake_ctx->debug_on) {
1686  deshake_ctx->transform_time += ff_opencl_get_event_time(transform_event);
1687  if (deshake_ctx->should_crop) {
1688  deshake_ctx->crop_upscale_time += ff_opencl_get_event_time(crop_upscale_event);
1689  }
1690  }
1691 
1692  ++deshake_ctx->curr_frame;
1693 
// av_freep() also resets the pointer to NULL, so the fail path below cannot
// free it a second time
1694  if (deshake_ctx->debug_on)
1695  av_freep(&debug_matches.matches);
1696 
// Return the cropped frame when cropping is enabled, otherwise the
// transformed one; the unused frame and the input are freed
1697  if (deshake_ctx->should_crop) {
1698  err = av_frame_copy_props(cropped_frame, input_frame);
1699  if (err < 0)
1700  goto fail;
1701 
1702  av_frame_free(&transformed_frame);
1703  av_frame_free(&input_frame);
1704  return ff_filter_frame(outlink, cropped_frame);
1705 
1706  } else {
1707  err = av_frame_copy_props(transformed_frame, input_frame);
1708  if (err < 0)
1709  goto fail;
1710 
1711  av_frame_free(&cropped_frame);
1712  av_frame_free(&input_frame);
1713  return ff_filter_frame(outlink, transformed_frame);
1714  }
1715 
// Error path: wait for queued OpenCL work before releasing the frames
1716 fail:
1717  clFinish(deshake_ctx->command_queue);
1718 
1719  if (deshake_ctx->debug_on)
1720  if (debug_matches.matches)
1721  av_freep(&debug_matches.matches);
1722 
1723  av_frame_free(&input_frame);
1724  av_frame_free(&transformed_frame);
1725  av_frame_free(&cropped_frame);
1726  return err;
1727 }
1728 
1729 // Add the given frame to the frame queue to eventually be processed.
1730 //
1731 // Also determines the motion from the previous frame and updates the stored
1732 // motion information accordingly.
//
// link:        input link; the filter context is taken from link->dst
// input_frame: on success it is handed to the frame queue; on the fail path it
//              is freed here
//
// Returns the result of ff_framequeue_add(), or err after jumping to fail.
//
// NOTE(review): this listing has gaps in its embedded numbering. The argument
// lists that start at deshake_ctx->command_queue have lost the line naming the
// macro or function being invoked.
1733 static int queue_frame(AVFilterLink *link, AVFrame *input_frame)
1734 {
1735  AVFilterContext *avctx = link->dst;
1736  DeshakeOpenCLContext *deshake_ctx = avctx->priv;
1737  int err;
1738  int num_vectors;
1739  int num_inliers = 0;
1740  cl_int cle;
1741  FrameDelta relative;
1742  SimilarityMatrix model;
1743  size_t global_work[2];
1744  size_t harris_global_work[2];
1745  size_t grid_32_global_work[2];
1746  int grid_32_h, grid_32_w;
1747  size_t local_work[2];
1748  cl_mem src, temp;
// NOTE(review): these are sized 5 but indexed by loops bounded by
// RingbufCount; assumes RingbufCount <= 5 -- confirm.
1749  float prev_vals[5];
1750  float new_vals[5];
1751  cl_event grayscale_event, harris_response_event, refine_features_event,
1752  brief_event, match_descriptors_event, read_buf_event;
1753  DebugMatches debug_matches;
1754 
1755  num_vectors = 0;
1756 
1757  local_work[0] = 8;
1758  local_work[1] = 8;
1759 
// Three work sizes for plane 0, requested with a final argument of 0, 8 and 32
// (presumably a block-size alignment -- confirm in
// ff_opencl_filter_work_size_from_image)
1760  err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, 0, 0);
1761  if (err < 0)
1762  goto fail;
1763 
1764  err = ff_opencl_filter_work_size_from_image(avctx, harris_global_work, input_frame, 0, 8);
1765  if (err < 0)
1766  goto fail;
1767 
1768  err = ff_opencl_filter_work_size_from_image(avctx, grid_32_global_work, input_frame, 0, 32);
1769  if (err < 0)
1770  goto fail;
1771 
1772  // We want a single work-item for each 32x32 block of pixels in the input frame
1773  grid_32_global_work[0] /= 32;
1774  grid_32_global_work[1] /= 32;
1775 
1776  grid_32_h = ROUNDED_UP_DIV(input_frame->height, 32);
1777  grid_32_w = ROUNDED_UP_DIV(input_frame->width, 32);
1778 
// For YUV input, plane 0 of the frame is used directly as the grayscale
// image; otherwise the grayscale kernel writes into ctx->grayscale
1779  if (deshake_ctx->is_yuv) {
1780  deshake_ctx->grayscale = (cl_mem)input_frame->data[0];
1781  } else {
1782  src = (cl_mem)input_frame->data[0];
1783 
1785  deshake_ctx->command_queue,
1786  deshake_ctx->kernel_grayscale,
1787  global_work,
1788  NULL,
1789  &grayscale_event,
1790  { sizeof(cl_mem), &src },
1791  { sizeof(cl_mem), &deshake_ctx->grayscale }
1792  );
1793  }
1794 
// Harris response over the grayscale image, using the 8x8 local work size
1796  deshake_ctx->command_queue,
1797  deshake_ctx->kernel_harris_response,
1798  harris_global_work,
1799  local_work,
1800  &harris_response_event,
1801  { sizeof(cl_mem), &deshake_ctx->grayscale },
1802  { sizeof(cl_mem), &deshake_ctx->harris_buf }
1803  );
1804 
// Feature refinement, launched over the 32x32 grid
1806  deshake_ctx->command_queue,
1807  deshake_ctx->kernel_refine_features,
1808  grid_32_global_work,
1809  NULL,
1810  &refine_features_event,
1811  { sizeof(cl_mem), &deshake_ctx->grayscale },
1812  { sizeof(cl_mem), &deshake_ctx->harris_buf },
1813  { sizeof(cl_mem), &deshake_ctx->refined_features },
1814  { sizeof(cl_int), &deshake_ctx->refine_features }
1815  );
1816 
// BRIEF descriptors for the refined features, launched over the same grid
1818  deshake_ctx->command_queue,
1819  deshake_ctx->kernel_brief_descriptors,
1820  grid_32_global_work,
1821  NULL,
1822  &brief_event,
1823  { sizeof(cl_mem), &deshake_ctx->grayscale },
1824  { sizeof(cl_mem), &deshake_ctx->refined_features },
1825  { sizeof(cl_mem), &deshake_ctx->descriptors },
1826  { sizeof(cl_mem), &deshake_ctx->brief_pattern}
1827  );
1828 
1829  if (!av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[RingbufX])) {
1830  // This is the first frame we've been given to queue, meaning there is
1831  // no previous frame to match descriptors to
1832 
1833  goto no_motion_data;
1834  }
1835 
// Match this frame's descriptors against the previous frame's
1837  deshake_ctx->command_queue,
1838  deshake_ctx->kernel_match_descriptors,
1839  grid_32_global_work,
1840  NULL,
1841  &match_descriptors_event,
1842  { sizeof(cl_mem), &deshake_ctx->prev_refined_features },
1843  { sizeof(cl_mem), &deshake_ctx->refined_features },
1844  { sizeof(cl_mem), &deshake_ctx->descriptors },
1845  { sizeof(cl_mem), &deshake_ctx->prev_descriptors },
1846  { sizeof(cl_mem), &deshake_ctx->matches }
1847  );
1848 
// Blocking read (CL_TRUE) of one MotionVector per grid cell into matches_host
1849  cle = clEnqueueReadBuffer(
1850  deshake_ctx->command_queue,
1851  deshake_ctx->matches,
1852  CL_TRUE,
1853  0,
1854  grid_32_h * grid_32_w * sizeof(MotionVector),
1855  deshake_ctx->matches_host,
1856  0,
1857  NULL,
1858  &read_buf_event
1859  );
1860  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to read matches to host: %d.\n", cle);
1861 
1862  num_vectors = make_vectors_contig(deshake_ctx, grid_32_h, grid_32_w);
1863 
1864  if (num_vectors < 10) {
1865  // Not enough matches to get reliable motion data for this frame
1866  //
1867  // From this point on all data is relative to this frame rather than the
1868  // original frame. We have to make sure that we don't mix values that were
1869  // relative to the original frame with the new values relative to this
1870  // frame when doing the gaussian smoothing. We keep track of where the old
1871  // values end using this data_end_offset field in order to accomplish
1872  // that goal.
1873  //
1874  // If no motion data is present for multiple frames in a short window of
1875  // time, we leave the end where it was to avoid mixing 0s in with the
1876  // old data (and just treat them all as part of the new values)
1877  if (deshake_ctx->abs_motion.data_end_offset == -1) {
1878  deshake_ctx->abs_motion.data_end_offset =
1879  av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[RingbufX]) - 1;
1880  }
1881 
1882  goto no_motion_data;
1883  }
1884 
// Fit a model to the matches; a zero return is treated as "no motion data".
// The meaning of the trailing numeric arguments (10.0, 3000, 0.999999999999)
// is defined by estimate_affine_2d, which is not visible here.
1885  if (!estimate_affine_2d(
1886  deshake_ctx,
1887  deshake_ctx->matches_contig_host,
1888  &debug_matches,
1889  num_vectors,
1890  model.matrix,
1891  10.0,
1892  3000,
1893  0.999999999999
1894  )) {
1895  goto no_motion_data;
1896  }
1897 
// Collect the matches still flagged should_consider into the inliers array
1898  for (int i = 0; i < num_vectors; i++) {
1899  if (deshake_ctx->matches_contig_host[i].should_consider) {
1900  deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[i];
1901  num_inliers++;
1902  }
1903  }
1904 
1905  if (!minimize_error(
1906  deshake_ctx,
1907  deshake_ctx->inliers,
1908  &debug_matches,
1909  num_inliers,
1910  model.matrix,
1911  400
1912  )) {
1913  goto no_motion_data;
1914  }
1915 
1916 
1917  relative = decompose_transform(model.matrix);
1918 
1919  // Get the absolute transform data for the previous frame
1920  for (int i = 0; i < RingbufCount; i++) {
1921  av_fifo_peek(
1922  deshake_ctx->abs_motion.ringbuffers[i],
1923  &prev_vals[i], 1,
1924  av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[i]) - 1);
1925  }
1926 
// Accumulate the frame-to-frame delta onto the previous absolute values:
// translation and rotation are added, scale is divided
1927  new_vals[RingbufX] = prev_vals[RingbufX] + relative.translation.s[0];
1928  new_vals[RingbufY] = prev_vals[RingbufY] + relative.translation.s[1];
1929  new_vals[RingbufRot] = prev_vals[RingbufRot] + relative.rotation;
1930  new_vals[RingbufScaleX] = prev_vals[RingbufScaleX] / relative.scale.s[0];
1931  new_vals[RingbufScaleY] = prev_vals[RingbufScaleY] / relative.scale.s[1];
1932 
// NOTE(review): timings are only accumulated on this success path; frames
// that jump to no_motion_data skip this block, so they add nothing to the
// totals.
1933  if (deshake_ctx->debug_on) {
1934  if (!deshake_ctx->is_yuv) {
1935  deshake_ctx->grayscale_time += ff_opencl_get_event_time(grayscale_event);
1936  }
1937  deshake_ctx->harris_response_time += ff_opencl_get_event_time(harris_response_event);
1938  deshake_ctx->refine_features_time += ff_opencl_get_event_time(refine_features_event);
1939  deshake_ctx->brief_descriptors_time += ff_opencl_get_event_time(brief_event);
1940  deshake_ctx->match_descriptors_time += ff_opencl_get_event_time(match_descriptors_event);
1941  deshake_ctx->read_buf_time += ff_opencl_get_event_time(read_buf_event);
1942  }
1943 
1944  goto end;
1945 
// Fallback: record an identity motion (no shift, no rotation, unit scale) and
// clear the should_consider flag on every match
1946 no_motion_data:
1947  new_vals[RingbufX] = 0.0f;
1948  new_vals[RingbufY] = 0.0f;
1949  new_vals[RingbufRot] = 0.0f;
1950  new_vals[RingbufScaleX] = 1.0f;
1951  new_vals[RingbufScaleY] = 1.0f;
1952 
1953  for (int i = 0; i < num_vectors; i++) {
1954  deshake_ctx->matches_contig_host[i].should_consider = 0;
1955  }
1956  debug_matches.num_model_matches = 0;
1957 
1958  if (deshake_ctx->debug_on) {
1959  av_log(avctx, AV_LOG_VERBOSE,
1960  "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n"
1961  );
1962  }
1963 
1964  goto end;
1965 
1966 end:
1967  // Swap the descriptor buffers (we don't need the previous frame's descriptors
1968  // again so we will use that space for the next frame's descriptors)
1969  temp = deshake_ctx->prev_descriptors;
1970  deshake_ctx->prev_descriptors = deshake_ctx->descriptors;
1971  deshake_ctx->descriptors = temp;
1972 
1973  // Same for the refined features
1974  temp = deshake_ctx->prev_refined_features;
1975  deshake_ctx->prev_refined_features = deshake_ctx->refined_features;
1976  deshake_ctx->refined_features = temp;
1977 
// In debug mode, queue a heap copy of this frame's matches; filter_frame()
// frees the copy after reading the entry back from the FIFO
1978  if (deshake_ctx->debug_on) {
1979  if (num_vectors == 0) {
1980  debug_matches.matches = NULL;
1981  } else {
1982  debug_matches.matches = av_malloc_array(num_vectors, sizeof(MotionVector));
1983 
1984  if (!debug_matches.matches) {
1985  err = AVERROR(ENOMEM);
1986  goto fail;
1987  }
1988  }
1989 
1990  for (int i = 0; i < num_vectors; i++) {
1991  debug_matches.matches[i] = deshake_ctx->matches_contig_host[i];
1992  }
1993  debug_matches.num_matches = num_vectors;
1994 
1995  av_fifo_write(
1996  deshake_ctx->abs_motion.debug_matches,
1997  &debug_matches, 1);
1998  }
1999 
// Append this frame's absolute motion values to the ringbuffers
2000  for (int i = 0; i < RingbufCount; i++) {
2001  av_fifo_write(deshake_ctx->abs_motion.ringbuffers[i], &new_vals[i], 1);
2002  }
2003 
2004  return ff_framequeue_add(&deshake_ctx->fq, input_frame);
2005 
// Error path: wait for queued OpenCL work, then drop the frame
2006 fail:
2007  clFinish(deshake_ctx->command_queue);
2008  av_frame_free(&input_frame);
2009  return err;
2010 }
2011 
/*
 * Body of the filter's activate callback (referenced further down as
 * `.activate = activate`).
 *
 * Visible flow:
 *   - Until EOF has been seen, a frame obtained from the input is checked for
 *     a hardware frames context, the ringbuffers are trimmed if full, and once
 *     at least smooth_window / 2 frames are queued the oldest queued frame is
 *     handed to filter_frame().
 *   - When the input link reports AVERROR_EOF, the eof flag is latched.
 *   - After EOF, every frame still in the queue is passed to filter_frame(),
 *     then EOF is set on the output link and 0 is returned.
 *   - Otherwise FFERROR_NOT_READY is returned.
 *
 * NOTE(review): this listing omits the lines numbered 2012, 2021, 2024, 2032,
 * 2044 and 2078 (the function signature and several statements, including the
 * ones that assign `ret`). The comments below describe only what is visible;
 * confirm the omitted statements against the real source file.
 */
2013 {
2014  AVFilterLink *inlink = ctx->inputs[0];
2015  AVFilterLink *outlink = ctx->outputs[0];
2016  DeshakeOpenCLContext *deshake_ctx = ctx->priv;
2017  AVFrame *frame = NULL;
2018  int ret, status;
2019  int64_t pts;
2020 
2022 
      // Input side: only taken while EOF has not yet been latched.
2023  if (!deshake_ctx->eof) {
      // `ret` is assigned on the omitted line 2024 (presumably the call that
      // fetches `frame` from the input link — TODO confirm).
2025  if (ret < 0)
2026  return ret;
2027  if (ret > 0) {
      // NOTE(review): this return, and the `ret < 0` return inside the
      // initialization branch just below, leave `frame` unreleased in the
      // visible code — confirm whether the frame leaks on these error paths.
2028  if (!frame->hw_frames_ctx)
2029  return AVERROR(EINVAL);
2030 
      // One-time setup guarded by the `initialized` flag; the call itself is on
      // the omitted line 2032.
2031  if (!deshake_ctx->initialized) {
2033  if (ret < 0)
2034  return ret;
2035  }
2036 
2037  // If there is no more space in the ringbuffers, remove the oldest
2038  // values to make room for the new ones
      // Fullness is tested on the RingbufX buffer only; when it is full, one
      // element is dropped from each of the RingbufCount buffers.
2039  if (!av_fifo_can_write(deshake_ctx->abs_motion.ringbuffers[RingbufX])) {
2040  for (int i = 0; i < RingbufCount; i++) {
2041  av_fifo_drain2(deshake_ctx->abs_motion.ringbuffers[i], 1);
2042  }
2043  }
      // `ret` is reassigned on the omitted line 2044 (presumably by the call
      // that queues `frame` — TODO confirm).
2045  if (ret < 0)
2046  return ret;
      // `ret < 0` has already returned above, so this condition always holds.
2047  if (ret >= 0) {
2048  // See if we have enough buffered frames to process one
2049  //
2050  // "enough" is half the smooth window of queued frames into the future
2051  if (ff_framequeue_queued_frames(&deshake_ctx->fq) >= deshake_ctx->smooth_window / 2) {
2052  return filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));
2053  }
2054  }
2055  }
2056  }
2057 
      // Latch EOF once the input link reports it; any other acknowledged status
      // value is ignored here.
2058  if (!deshake_ctx->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
2059  if (status == AVERROR_EOF) {
2060  deshake_ctx->eof = 1;
2061  }
2062  }
2063 
2064  if (deshake_ctx->eof) {
2065  // Finish processing the rest of the frames in the queue.
2066  while(ff_framequeue_queued_frames(&deshake_ctx->fq) != 0) {
      // Drop one element from every ringbuffer for each flushed frame.
2067  for (int i = 0; i < RingbufCount; i++) {
2068  av_fifo_drain2(deshake_ctx->abs_motion.ringbuffers[i], 1);
2069  }
2070 
2071  ret = filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));
2072  if (ret < 0) {
2073  return ret;
2074  }
2075  }
2076 
      // Debug mode only: format the per-kernel and buffer-read timings, each
      // passed through averaged_event_time_ms() together with curr_frame. The
      // call that consumes this format string is on the omitted line 2078.
2077  if (deshake_ctx->debug_on) {
2079  "Average kernel execution times:\n"
2080  "\t grayscale: %0.3f ms\n"
2081  "\t harris_response: %0.3f ms\n"
2082  "\t refine_features: %0.3f ms\n"
2083  "\tbrief_descriptors: %0.3f ms\n"
2084  "\tmatch_descriptors: %0.3f ms\n"
2085  "\t transform: %0.3f ms\n"
2086  "\t crop_upscale: %0.3f ms\n"
2087  "Average buffer read times:\n"
2088  "\t features buf: %0.3f ms\n",
2089  averaged_event_time_ms(deshake_ctx->grayscale_time, deshake_ctx->curr_frame),
2090  averaged_event_time_ms(deshake_ctx->harris_response_time, deshake_ctx->curr_frame),
2091  averaged_event_time_ms(deshake_ctx->refine_features_time, deshake_ctx->curr_frame),
2092  averaged_event_time_ms(deshake_ctx->brief_descriptors_time, deshake_ctx->curr_frame),
2093  averaged_event_time_ms(deshake_ctx->match_descriptors_time, deshake_ctx->curr_frame),
2094  averaged_event_time_ms(deshake_ctx->transform_time, deshake_ctx->curr_frame),
2095  averaged_event_time_ms(deshake_ctx->crop_upscale_time, deshake_ctx->curr_frame),
2096  averaged_event_time_ms(deshake_ctx->read_buf_time, deshake_ctx->curr_frame)
2097  );
2098  }
2099 
      // Set EOF on the output link, passing deshake_ctx->duration as the
      // timestamp argument.
2100  ff_outlink_set_status(outlink, AVERROR_EOF, deshake_ctx->duration);
2101  return 0;
2102  }
2103 
      // The eof branch above always returns, so eof is 0 whenever this line is
      // reached and the guard is always true.
2104  if (!deshake_ctx->eof) {
2105  FF_FILTER_FORWARD_WANTED(outlink, inlink);
2106  }
2107 
2108  return FFERROR_NOT_READY;
2109 }
2110 
// Initializer of the input pad array (the declaration on listing line 2111 is
// not present here): a single video pad named "default" whose config_props
// callback is ff_opencl_filter_config_input.
2112  {
2113  .name = "default",
2114  .type = AVMEDIA_TYPE_VIDEO,
2115  .config_props = &ff_opencl_filter_config_input,
2116  },
2117 };
2118 
// Initializer of the output pad array (the declaration on listing line 2119 is
// not present here): a single video pad named "default" whose config_props
// callback is ff_opencl_filter_config_output.
2120  {
2121  .name = "default",
2122  .type = AVMEDIA_TYPE_VIDEO,
2123  .config_props = &ff_opencl_filter_config_output,
2124  },
2125 };
2126 
// OFFSET(x): byte offset of field x within DeshakeOpenCLContext; used by the
// option entries that follow to locate each option's backing field.
2127 #define OFFSET(x) offsetof(DeshakeOpenCLContext, x)
// FLAGS: the flag combination given to every option entry that follows.
// NOTE(review): the expansion is not parenthesized, so it is only safe where
// it is used as a complete expression (as it is in the entries below).
2128 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
2129 
// Entries of the filter's option table (the array declaration on listing line
// 2130 is not present here). Each entry lists, in order: option name, help
// text, OFFSET() of the backing DeshakeOpenCLContext field, value type,
// default value, and then what appear to be the minimum, maximum and flags
// (per the usual AVOption layout — confirm against opt.h).
2131  {
2132  "tripod", "simulates a tripod by preventing any camera movement whatsoever "
2133  "from the original frame",
2134  OFFSET(tripod_mode), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS
2135  },
2136  {
2137  "debug", "turn on additional debugging information",
2138  OFFSET(debug_on), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS
2139  },
      // Enabled by default.
2140  {
2141  "adaptive_crop", "attempt to subtly crop borders to reduce mirrored content",
2142  OFFSET(should_crop), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS
2143  },
      // Enabled by default.
2144  {
2145  "refine_features", "refine feature point locations at a sub-pixel level",
2146  OFFSET(refine_features), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS
2147  },
      // Default 0.0, which the help text describes as "adaptively determine
      // optimal strength"; bounds given as 0.0 .. 1.0.
2148  {
2149  "smooth_strength", "smoothing strength (0 attempts to adaptively determine optimal strength)",
2150  OFFSET(smooth_percent), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, 0.0f, 1.0f, FLAGS
2151  },
      // Default 2.0; bounds given as 0.1 .. 10.0.
2152  {
2153  "smooth_window_multiplier", "multiplier for number of frames to buffer for motion data",
2154  OFFSET(smooth_window_multiplier), AV_OPT_TYPE_FLOAT, {.dbl = 2.0}, 0.1, 10.0, FLAGS
2155  },
      // All-NULL entry closing the table.
2156  { NULL }
2157 };
2158 
2159 AVFILTER_DEFINE_CLASS(deshake_opencl);
2160 
// Visible fields of the filter definition. The declaration line (2161) and
// the fields on listing lines 2166, 2167 and 2169-2171 are not present in this
// listing, so only the fields shown here are described.
2162  .name = "deshake_opencl",
2163  .description = NULL_IF_CONFIG_SMALL("Feature-point based video stabilization filter"),
      // Private context size and the option class generated by
      // AVFILTER_DEFINE_CLASS(deshake_opencl).
2164  .priv_size = sizeof(DeshakeOpenCLContext),
2165  .priv_class = &deshake_opencl_class,
      // The filter is driven through its activate callback.
2168  .activate = activate,
2172  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE
2173 };
deshake_opencl_inputs
static const AVFilterPad deshake_opencl_inputs[]
Definition: vf_deshake_opencl.c:2111
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(deshake_opencl)
MATCHES_CONTIG_SIZE
#define MATCHES_CONTIG_SIZE
Definition: vf_deshake_opencl.c:93
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:101
CL_RUN_KERNEL_WITH_ARGS
#define CL_RUN_KERNEL_WITH_ARGS(queue, kernel, global_work_size, local_work_size, event,...)
Uses the above macro to enqueue the given kernel and then additionally runs it to completion via clFi...
Definition: opencl.h:180
av_fifo_drain2
void av_fifo_drain2(AVFifo *f, size_t size)
Discard the specified amount of data from an AVFifo.
Definition: fifo.c:266
FF_ENABLE_DEPRECATION_WARNINGS
#define FF_ENABLE_DEPRECATION_WARNINGS
Definition: internal.h:82
F2
#define F2(l, r, i)
Definition: cast5.c:44
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
av_fifo_can_write
size_t av_fifo_can_write(const AVFifo *f)
Definition: fifo.c:94
free_debug_matches
static void free_debug_matches(AbsoluteFrameMotion *afm)
Definition: vf_deshake_opencl.c:152
status
they must not be accessed directly The fifo field contains the frames that are queued in the input for processing by the filter The status_in and status_out fields contains the queued status(EOF or error) of the link
FrameDelta::translation
cl_float2 translation
Definition: vf_deshake_opencl.c:167
av_clip
#define av_clip
Definition: common.h:95
IterIndices
Definition: vf_deshake_opencl.c:187
r
const char * r
Definition: vf_curves.c:126
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
DeshakeOpenCLContext::brief_pattern
cl_mem brief_pattern
Definition: vf_deshake_opencl.c:257
deshake_opencl_init
static int deshake_opencl_init(AVFilterContext *avctx)
Definition: vf_deshake_opencl.c:1111
estimate_affine_2d
static int estimate_affine_2d(DeshakeOpenCLContext *deshake_ctx, MotionVector *point_pairs, DebugMatches *debug_matches, const int num_point_pairs, double *model_out, const double threshold, const int max_iters, const double confidence)
Definition: vf_deshake_opencl.c:519
minimize_error
static int minimize_error(DeshakeOpenCLContext *deshake_ctx, MotionVector *inliers, DebugMatches *debug_matches, const int num_inliers, double *model_out, const int max_iters)
Definition: vf_deshake_opencl.c:675
OFFSET
#define OFFSET(x)
Definition: vf_deshake_opencl.c:2127
av_lfg_init
av_cold void av_lfg_init(AVLFG *c, unsigned int seed)
Definition: lfg.c:32
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:374
AV_PIX_FMT_GBRP16BE
@ AV_PIX_FMT_GBRP16BE
planar GBR 4:4:4 48bpp, big-endian
Definition: pixfmt.h:164
DeshakeOpenCLContext::refine_features_time
unsigned long long refine_features_time
Definition: vf_deshake_opencl.c:294
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:969
AbsoluteFrameMotion
Definition: vf_deshake_opencl.c:135
AVFrame::duration
int64_t duration
Duration of the frame, in the same units as pts.
Definition: frame.h:728
AV_PIX_FMT_GBRP10BE
@ AV_PIX_FMT_GBRP10BE
planar GBR 4:4:4 30bpp, big-endian
Definition: pixfmt.h:162
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2888
AVERROR_EOF
#define AVERROR_EOF
End of file.
Definition: error.h:57
FFERROR_NOT_READY
return FFERROR_NOT_READY
Definition: filter_design.txt:204
DeshakeOpenCLContext::kernel_draw_debug_info
cl_kernel kernel_draw_debug_info
Definition: vf_deshake_opencl.c:287
PointPair::p2
cl_float2 p2
Definition: vf_deshake_opencl.c:101
matrix
Definition: vc1dsp.c:42
DeshakeOpenCLContext::transform_uv
cl_mem transform_uv
Definition: vf_deshake_opencl.c:268
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:99
DeshakeOpenCLContext::should_crop
int should_crop
Definition: vf_deshake_opencl.c:274
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:330
AVFrame::pts
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:437
AVFrame::width
int width
Definition: frame.h:402
DeshakeOpenCLContext::prev_descriptors
cl_mem prev_descriptors
Definition: vf_deshake_opencl.c:261
opencl.h
AVOption
AVOption.
Definition: opt.h:251
b
#define b
Definition: input.c:41
RingbufX
@ RingbufX
Definition: vf_deshake_opencl.c:112
RingbufRot
@ RingbufRot
Definition: vf_deshake_opencl.c:114
DeshakeOpenCLContext::abs_motion
AbsoluteFrameMotion abs_motion
Definition: vf_deshake_opencl.c:207
expf
#define expf(x)
Definition: libm.h:283
AbsoluteFrameMotion::curr_frame_offset
int curr_frame_offset
Definition: vf_deshake_opencl.c:141
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:196
float.h
ff_opencl_filter_load_program
int ff_opencl_filter_load_program(AVFilterContext *avctx, const char **program_source_array, int nb_strings)
Load a new OpenCL program from strings in memory.
Definition: opencl.c:157
update_needed_crop
static void update_needed_crop(CropInfo *crop, float *transform, float frame_width, float frame_height)
Definition: vf_deshake_opencl.c:991
ff_framequeue_init
void ff_framequeue_init(FFFrameQueue *fq, FFFrameQueueGlobal *fqg)
Init a frame queue and attach it to a global structure.
Definition: framequeue.c:47
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:165
points_not_collinear
static int points_not_collinear(const cl_float2 **points)
Definition: vf_deshake_opencl.c:351
video.h
FF_FILTER_FORWARD_STATUS_BACK
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
Definition: filters.h:199
CL_CREATE_KERNEL
#define CL_CREATE_KERNEL(ctx, kernel_name)
Create a kernel with the given name.
Definition: opencl.h:93
CL_CREATE_BUFFER
#define CL_CREATE_BUFFER(ctx, buffer_name, size)
Create a buffer with the given information.
Definition: opencl.h:237
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:351
BRIEF_PATCH_SIZE_HALF
#define BRIEF_PATCH_SIZE_HALF
Definition: vf_deshake_opencl.c:91
DeshakeOpenCLContext::harris_buf
cl_mem harris_buf
Definition: vf_deshake_opencl.c:249
DeshakeOpenCLContext::smooth_window_multiplier
float smooth_window_multiplier
Definition: vf_deshake_opencl.c:283
formats.h
AV_PIX_FMT_GBRAP12LE
@ AV_PIX_FMT_GBRAP12LE
planar GBR 4:4:4:4 48bpp, little-endian
Definition: pixfmt.h:308
ff_inlink_consume_frame
int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)
Take a frame from the link's FIFO and update the link's stats.
Definition: avfilter.c:1364
ff_opencl_filter_work_size_from_image
int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, size_t *work_size, AVFrame *frame, int plane, int block_alignment)
Find the work size needed needed for a given plane of an image.
Definition: opencl.c:264
fifo.h
DeshakeOpenCLContext::debug_matches
cl_mem debug_matches
Definition: vf_deshake_opencl.c:288
AbsoluteFrameMotion::data_end_offset
int data_end_offset
Definition: vf_deshake_opencl.c:145
AV_PIX_FMT_GBRAP
@ AV_PIX_FMT_GBRAP
planar GBRA 4:4:4:4 32bpp
Definition: pixfmt.h:205
AVFilterContext::priv
void * priv
private data for use by the filter
Definition: avfilter.h:407
IterIndices::start
int start
Definition: vf_deshake_opencl.c:188
fail
#define fail()
Definition: checkasm.h:134
av_fifo_write
int av_fifo_write(AVFifo *f, const void *buf, size_t nb_elems)
Write data into a FIFO.
Definition: fifo.c:188
FFSIGN
#define FFSIGN(a)
Definition: common.h:65
DeshakeOpenCLContext::curr_frame
int curr_frame
Definition: vf_deshake_opencl.c:213
DeshakeOpenCLContext::kernel_transform
cl_kernel kernel_transform
Definition: vf_deshake_opencl.c:243
AbsoluteFrameMotion::ringbuffers
AVFifo * ringbuffers[RingbufCount]
Definition: vf_deshake_opencl.c:137
RingbufScaleY
@ RingbufScaleY
Definition: vf_deshake_opencl.c:116
IterIndices::end
int end
Definition: vf_deshake_opencl.c:189
val
static double val(void *priv, double ch)
Definition: aeval.c:77
deshake_opencl_outputs
static const AVFilterPad deshake_opencl_outputs[]
Definition: vf_deshake_opencl.c:2119
pts
static int64_t pts
Definition: transcode_aac.c:653
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
DeshakeOpenCLContext::kernel_crop_upscale
cl_kernel kernel_crop_upscale
Definition: vf_deshake_opencl.c:244
DeshakeOpenCLContext::initialized
int initialized
Definition: vf_deshake_opencl.c:195
DeshakeOpenCLContext::smooth_window
int smooth_window
Definition: vf_deshake_opencl.c:211
FrameDelta::skew
cl_float2 skew
Definition: vf_deshake_opencl.c:170
RingbufScaleX
@ RingbufScaleX
Definition: vf_deshake_opencl.c:115
ff_opencl_filter_config_output
int ff_opencl_filter_config_output(AVFilterLink *outlink)
Create a suitable hardware frames context for the output.
Definition: opencl.c:82
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:49
DeshakeOpenCLContext::eof
int eof
Definition: vf_deshake_opencl.c:199
avassert.h
DeshakeOpenCLContext::read_buf_time
unsigned long long read_buf_time
Definition: vf_deshake_opencl.c:301
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
av_fifo_read
int av_fifo_read(AVFifo *f, void *buf, size_t nb_elems)
Read data from a FIFO.
Definition: fifo.c:240
DeshakeOpenCLContext::debug_model_matches
cl_mem debug_model_matches
Definition: vf_deshake_opencl.c:289
DebugMatches::num_model_matches
int num_model_matches
Definition: vf_deshake_opencl.c:130
duration
int64_t duration
Definition: movenc.c:64
float
float
Definition: af_crystalizer.c:122
AV_PIX_FMT_GBRAP16BE
@ AV_PIX_FMT_GBRAP16BE
planar GBRA 4:4:4:4 64bpp, big-endian
Definition: pixfmt.h:206
ff_outlink_set_status
static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)
Set the status field of a link from the source filter.
Definition: filters.h:189
s
#define s(width, name)
Definition: cbs_vp9.c:256
AV_PIX_FMT_GBRP16LE
@ AV_PIX_FMT_GBRP16LE
planar GBR 4:4:4 48bpp, little-endian
Definition: pixfmt.h:165
RingbufY
@ RingbufY
Definition: vf_deshake_opencl.c:113
av_lfg_get
static unsigned int av_lfg_get(AVLFG *c)
Get the next random unsigned 32-bit number using an ALFG.
Definition: lfg.h:53
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:50
DeshakeOpenCLContext::smooth_percent
float smooth_percent
Definition: vf_deshake_opencl.c:280
FFFrameQueueGlobal
Structure to hold global options and statistics for frame queues.
Definition: framequeue.h:46
mi
#define mi
Definition: vf_colormatrix.c:108
DeshakeOpenCLContext::grayscale_time
unsigned long long grayscale_time
Definition: vf_deshake_opencl.c:292
DeshakeOpenCLContext::kernel_brief_descriptors
cl_kernel kernel_brief_descriptors
Definition: vf_deshake_opencl.c:241
av_q2d
static double av_q2d(AVRational a)
Convert an AVRational to a double.
Definition: rational.h:104
lfg.h
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts_bsf.c:365
deshake_opencl_uninit
static av_cold void deshake_opencl_uninit(AVFilterContext *avctx)
Definition: vf_deshake_opencl.c:1052
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
filters.h
DeshakeOpenCLContext::kernel_match_descriptors
cl_kernel kernel_match_descriptors
Definition: vf_deshake_opencl.c:242
ctx
AVFormatContext * ctx
Definition: movenc.c:48
av_rescale_q
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
Definition: mathematics.c:142
AV_PIX_FMT_GBRP10LE
@ AV_PIX_FMT_GBRP10LE
planar GBR 4:4:4 30bpp, little-endian
Definition: pixfmt.h:163
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:194
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
CL_RELEASE_KERNEL
#define CL_RELEASE_KERNEL(k)
release an OpenCL Kernel
Definition: opencl.h:101
ff_framequeue_take
AVFrame * ff_framequeue_take(FFFrameQueue *fq)
Take the first frame in the queue.
Definition: framequeue.c:98
RingbufferIndices
RingbufferIndices
Definition: vf_deshake_opencl.c:111
rand_in
static int rand_in(int low, int high, AVLFG *alfg)
Definition: vf_deshake_opencl.c:305
averaged_event_time_ms
static double averaged_event_time_ms(unsigned long long total_time, int num_frames)
Definition: vf_deshake_opencl.c:311
AbsoluteFrameMotion::debug_matches
AVFifo * debug_matches
Definition: vf_deshake_opencl.c:147
filter_frame
static int filter_frame(AVFilterLink *link, AVFrame *input_frame)
Definition: vf_deshake_opencl.c:1369
result
and forward the result(frame or status change) to the corresponding input. If nothing is possible
AV_PIX_FMT_GBRAP12BE
@ AV_PIX_FMT_GBRAP12BE
planar GBR 4:4:4:4 48bpp, big-endian
Definition: pixfmt.h:307
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
NULL
#define NULL
Definition: coverity.c:32
DeshakeOpenCLContext::alfg
AVLFG alfg
Definition: vf_deshake_opencl.c:202
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:594
transform_center_scale
static void transform_center_scale(float x_shift, float y_shift, float angle, float scale_x, float scale_y, float center_w, float center_h, float *matrix)
Definition: vf_deshake_opencl.c:953
ff_framequeue_add
int ff_framequeue_add(FFFrameQueue *fq, AVFrame *frame)
Add a frame.
Definition: framequeue.c:63
ff_framequeue_free
void ff_framequeue_free(FFFrameQueue *fq)
Free the queue and all queued frames.
Definition: framequeue.c:53
DeshakeOpenCLContext::ransac_err
float * ransac_err
Definition: vf_deshake_opencl.c:219
DeshakeOpenCLContext::kernel_harris_response
cl_kernel kernel_harris_response
Definition: vf_deshake_opencl.c:239
framequeue.h
transform
static const int8_t transform[32][32]
Definition: hevcdsp.c:27
AVFilterContext::inputs
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:400
start_end_for
static IterIndices start_end_for(DeshakeOpenCLContext *deshake_ctx, int length)
Definition: vf_deshake_opencl.c:835
av_fifo_can_read
size_t av_fifo_can_read(const AVFifo *f)
Definition: fifo.c:87
double
double
Definition: af_crystalizer.c:132
AV_PIX_FMT_OPENCL
@ AV_PIX_FMT_OPENCL
Hardware surfaces for OpenCL.
Definition: pixfmt.h:355
DeshakeOpenCLContext::matches_contig
cl_mem matches_contig
Definition: vf_deshake_opencl.c:264
DebugMatches::model_matches
MotionVector model_matches[3]
Definition: vf_deshake_opencl.c:126
DeshakeOpenCLContext::duration
int64_t duration
Definition: vf_deshake_opencl.c:198
DeshakeOpenCLContext
Definition: vf_deshake_opencl.c:192
ff_inlink_acknowledge_status
int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)
Test and acknowledge the change of status on the link.
Definition: avfilter.c:1318
DeshakeOpenCLContext::crop_uv
CropInfo crop_uv
Definition: vf_deshake_opencl.c:224
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
transformed_point
static cl_float2 transformed_point(float x, float y, float *transform)
Definition: vf_deshake_opencl.c:943
DeshakeOpenCLContext::harris_response_time
unsigned long long harris_response_time
Definition: vf_deshake_opencl.c:293
make_gauss_kernel
static void make_gauss_kernel(float *gauss_kernel, float length, float sigma)
Definition: vf_deshake_opencl.c:811
FFFrameQueue
Queue of AVFrame pointers.
Definition: framequeue.h:53
DeshakeOpenCLContext::crop_upscale_time
unsigned long long crop_upscale_time
Definition: vf_deshake_opencl.c:298
AVLFG
Context structure for the Lagged Fibonacci PRNG.
Definition: lfg.h:33
f
f
Definition: af_crystalizer.c:122
F3
#define F3(l, r, i)
Definition: cast5.c:37
AVFifo
Definition: fifo.c:35
DeshakeOpenCLContext::matches_contig_host
MotionVector * matches_contig_host
Definition: vf_deshake_opencl.c:233
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:115
powf
#define powf(x, y)
Definition: libm.h:50
CL_RELEASE_MEMORY
#define CL_RELEASE_MEMORY(m)
release an OpenCL Memory Object
Definition: opencl.h:114
AV_PIX_FMT_FLAG_RGB
#define AV_PIX_FMT_FLAG_RGB
The pixel format contains RGB-like data (as opposed to YUV/grayscale).
Definition: pixdesc.h:136
hypot
static av_const double hypot(double x, double y)
Definition: libm.h:366
CropInfo::bottom_right
cl_float2 bottom_right
Definition: vf_deshake_opencl.c:182
AV_PIX_FMT_GBRP9BE
@ AV_PIX_FMT_GBRP9BE
planar GBR 4:4:4 27bpp, big-endian
Definition: pixfmt.h:160
get_subset
static int get_subset(AVLFG *alfg, const MotionVector *point_pairs, const int num_point_pairs, MotionVector *pairs_subset, int max_attempts)
Definition: vf_deshake_opencl.c:395
ROUNDED_UP_DIV
#define ROUNDED_UP_DIV(a, b)
Definition: vf_deshake_opencl.c:95
AV_PIX_FMT_GBRP9LE
@ AV_PIX_FMT_GBRP9LE
planar GBR 4:4:4 27bpp, little-endian
Definition: pixfmt.h:161
FLAGS
#define FLAGS
Definition: vf_deshake_opencl.c:2128
MotionVector::should_consider
cl_int should_consider
Definition: vf_deshake_opencl.c:107
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
AV_PIX_FMT_GBRAP10LE
@ AV_PIX_FMT_GBRAP10LE
planar GBR 4:4:4:4 40bpp, little-endian
Definition: pixfmt.h:311
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
FF_FILTER_FORWARD_WANTED
FF_FILTER_FORWARD_WANTED(outlink, inlink)
MotionVector
Definition: agm.c:38
opencl_source.h
M_PI
#define M_PI
Definition: mathematics.h:52
ff_opencl_source_deshake
const char * ff_opencl_source_deshake
ff_opencl_filter_config_input
int ff_opencl_filter_config_input(AVFilterLink *inlink)
Check that the input link contains a suitable hardware frames context and extract the device from it.
Definition: opencl.c:46
internal.h
AV_OPT_TYPE_FLOAT
@ AV_OPT_TYPE_FLOAT
Definition: opt.h:228
av_fifo_peek
int av_fifo_peek(AVFifo *f, void *buf, size_t nb_elems, size_t offset)
Read data from a FIFO without modifying FIFO state.
Definition: fifo.c:255
FILTER_SINGLE_PIXFMT
#define FILTER_SINGLE_PIXFMT(pix_fmt_)
Definition: internal.h:184
DeshakeOpenCLContext::inliers
MotionVector * inliers
Definition: vf_deshake_opencl.c:235
FrameDelta::rotation
float rotation
Definition: vf_deshake_opencl.c:168
ff_get_matrix
void ff_get_matrix(float x_shift, float y_shift, float angle, float scale_x, float scale_y, float *matrix)
Get an affine transformation matrix from given translation, rotation, and zoom factors.
Definition: transform.c:106
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
queue_frame
static int queue_frame(AVFilterLink *link, AVFrame *input_frame)
Definition: vf_deshake_opencl.c:1733
round
static av_always_inline av_const double round(double x)
Definition: libm.h:444
DeshakeOpenCLContext::transform_y
cl_mem transform_y
Definition: vf_deshake_opencl.c:266
FFMIN3
#define FFMIN3(a, b, c)
Definition: macros.h:50
transform_debug
static void transform_debug(AVFilterContext *avctx, float *new_vals, float *old_vals, int curr_frame)
Definition: vf_deshake_opencl.c:1341
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:31
common.h
make_vectors_contig
static int make_vectors_contig(DeshakeOpenCLContext *deshake_ctx, int size_y, int size_x)
Definition: vf_deshake_opencl.c:779
delta
float delta
Definition: vorbis_enc_data.h:430
activate
static int activate(AVFilterContext *ctx)
Definition: vf_deshake_opencl.c:2012
DeshakeOpenCLContext::is_yuv
int is_yuv
Definition: vf_deshake_opencl.c:227
AVFrame::pkt_duration
attribute_deprecated int64_t pkt_duration
duration of the corresponding packet, expressed in AVStream->time_base units, 0 if unknown.
Definition: frame.h:631
DeshakeOpenCLContext::fq
FFFrameQueue fq
Definition: vf_deshake_opencl.c:205
av_inv_q
static av_always_inline AVRational av_inv_q(AVRational q)
Invert a rational.
Definition: rational.h:159
DeshakeOpenCLContext::grayscale
cl_mem grayscale
Definition: vf_deshake_opencl.c:247
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:55
DeshakeOpenCLContext::transform_time
unsigned long long transform_time
Definition: vf_deshake_opencl.c:297
RingbufCount
@ RingbufCount
Definition: vf_deshake_opencl.c:119
AVFilter
Filter definition.
Definition: avfilter.h:161
AVHWFramesContext
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:124
DeshakeOpenCLContext::ocf
OpenCLFilterContext ocf
Definition: vf_deshake_opencl.c:193
AV_PIX_FMT_GBRAP16LE
@ AV_PIX_FMT_GBRAP16LE
planar GBRA 4:4:4:4 64bpp, little-endian
Definition: pixfmt.h:207
ff_opencl_filter_init
int ff_opencl_filter_init(AVFilterContext *avctx)
Initialise an OpenCL filter context.
Definition: opencl.c:133
CL_BLOCKING_WRITE_BUFFER
#define CL_BLOCKING_WRITE_BUFFER(queue, buffer, size, host_ptr, event)
Perform a blocking write to a buffer.
Definition: opencl.h:214
ret
ret
Definition: filter_design.txt:187
FrameDelta
Definition: vf_deshake_opencl.c:166
DeshakeOpenCLContext::matches_host
MotionVector * matches_host
Definition: vf_deshake_opencl.c:232
compute_error
static void compute_error(const MotionVector *point_pairs, const int num_point_pairs, const double *model, float *err)
Definition: vf_deshake_opencl.c:437
ransac_update_num_iters
static int ransac_update_num_iters(double confidence, double num_outliers, int max_iters)
Definition: vf_deshake_opencl.c:497
pixfmt.h
frame
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
Definition: filter_design.txt:264
DeshakeOpenCLContext::prev_refined_features
cl_mem prev_refined_features
Definition: vf_deshake_opencl.c:254
FrameDelta::scale
cl_float2 scale
Definition: vf_deshake_opencl.c:169
av_fifo_alloc2
AVFifo * av_fifo_alloc2(size_t nb_elems, size_t elem_size, unsigned int flags)
Allocate and initialize an AVFifo with a given element size.
Definition: fifo.c:47
AbsoluteFrameMotion::data_start_offset
int data_start_offset
Definition: vf_deshake_opencl.c:144
DeshakeOpenCLContext::refined_features
cl_mem refined_features
Definition: vf_deshake_opencl.c:252
transform.h
check_subset
static int check_subset(const MotionVector *pairs_subset)
Definition: vf_deshake_opencl.c:377
ff_framequeue_queued_frames
static size_t ff_framequeue_queued_frames(const FFFrameQueue *fq)
Get the number of queued frames.
Definition: framequeue.h:146
run_estimate_kernel
static void run_estimate_kernel(const MotionVector *point_pairs, double *model)
Definition: vf_deshake_opencl.c:321
CropInfo
Definition: vf_deshake_opencl.c:178
AVFrame::height
int height
Definition: frame.h:402
DebugMatches::matches
MotionVector * matches
Definition: vf_deshake_opencl.c:124
DeshakeOpenCLContext::tripod_mode
int tripod_mode
Definition: vf_deshake_opencl.c:272
DeshakeOpenCLContext::match_descriptors_time
unsigned long long match_descriptors_time
Definition: vf_deshake_opencl.c:296
ff_framequeue_global_init
void ff_framequeue_global_init(FFFrameQueueGlobal *fqg)
Init a global structure.
Definition: framequeue.c:30
avfilter.h
gaussian_for
static float gaussian_for(int x, float sigma)
Definition: vf_deshake_opencl.c:805
values
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return values
Definition: filter_design.txt:263
temp
else temp
Definition: vf_mcdeint.c:248
OpenCLFilterContext
Definition: opencl.h:36
ff_opencl_filter_uninit
void ff_opencl_filter_uninit(AVFilterContext *avctx)
Uninitialise an OpenCL filter context.
Definition: opencl.c:142
DeshakeOpenCLContext::debug_on
int debug_on
Definition: vf_deshake_opencl.c:273
DebugMatches::num_matches
int num_matches
Definition: vf_deshake_opencl.c:128
CL_RELEASE_QUEUE
#define CL_RELEASE_QUEUE(q)
release an OpenCL Command Queue
Definition: opencl.h:127
DeshakeOpenCLContext::kernel_refine_features
cl_kernel kernel_refine_features
Definition: vf_deshake_opencl.c:240
AVFilterContext
An instance of a filter.
Definition: avfilter.h:392
FF_DISABLE_DEPRECATION_WARNINGS
#define FF_DISABLE_DEPRECATION_WARNINGS
Definition: internal.h:81
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:158
desc
const char * desc
Definition: libsvtav1.c:83
find_inliers
static int find_inliers(MotionVector *point_pairs, const int num_point_pairs, const double *model, float *err, double thresh)
Definition: vf_deshake_opencl.c:461
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
deshake_opencl_options
static const AVOption deshake_opencl_options[]
Definition: vf_deshake_opencl.c:2130
mem.h
PointPair::p1
cl_float2 p1
Definition: vf_deshake_opencl.c:99
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
ff_opencl_get_event_time
cl_ulong ff_opencl_get_event_time(cl_event event)
Gets the command start and end times for the given event and returns the difference (the time that th...
Definition: opencl.c:340
smooth
static float smooth(DeshakeOpenCLContext *deshake_ctx, float *gauss_kernel, int length, float max_val, AVFifo *values)
Definition: vf_deshake_opencl.c:889
DeshakeOpenCLContext::kernel_grayscale
cl_kernel kernel_grayscale
Definition: vf_deshake_opencl.c:238
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:244
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:195
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
FFMAX3
#define FFMAX3(a, b, c)
Definition: macros.h:48
DebugMatches
Definition: vf_deshake_opencl.c:123
d
d
Definition: ffmpeg_filter.c:156
imgutils.h
CL_CREATE_BUFFER_FLAGS
#define CL_CREATE_BUFFER_FLAGS(ctx, buffer_name, flags, size, host_ptr)
Create a buffer with the given information.
Definition: opencl.h:197
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
CL_FAIL_ON_ERROR
#define CL_FAIL_ON_ERROR(errcode,...)
A helper macro to handle OpenCL errors.
Definition: opencl.h:74
av_fifo_freep2
void av_fifo_freep2(AVFifo **f)
Free an AVFifo and reset pointer to NULL.
Definition: fifo.c:286
AV_PIX_FMT_GBRAP10BE
@ AV_PIX_FMT_GBRAP10BE
planar GBR 4:4:4:4 40bpp, big-endian
Definition: pixfmt.h:310
uninit
static av_cold int uninit(AVCodecContext *avctx)
Definition: crystalhd.c:285
DeshakeOpenCLContext::descriptors
cl_mem descriptors
Definition: vf_deshake_opencl.c:259
DeshakeOpenCLContext::matches
cl_mem matches
Definition: vf_deshake_opencl.c:263
int
int
Definition: ffmpeg_filter.c:156
MotionVector::p
PointPair p
Definition: vf_deshake_opencl.c:105
DeshakeOpenCLContext::brief_descriptors_time
unsigned long long brief_descriptors_time
Definition: vf_deshake_opencl.c:295
DeshakeOpenCLContext::command_queue
cl_command_queue command_queue
Definition: vf_deshake_opencl.c:237
PointPair
Definition: vf_deshake_opencl.c:97
BREIFN
#define BREIFN
Definition: vf_deshake_opencl.c:87
DeshakeOpenCLContext::sw_format
int sw_format
Definition: vf_deshake_opencl.c:229
DeshakeOpenCLContext::gauss_kernel
float * gauss_kernel
Definition: vf_deshake_opencl.c:216
DeshakeOpenCLContext::refine_features
cl_int refine_features
Definition: vf_deshake_opencl.c:277
SimilarityMatrix
Definition: vf_deshake_opencl.c:173
ringbuf_float_at
static void ringbuf_float_at(DeshakeOpenCLContext *deshake_ctx, AVFifo *values, float *val, int offset)
Definition: vf_deshake_opencl.c:846
optimize_model
static void optimize_model(DeshakeOpenCLContext *deshake_ctx, MotionVector *best_pairs, MotionVector *inliers, const int num_inliers, float best_err, double *model_out)
Definition: vf_deshake_opencl.c:602
ff_vf_deshake_opencl
const AVFilter ff_vf_deshake_opencl
Definition: vf_deshake_opencl.c:2161
DeshakeOpenCLContext::crop_y
CropInfo crop_y
Definition: vf_deshake_opencl.c:222
av_clipd
av_clipd
Definition: af_crystalizer.c:132
transforms
static const struct @86 transforms[18]
CropInfo::top_left
cl_float2 top_left
Definition: vf_deshake_opencl.c:180
decompose_transform
static FrameDelta decompose_transform(double *model)
Definition: vf_deshake_opencl.c:739
AVFilterContext::outputs
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:404
F1
#define F1(l, r, i)
Definition: cast5.c:51