#define BRIEF_PATCH_SIZE 31
#define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2)
#define MATCHES_CONTIG_SIZE 2000
#define ROUNDED_UP_DIV(a, b) ((a + (b - 1)) / b)

/* rand_in(): uniform random int in [low, high) */
return (av_lfg_get(alfg) % (high - low)) + low;
/* averaged_event_time_ms() */
return (double)total_time / (double)num_frames / 1000000.0;
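/*
 * ff_opencl_get_event_time() reports OpenCL profiling timestamps in
 * nanoseconds, so dividing the accumulated total by the frame count and
 * then by 1e6 yields an average cost in milliseconds per frame.
 */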
/* run_estimate_kernel(): fit a 2x3 affine model to three point pairs */
double x1 = point_pairs[0].p.p1.s[0];
double y1 = point_pairs[0].p.p1.s[1];
double x2 = point_pairs[1].p.p1.s[0];
double y2 = point_pairs[1].p.p1.s[1];
double x3 = point_pairs[2].p.p1.s[0];
double y3 = point_pairs[2].p.p1.s[1];

double X1 = point_pairs[0].p.p2.s[0];
double Y1 = point_pairs[0].p.p2.s[1];
double X2 = point_pairs[1].p.p2.s[0];
double Y2 = point_pairs[1].p.p2.s[1];
double X3 = point_pairs[2].p.p2.s[0];
double Y3 = point_pairs[2].p.p2.s[1];

double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );

model[0] = d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );
model[1] = d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );
model[2] = d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );

model[3] = d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );
model[4] = d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );
model[5] = d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );
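/*
 * The six model values are the exact (Cramer's rule) solution of the linear
 * system that maps the three source points (x_i, y_i) onto the three
 * destination points (X_i, Y_i):
 *
 *   X_i = model[0]*x_i + model[1]*y_i + model[2]
 *   Y_i = model[3]*x_i + model[4]*y_i + model[5]
 *
 * The shared factor d is the reciprocal of the system's determinant, which
 * is twice the signed area of the source triangle; this is why collinear
 * source points have to be rejected before this solve is attempted.
 */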
/* points_not_collinear(): reject triangles with (near-)zero area */
for (j = 0; j < i; j++) {
    double dx1 = points[j]->s[0] - points[i]->s[0];
    double dy1 = points[j]->s[1] - points[i]->s[1];

    for (k = 0; k < j; k++) {
        double dx2 = points[k]->s[0] - points[i]->s[0];
        double dy2 = points[k]->s[1] - points[i]->s[1];

        if (fabs(dx2*dy1 - dy2*dx1) <= 1.0) {
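/*
 * dx2*dy1 - dy2*dx1 is the 2D cross product of the two edge vectors, i.e.
 * twice the signed area of the triangle formed by points i, j, k; a
 * magnitude of at most one square pixel marks the triple as effectively
 * collinear, which would make the solve in run_estimate_kernel() singular.
 */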
/* check_subset(): gather the three p1 / p2 points for the collinearity test */
const cl_float2 *prev_points[] = {
    &pairs_subset[0].p.p1,
    &pairs_subset[1].p.p1,
    &pairs_subset[2].p.p1
};

const cl_float2 *curr_points[] = {
    &pairs_subset[0].p.p2,
    &pairs_subset[1].p.p2,
    &pairs_subset[2].p.p2
};

/* get_subset(AVLFG *alfg, const MotionVector *point_pairs, ...) */
const int num_point_pairs,
int i = 0, j, iters = 0;

for (; iters < max_attempts; iters++) {
    for (i = 0; i < 3 && iters < max_attempts;) {

        idx_i = idx[i] = rand_in(0, num_point_pairs, alfg);

        for (j = 0; j < i; j++) {
            if (idx_i == idx[j]) {

        pairs_subset[i] = point_pairs[idx[i]];

return i == 3 && iters < max_attempts;
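/*
 * This is the RANSAC minimal-sample step: draw three distinct pair indices
 * at random (duplicates are rejected by the inner j-loop), retrying up to
 * max_attempts times until a sampled triple also passes the collinearity
 * check. The return value tells the caller whether a usable subset was
 * found at all.
 */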
/* compute_error(): per-pair residual of the affine model */
const int num_point_pairs,

double F0 = model[0], F1 = model[1], F2 = model[2];
double F3 = model[3], F4 = model[4], F5 = model[5];

for (int i = 0; i < num_point_pairs; i++) {
    const cl_float2 *f = &point_pairs[i].p.p1;
    const cl_float2 *t = &point_pairs[i].p.p2;

    double a = F0*f->s[0] + F1*f->s[1] + F2 - t->s[0];
    double b = F3*f->s[0] + F4*f->s[1] + F5 - t->s[1];
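/*
 * (a, b) is the residual between where the model sends p1 and where p2 was
 * actually observed; the elided line presumably stores the squared distance
 * a*a + b*b into err[i], which matches find_inliers() below comparing err
 * against thresh * thresh.
 */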
/* find_inliers(): keep pairs whose squared error is within thresh^2 */
const int num_point_pairs,

float t = (float)(thresh * thresh);
int i, n = num_point_pairs, num_inliers = 0;

for (i = 0; i < n; i++) {
/* ransac_update_num_iters() */
confidence = av_clipd(confidence, 0.0, 1.0);
num_outliers = av_clipd(num_outliers, 0.0, 1.0);

num = FFMAX(1.0 - confidence, DBL_MIN);
denom = 1.0 - pow(1.0 - num_outliers, 3);
if (denom < DBL_MIN) {

return denom >= 0 || -num >= max_iters * (-denom) ? max_iters : (int)round(num / denom);
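/*
 * Standard RANSAC iteration-count update, with 3 being the minimal sample
 * size for an affine fit:
 *
 *   niters = log(1 - confidence) / log(1 - (1 - outlier_ratio)^3)
 *
 * num and denom are presumably moved into log space by the elided lines
 * between the two statements above. Worked example: confidence = 0.99 and
 * an observed outlier ratio of 0.5 give
 * log(0.01) / log(1 - 0.125) ~= 4.605 / 0.1335 ~= 34 iterations.
 */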
/* estimate_affine_2d(): RANSAC over the matched motion-vector point pairs */
const int num_point_pairs,
const double threshold,
const double confidence

double best_model[6], model[6];

int iter, niters = FFMAX(max_iters, 1);
int good_count, max_good_count = 0;

if (num_point_pairs < 3) {
} else if (num_point_pairs == 3) {

    for (int i = 0; i < 3; ++i) {

for (iter = 0; iter < niters; ++iter) {
    int found = get_subset(&deshake_ctx->alfg, point_pairs, num_point_pairs, pairs_subset, 10000);

    if (good_count > FFMAX(max_good_count, 2)) {
        for (int mi = 0; mi < 6; ++mi) {
            best_model[mi] = model[mi];
        }

        for (int pi = 0; pi < 3; pi++) {
            best_pairs[pi] = pairs_subset[pi];
        }

        max_good_count = good_count;

        (double)(num_point_pairs - good_count) / num_point_pairs,

if (max_good_count > 0) {
    for (int mi = 0; mi < 6; ++mi) {
        model_out[mi] = best_model[mi];
    }

    for (int pi = 0; pi < 3; ++pi) {
/* optimize_model(): greedy local refinement of the best RANSAC model */
const int num_inliers,

float move_x_val = 0.01;
float move_y_val = 0.01;

float old_move_x_val = 0;

int last_changed = 0;

for (int iters = 0; iters < 200; iters++) {

    best_pairs[0].p.p2.s[0] += move_x_val;

    best_pairs[0].p.p2.s[1] += move_y_val;

    for (int j = 0; j < num_inliers; j++) {

    if (total_err < best_err) {
        for (int mi = 0; mi < 6; ++mi) {
            model_out[mi] = model[mi];
        }

        best_err = total_err;
        last_changed = iters;

    best_pairs[0].p.p2.s[0] -= move_x_val;

    best_pairs[0].p.p2.s[1] -= move_y_val;

    if (iters - last_changed > 4) {

    old_move_x_val = move_x_val;

if (old_move_x_val < 0) {
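/*
 * optimize_model() is a small greedy hill-climb: nudge one matched point,
 * re-fit the model, and keep the perturbation only when the total error
 * over all inliers drops; unsuccessful moves are undone via the -= lines.
 * The search gives up after 200 iterations, or once 5 consecutive
 * iterations bring no improvement (iters - last_changed > 4), with the
 * step value shrinking and flipping sign along the way.
 */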
/* minimize_error(): random re-fits over the inlier set, keeping the best */
const int num_inliers,

float best_err = FLT_MAX;
double best_model[6], model[6];

for (int i = 0; i < max_iters; i++) {
    int found = get_subset(&deshake_ctx->alfg, inliers, num_inliers, pairs_subset, 10000);

    for (int j = 0; j < num_inliers; j++) {

    if (total_err < best_err) {
        for (int mi = 0; mi < 6; ++mi) {
            best_model[mi] = model[mi];
        }

        for (int pi = 0; pi < 3; pi++) {
            best_pairs[pi] = pairs_subset[pi];
        }

        best_err = total_err;

for (int mi = 0; mi < 6; ++mi) {
    model_out[mi] = best_model[mi];
}

for (int pi = 0; pi < 3; ++pi) {

optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);
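/*
 * Refinement pipeline: minimize_error() repeatedly re-fits on random
 * 3-subsets of the inliers and keeps the model with the lowest total error,
 * then hands the winning subset to optimize_model() above for a final
 * greedy polish before the result lands in model_out.
 */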
/* decompose_transform(): split the 2x2 linear part into rotation/scale/skew */
double delta = a * d - b * c;

memset(&ret, 0, sizeof(ret));

if (a != 0 || b != 0) {

    ret.scale.s[1] = delta / r;
    ret.skew.s[0] = atan((a * c + b * d) / (r * r));
} else if (c != 0 || d != 0) {
    double s = sqrt(c * c + d * d);

    ret.scale.s[0] = delta / s;
    ret.skew.s[1] = atan((a * c + b * d) / (s * s));
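/*
 * This is the standard rotation/scale/shear factorization of the 2x2 linear
 * part [a b; c d], with delta = a*d - b*c its determinant. In the first
 * branch r is presumably hypot(a, b), set in an elided line: delta / r is
 * then the complementary scale and atan((a*c + b*d) / r^2) the shear. The
 * else-branch mirrors the same math with the two axes swapping roles, using
 * s = sqrt(c*c + d*d).
 */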
/* make_vectors_contig(): walk the feature grid row by row */
for (int i = 0; i < size_y; ++i) {
    for (int j = 0; j < size_x; ++j) {
/* gaussian_for() */
return 1.0f / expf(((float)x * (float)x) / (2.0f * sigma * sigma));
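/*
 * 1.0f / expf(u) is just expf(-u), so this returns the unnormalized
 * Gaussian exp(-x^2 / (2*sigma^2)); make_gauss_kernel() below normalizes
 * the sampled window so its weights sum to 1.
 */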
/* make_gauss_kernel(): sample the Gaussian, then normalize the window */
int window_half = length / 2;

gauss_kernel[i] = val;

gauss_kernel[i] /= gauss_sum;
/* ringbuf_float_at(): clamped access into a motion ring buffer */
int clip_start, clip_end, offset_clipped;

offset_clipped = av_clip(

offset_clipped * sizeof(float),
/* smooth(): Gaussian-smooth one motion parameter, with adaptive sigma */
float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,
      percent_of_max, inverted_percent;

float large_sigma = 40.0f;
float small_sigma = 2.0f;

best_sigma = (large_sigma - 0.5f) * deshake_ctx->smooth_percent + 0.5f;

for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {

    new_large_s += old * gauss_kernel[j];

for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {

    new_small_s += old * gauss_kernel[j];

diff_between = fabsf(new_large_s - new_small_s);
percent_of_max = diff_between / max_val;
inverted_percent = 1 - percent_of_max;
best_sigma = large_sigma * powf(inverted_percent, 40);

for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {

    new_best += old * gauss_kernel[j];
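/*
 * Adaptive sigma selection: the ring buffer is smoothed twice, once with a
 * very wide Gaussian (sigma 40) and once with a very narrow one (sigma 2).
 * When the two results diverge (large diff_between relative to max_val),
 * the motion is changing quickly and is probably intentional, so
 * powf(inverted_percent, 40) collapses best_sigma toward small values and
 * the filter follows the motion instead of fighting it. A nonzero
 * smooth_strength option bypasses this heuristic via smooth_percent.
 */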
/* transformed_point(): apply the affine matrix to (x, y) */
ret.s[0] = x * transform[0] + y * transform[1] + transform[2];
ret.s[1] = x * transform[3] + y * transform[4] + transform[5];
/* transform_center_scale(): build a matrix that rotates/scales about the frame center */
float center_s_w, center_s_h;

center_s_w = center_w - center_s.s[0];
center_s_h = center_h - center_s.s[1];

x_shift + center_s_w,
y_shift + center_s_h,
/* update_needed_crop(): grow the crop so no mirrored edges stay visible */
float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;

cl_float2 bottom_right = transformed_point(frame_width, frame_height, transform);
float ar_h = frame_height / frame_width;
float ar_w = frame_width / frame_height;

adjusted_width = new_height * ar_w;

if (adjusted_x >= crop->top_left.s[0]) {

adjusted_height = new_width * ar_h;
adjusted_y = crop->bottom_right.s[1] - adjusted_height;
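/*
 * The crop rectangle is re-derived from the transformed frame corners and
 * then forced back to the original aspect ratio: whichever dimension
 * dictates the tighter crop, the other is recomputed from it via ar_w or
 * ar_h, so the final upscale back to full size does not distort the image.
 */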
/* deshake_opencl_init() */
cl_ulong8 zeroed_ulong8;

cl_image_format grayscale_format;
cl_image_desc grayscale_desc;
cl_command_queue_properties queue_props;

const int descriptor_buf_size = image_grid_32 * (BREIFN / 8);
const int features_buf_size = image_grid_32 * sizeof(cl_float2);

memset(&zeroed_ulong8, 0, sizeof(cl_ulong8));

if (!pattern_host) {

for (int j = 0; j < 2; ++j) {

pattern_host[i] = pair;

for (int i = 0; i < 14; i++) {
    if (ctx->sw_format == disallowed_formats[i]) {

ctx->sw_format = hw_frames_ctx->sw_format;

queue_props = CL_QUEUE_PROFILING_ENABLE;

grayscale_format.image_channel_order = CL_R;
grayscale_format.image_channel_data_type = CL_FLOAT;

grayscale_desc = (cl_image_desc) {
    .image_type = CL_MEM_OBJECT_IMAGE2D,
    .image_width = outlink->w,
    .image_height = outlink->h,
    .image_array_size = 0,
    .image_row_pitch = 0,
    .image_slice_pitch = 0,
    .num_mip_levels = 0,
};

CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
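/*
 * Sizing notes: BREIFN appears to be the number of bits in one BRIEF
 * descriptor, so each 32x32 grid cell needs BREIFN / 8 bytes of descriptor
 * storage plus one cl_float2 for its refined feature coordinates. The
 * command queue is created with CL_QUEUE_PROFILING_ENABLE so the per-kernel
 * timings reported through averaged_event_time_ms() can be collected.
 */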
1360 "\tframe moved from: %f x, %f y\n" 1361 "\t to: %f x, %f y\n" 1362 "\t rotated from: %f degrees\n" 1363 "\t to: %f degrees\n" 1364 "\t scaled from: %f x, %f y\n" 1365 "\t to: %f x, %f y\n" 1367 "\tframe moved by: %f x, %f y\n" 1368 "\t rotated by: %f degrees\n" 1369 "\t scaled by: %f x, %f y\n",
1372 new_vals[RingbufX], new_vals[RingbufY],
1376 new_vals[RingbufScaleX], new_vals[RingbufScaleY],
1377 old_vals[RingbufX] - new_vals[RingbufX], old_vals[RingbufY] - new_vals[RingbufY],
1379 new_vals[RingbufScaleX] / old_vals[RingbufScaleX], new_vals[RingbufScaleY] / old_vals[RingbufScaleY]
/* filter_frame() */
float transform_y[9];
float transform_uv[9];
float transform_crop_y[9];
float transform_crop_uv[9];
float transform_debug_rgb[9];
size_t global_work[2];

cl_mem src, transformed, dst;

cl_event transform_event, crop_upscale_event;

cl_int num_model_matches;

const float center_w = (float)input_frame->width / 2;
const float center_h = (float)input_frame->height / 2;

const float center_w_chroma = (float)chroma_width / 2;
const float center_h_chroma = (float)chroma_height / 2;

const float luma_w_over_chroma_w = ((float)input_frame->width / (float)chroma_width);
const float luma_h_over_chroma_h = ((float)input_frame->height / (float)chroma_height);

(old_vals[RingbufX] - new_vals[RingbufX]) / luma_w_over_chroma_w,
(old_vals[RingbufY] - new_vals[RingbufY]) / luma_h_over_chroma_h,
old_vals[RingbufRot] - new_vals[RingbufRot],
new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
if (!cropped_frame) {

if (!transformed_frame) {

transforms[1] = transforms[2] = deshake_ctx->transform_uv;

for (int p = 0; p < FF_ARRAY_ELEMS(transformed_frame->data); p++) {

    src = (cl_mem)input_frame->data[p];
    transformed = (cl_mem)transformed_frame->data[p];

    { sizeof(cl_mem), &src },
    { sizeof(cl_mem), &transformed },
    { sizeof(cl_mem), &transforms[p] },

new_vals[RingbufX] - old_vals[RingbufX],
new_vals[RingbufY] - old_vals[RingbufY],
new_vals[RingbufRot] - old_vals[RingbufRot],
old_vals[RingbufScaleX] / new_vals[RingbufScaleX],
old_vals[RingbufScaleY] / new_vals[RingbufScaleY],

transformed = (cl_mem)transformed_frame->data[0];

{ sizeof(cl_mem), &transformed },
{ sizeof(cl_int), &num_model_matches },

(old_vals[RingbufX] - new_vals[RingbufX]) / 5,
(old_vals[RingbufY] - new_vals[RingbufY]) / 5,
(old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
new_vals[RingbufScaleY] / old_vals[RingbufScaleY],

(old_vals[RingbufX] - new_vals[RingbufX]) / (5 * luma_w_over_chroma_w),
(old_vals[RingbufY] - new_vals[RingbufY]) / (5 * luma_h_over_chroma_h),
(old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
new_vals[RingbufScaleY] / old_vals[RingbufScaleY],

crops[0] = deshake_ctx->crop_y;
crops[1] = crops[2] = deshake_ctx->crop_uv;

    dst = (cl_mem)cropped_frame->data[p];
    transformed = (cl_mem)transformed_frame->data[p];

    &crop_upscale_event,
    { sizeof(cl_mem), &transformed },
    { sizeof(cl_mem), &dst },
    { sizeof(cl_float2), &crops[p].top_left },
    { sizeof(cl_float2), &crops[p].bottom_right },
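/*
 * Transform stage summary: one transform kernel launch per plane, with the
 * luma matrix on plane 0 and the uv matrix (its shifts divided by the
 * luma/chroma size ratios computed above) on planes 1 and 2; the debug path
 * appears to draw the matched vectors into plane 0 with its own rgb matrix.
 * A second per-plane pass then runs crop_upscale with the crop_y / crop_uv
 * rectangles to hide the mirrored borders the transform introduces.
 */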
/* queue_frame(): run the feature-tracking pipeline on the new frame */
int num_inliers = 0;

size_t global_work[2];
size_t harris_global_work[2];
size_t grid_32_global_work[2];
int grid_32_h, grid_32_w;
size_t local_work[2];

cl_event grayscale_event, harris_response_event, refine_features_event,
         brief_event, match_descriptors_event, read_buf_event;

grid_32_global_work[0] /= 32;
grid_32_global_work[1] /= 32;

if (deshake_ctx->is_yuv) {

src = (cl_mem)input_frame->data[0];

{ sizeof(cl_mem), &src },
{ sizeof(cl_mem), &deshake_ctx->grayscale }

deshake_ctx->command_queue,
deshake_ctx->kernel_harris_response,
&harris_response_event,
{ sizeof(cl_mem), &deshake_ctx->grayscale },
{ sizeof(cl_mem), &deshake_ctx->harris_buf }

deshake_ctx->command_queue,
deshake_ctx->kernel_refine_features,
grid_32_global_work,
&refine_features_event,
{ sizeof(cl_mem), &deshake_ctx->grayscale },
{ sizeof(cl_mem), &deshake_ctx->harris_buf },
{ sizeof(cl_mem), &deshake_ctx->refined_features },
{ sizeof(cl_int), &deshake_ctx->refine_features }

deshake_ctx->command_queue,
deshake_ctx->kernel_brief_descriptors,
grid_32_global_work,
{ sizeof(cl_mem), &deshake_ctx->grayscale },
{ sizeof(cl_mem), &deshake_ctx->refined_features },
{ sizeof(cl_mem), &deshake_ctx->descriptors },
{ sizeof(cl_mem), &deshake_ctx->brief_pattern }
goto no_motion_data;

deshake_ctx->command_queue,
deshake_ctx->kernel_match_descriptors,
grid_32_global_work,
&match_descriptors_event,
{ sizeof(cl_mem), &deshake_ctx->prev_refined_features },
{ sizeof(cl_mem), &deshake_ctx->refined_features },
{ sizeof(cl_mem), &deshake_ctx->descriptors },
{ sizeof(cl_mem), &deshake_ctx->prev_descriptors },
{ sizeof(cl_mem), &deshake_ctx->matches }

cle = clEnqueueReadBuffer(
    deshake_ctx->command_queue,
    deshake_ctx->matches,
    deshake_ctx->matches_host,

if (num_vectors < 10) {

if (deshake_ctx->abs_motion.data_end_offset == -1) {
    deshake_ctx->abs_motion.data_end_offset =

goto no_motion_data;

deshake_ctx->matches_contig_host,

goto no_motion_data;

for (int i = 0; i < num_vectors; i++) {
    if (deshake_ctx->matches_contig_host[i].should_consider) {
        deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[i];

deshake_ctx->inliers,

goto no_motion_data;

deshake_ctx->abs_motion.ringbuffers[i],
av_fifo_size(deshake_ctx->abs_motion.ringbuffers[i]) - sizeof(float),
if (deshake_ctx->debug_on) {
    if (!deshake_ctx->is_yuv) {

/* no_motion_data: reset the matches and fall through to the buffer swaps */
for (int i = 0; i < num_vectors; i++) {
    deshake_ctx->matches_contig_host[i].should_consider = 0;
}
debug_matches.num_model_matches = 0;

if (deshake_ctx->debug_on) {
    "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n"

temp = deshake_ctx->prev_descriptors;
deshake_ctx->prev_descriptors = deshake_ctx->descriptors;
deshake_ctx->descriptors = temp;

temp = deshake_ctx->prev_refined_features;
deshake_ctx->prev_refined_features = deshake_ctx->refined_features;
deshake_ctx->refined_features = temp;
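/*
 * Feature-tracking pipeline, one kernel per stage: grayscale ->
 * harris_response -> refine_features -> brief_descriptors ->
 * match_descriptors, followed by a read of the matches buffer back to the
 * host so the RANSAC fit can run on the CPU. The descriptors and refined
 * features are double-buffered: the pointer swaps above make this frame's
 * data the "previous" data for the next call without any copying.
 */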
if (deshake_ctx->debug_on) {
    if (num_vectors == 0) {
        debug_matches.matches = NULL;

    if (!debug_matches.matches) {

    for (int i = 0; i < num_vectors; i++) {
        debug_matches.matches[i] = deshake_ctx->matches_contig_host[i];
    }
    debug_matches.num_matches = num_vectors;

deshake_ctx->abs_motion.debug_matches,

deshake_ctx->abs_motion.ringbuffers[i],
clFinish(deshake_ctx->command_queue);

/* activate() */
if (!deshake_ctx->eof) {

deshake_ctx->eof = 1;

if (deshake_ctx->eof) {

"Average kernel execution times:\n"
"\t        grayscale: %0.3f ms\n"
"\t  harris_response: %0.3f ms\n"
"\t  refine_features: %0.3f ms\n"
"\tbrief_descriptors: %0.3f ms\n"
"\tmatch_descriptors: %0.3f ms\n"
"\t        transform: %0.3f ms\n"
"\t     crop_upscale: %0.3f ms\n"
"Average buffer read times:\n"
"\t     features buf: %0.3f ms\n",

if (!deshake_ctx->eof) {
#define OFFSET(x) offsetof(DeshakeOpenCLContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

"tripod", "simulates a tripod by preventing any camera movement whatsoever "
          "from the original frame",

"debug", "turn on additional debugging information",

"adaptive_crop", "attempt to subtly crop borders to reduce mirrored content",

"refine_features", "refine feature point locations at a sub-pixel level",

"smooth_strength", "smoothing strength (0 attempts to adaptively determine optimal strength)",

"smooth_window_multiplier", "multiplier for number of frames to buffer for motion data",

.name       = "deshake_opencl",
.priv_class = &deshake_opencl_class,
.inputs     = deshake_opencl_inputs,
.outputs    = deshake_opencl_outputs,
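/*
 * Example usage (a typical OpenCL filter-graph invocation; the device name
 * and option values are illustrative, not mandated by this file):
 *
 *   ffmpeg -init_hw_device opencl=gpu -filter_hw_device gpu -i in.mp4 \
 *       -vf 'format=nv12,hwupload,deshake_opencl=smooth_strength=0.5,hwdownload,format=nv12' \
 *       out.mp4
 */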