89 #define BRIEF_PATCH_SIZE 31
90 #define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2)
92 #define MATCHES_CONTIG_SIZE 2000
94 #define ROUNDED_UP_DIV(a, b) ((a + (b - 1)) / b)
305 return (
av_lfg_get(alfg) % (high - low)) + low;
311 return (
double)total_time / (
double)num_frames / 1000000.0;
323 double x1 = point_pairs[0].
p.
p1.s[0];
324 double y1 = point_pairs[0].
p.
p1.s[1];
325 double x2 = point_pairs[1].
p.
p1.s[0];
326 double y2 = point_pairs[1].
p.
p1.s[1];
327 double x3 = point_pairs[2].
p.
p1.s[0];
328 double y3 = point_pairs[2].
p.
p1.s[1];
331 double X1 = point_pairs[0].
p.
p2.s[0];
332 double Y1 = point_pairs[0].
p.
p2.s[1];
333 double X2 = point_pairs[1].
p.
p2.s[0];
334 double Y2 = point_pairs[1].
p.
p2.s[1];
335 double X3 = point_pairs[2].
p.
p2.s[0];
336 double Y3 = point_pairs[2].
p.
p2.s[1];
338 double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );
340 model[0] =
d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );
341 model[1] =
d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );
342 model[2] =
d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );
344 model[3] =
d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );
345 model[4] =
d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );
346 model[5] =
d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );
354 for (j = 0; j <
i; j++) {
355 double dx1 = points[j]->s[0] - points[
i]->s[0];
356 double dy1 = points[j]->s[1] - points[
i]->s[1];
358 for (k = 0; k < j; k++) {
359 double dx2 = points[k]->s[0] - points[
i]->s[0];
360 double dy2 = points[k]->s[1] - points[
i]->s[1];
365 if (
fabs(dx2*dy1 - dy2*dx1) <= 1.0) {
378 const cl_float2 *prev_points[] = {
379 &pairs_subset[0].
p.
p1,
380 &pairs_subset[1].
p.
p1,
381 &pairs_subset[2].
p.
p1
384 const cl_float2 *curr_points[] = {
385 &pairs_subset[0].
p.
p2,
386 &pairs_subset[1].
p.
p2,
387 &pairs_subset[2].
p.
p2
397 const int num_point_pairs,
402 int i = 0, j, iters = 0;
404 for (; iters < max_attempts; iters++) {
405 for (
i = 0;
i < 3 && iters < max_attempts;) {
409 idx_i = idx[
i] =
rand_in(0, num_point_pairs, alfg);
411 for (j = 0; j <
i; j++) {
412 if (idx_i == idx[j]) {
422 pairs_subset[
i] = point_pairs[idx[
i]];
432 return i == 3 && iters < max_attempts;
438 const int num_point_pairs,
442 double F0 = model[0],
F1 = model[1],
F2 = model[2];
443 double F3 = model[3], F4 = model[4], F5 = model[5];
445 for (
int i = 0;
i < num_point_pairs;
i++) {
446 const cl_float2 *
f = &point_pairs[
i].
p.
p1;
447 const cl_float2 *t = &point_pairs[
i].
p.
p2;
449 double a = F0*
f->s[0] +
F1*
f->s[1] +
F2 - t->s[0];
450 double b =
F3*
f->s[0] + F4*
f->s[1] + F5 - t->s[1];
462 const int num_point_pairs,
467 float t = (
float)(thresh * thresh);
468 int i, n = num_point_pairs, num_inliers = 0;
472 for (
i = 0;
i < n;
i++) {
500 confidence =
av_clipd(confidence, 0.0, 1.0);
501 num_outliers =
av_clipd(num_outliers, 0.0, 1.0);
504 num =
FFMAX(1.0 - confidence, DBL_MIN);
505 denom = 1.0 - pow(1.0 - num_outliers, 3);
506 if (denom < DBL_MIN) {
513 return denom >= 0 || -num >= max_iters * (-denom) ? max_iters : (
int)
round(num / denom);
522 const int num_point_pairs,
524 const double threshold,
526 const double confidence
529 double best_model[6], model[6];
532 int iter, niters =
FFMAX(max_iters, 1);
533 int good_count, max_good_count = 0;
536 if (num_point_pairs < 3) {
538 }
else if (num_point_pairs == 3) {
542 for (
int i = 0;
i < 3; ++
i) {
549 for (iter = 0; iter < niters; ++iter) {
550 int found =
get_subset(&deshake_ctx->
alfg, point_pairs, num_point_pairs, pairs_subset, 10000);
563 if (good_count >
FFMAX(max_good_count, 2)) {
564 for (
int mi = 0;
mi < 6; ++
mi) {
565 best_model[
mi] = model[
mi];
568 for (
int pi = 0; pi < 3; pi++) {
569 best_pairs[pi] = pairs_subset[pi];
572 max_good_count = good_count;
575 (
double)(num_point_pairs - good_count) / num_point_pairs,
581 if (max_good_count > 0) {
582 for (
int mi = 0;
mi < 6; ++
mi) {
583 model_out[
mi] = best_model[
mi];
586 for (
int pi = 0; pi < 3; ++pi) {
605 const int num_inliers,
609 float move_x_val = 0.01;
610 float move_y_val = 0.01;
612 float old_move_x_val = 0;
614 int last_changed = 0;
616 for (
int iters = 0; iters < 200; iters++) {
620 best_pairs[0].
p.
p2.s[0] += move_x_val;
622 best_pairs[0].
p.
p2.s[0] += move_y_val;
628 for (
int j = 0; j < num_inliers; j++) {
632 if (total_err < best_err) {
633 for (
int mi = 0;
mi < 6; ++
mi) {
634 model_out[
mi] = model[
mi];
637 best_err = total_err;
638 last_changed = iters;
642 best_pairs[0].
p.
p2.s[0] -= move_x_val;
644 best_pairs[0].
p.
p2.s[0] -= move_y_val;
647 if (iters - last_changed > 4) {
652 old_move_x_val = move_x_val;
660 if (old_move_x_val < 0) {
678 const int num_inliers,
683 float best_err = FLT_MAX;
684 double best_model[6], model[6];
687 for (
int i = 0;
i < max_iters;
i++) {
689 int found =
get_subset(&deshake_ctx->
alfg, inliers, num_inliers, pairs_subset, 10000);
702 for (
int j = 0; j < num_inliers; j++) {
706 if (
i == 0 || total_err < best_err) {
707 for (
int mi = 0;
mi < 6; ++
mi) {
708 best_model[
mi] = model[
mi];
711 for (
int pi = 0; pi < 3; pi++) {
712 best_pairs[pi] = pairs_subset[pi];
715 best_err = total_err;
719 for (
int mi = 0;
mi < 6; ++
mi) {
720 model_out[
mi] = best_model[
mi];
723 for (
int pi = 0; pi < 3; ++pi) {
729 optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);
750 memset(&
ret, 0,
sizeof(
ret));
752 ret.translation.s[0] = e;
753 ret.translation.s[1] =
f;
756 if (
a != 0 ||
b != 0) {
762 ret.skew.s[0] = atan((
a *
c +
b *
d) / (
r *
r));
764 }
else if (
c != 0 ||
d != 0) {
765 double s = sqrt(
c *
c +
d *
d);
771 ret.skew.s[1] = atan((
a *
c +
b *
d) / (
s *
s));
785 for (
int i = 0;
i < size_y; ++
i) {
786 for (
int j = 0; j < size_x; ++j) {
805 return 1.0f /
expf(((
float)x * (
float)x) / (2.0
f * sigma * sigma));
813 int window_half = length / 2;
815 for (
int i = 0;
i < length; ++
i) {
819 gauss_kernel[
i] =
val;
823 for (
int i = 0;
i < length; ++
i) {
824 gauss_kernel[
i] /= gauss_sum;
851 int clip_start, clip_end, offset_clipped;
895 float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,
896 percent_of_max, inverted_percent;
898 float large_sigma = 40.0f;
899 float small_sigma = 2.0f;
903 best_sigma = (large_sigma - 0.5f) * deshake_ctx->
smooth_percent + 0.5f;
915 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
917 new_large_s += old * gauss_kernel[j];
921 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
923 new_small_s += old * gauss_kernel[j];
926 diff_between =
fabsf(new_large_s - new_small_s);
927 percent_of_max = diff_between / max_val;
928 inverted_percent = 1 - percent_of_max;
929 best_sigma = large_sigma *
powf(inverted_percent, 40);
933 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
935 new_best += old * gauss_kernel[j];
963 float center_s_w, center_s_h;
975 center_s_w = center_w - center_s.s[0];
976 center_s_h = center_h - center_s.s[1];
979 x_shift + center_s_w,
980 y_shift + center_s_h,
996 float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;
1002 float ar_h = frame_height / frame_width;
1003 float ar_w = frame_width / frame_height;
1039 adjusted_width = new_height * ar_w;
1042 if (adjusted_x >= crop->
top_left.s[0]) {
1045 adjusted_height = new_width * ar_h;
1046 adjusted_y = crop->
bottom_right.s[1] - adjusted_height;
1062 if (
ctx->gauss_kernel)
1065 if (
ctx->ransac_err)
1068 if (
ctx->matches_host)
1071 if (
ctx->matches_contig_host)
1102 if (
ctx->debug_on) {
1120 cl_ulong8 zeroed_ulong8;
1122 cl_image_format grayscale_format;
1123 cl_image_desc grayscale_desc;
1124 cl_command_queue_properties queue_props;
1146 const int descriptor_buf_size = image_grid_32 * (
BREIFN / 8);
1147 const int features_buf_size = image_grid_32 *
sizeof(cl_float2);
1159 ctx->curr_frame = 0;
1161 memset(&zeroed_ulong8, 0,
sizeof(cl_ulong8));
1164 if (!
ctx->gauss_kernel) {
1170 if (!
ctx->ransac_err) {
1179 if (!
ctx->abs_motion.ringbuffers[
i]) {
1185 if (
ctx->debug_on) {
1187 ctx->smooth_window / 2,
1191 if (!
ctx->abs_motion.debug_matches) {
1197 ctx->abs_motion.curr_frame_offset = 0;
1198 ctx->abs_motion.data_start_offset = -1;
1199 ctx->abs_motion.data_end_offset = -1;
1202 if (!pattern_host) {
1208 if (!
ctx->matches_host) {
1214 if (!
ctx->matches_contig_host) {
1220 if (!
ctx->inliers) {
1230 for (
int j = 0; j < 2; ++j) {
1235 pattern_host[
i] = pair;
1238 for (
int i = 0;
i < 14;
i++) {
1239 if (
ctx->sw_format == disallowed_formats[
i]) {
1251 ctx->sw_format = hw_frames_ctx->sw_format;
1257 if (
ctx->debug_on) {
1258 queue_props = CL_QUEUE_PROFILING_ENABLE;
1262 ctx->command_queue = clCreateCommandQueue(
1263 ctx->ocf.hwctx->context,
1264 ctx->ocf.hwctx->device_id,
1281 grayscale_format.image_channel_order = CL_R;
1282 grayscale_format.image_channel_data_type = CL_FLOAT;
1284 grayscale_desc = (cl_image_desc) {
1285 .image_type = CL_MEM_OBJECT_IMAGE2D,
1286 .image_width = outlink->
w,
1287 .image_height = outlink->
h,
1289 .image_array_size = 0,
1290 .image_row_pitch = 0,
1291 .image_slice_pitch = 0,
1292 .num_mip_levels = 0,
1297 ctx->grayscale = clCreateImage(
1298 ctx->ocf.hwctx->context,
1314 CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1324 if (
ctx->debug_on) {
1329 ctx->initialized = 1;
1343 "\tframe moved from: %f x, %f y\n"
1344 "\t to: %f x, %f y\n"
1345 "\t rotated from: %f degrees\n"
1346 "\t to: %f degrees\n"
1347 "\t scaled from: %f x, %f y\n"
1348 "\t to: %f x, %f y\n"
1350 "\tframe moved by: %f x, %f y\n"
1351 "\t rotated by: %f degrees\n"
1352 "\t scaled by: %f x, %f y\n",
1379 float transform_y[9];
1381 float transform_uv[9];
1383 float transform_crop_y[9];
1385 float transform_crop_uv[9];
1386 float transform_debug_rgb[9];
1387 size_t global_work[2];
1389 cl_mem
src, transformed, dst;
1392 cl_event transform_event, crop_upscale_event;
1394 cl_int num_model_matches;
1396 const float center_w = (
float)input_frame->
width / 2;
1397 const float center_h = (
float)input_frame->
height / 2;
1403 const float center_w_chroma = (
float)chroma_width / 2;
1404 const float center_h_chroma = (
float)chroma_height / 2;
1406 const float luma_w_over_chroma_w = ((
float)input_frame->
width / (
float)chroma_width);
1407 const float luma_h_over_chroma_h = ((
float)input_frame->
height / (
float)chroma_height);
1507 if (!cropped_frame) {
1513 if (!transformed_frame) {
1521 for (
int p = 0; p <
FF_ARRAY_ELEMS(transformed_frame->data); p++) {
1523 src = (cl_mem)input_frame->
data[p];
1524 transformed = (cl_mem)transformed_frame->data[p];
1539 { sizeof(cl_mem), &src },
1540 { sizeof(cl_mem), &transformed },
1541 { sizeof(cl_mem), &transforms[p] },
1578 transformed = (cl_mem)transformed_frame->data[0];
1585 {
sizeof(cl_mem), &transformed },
1588 {
sizeof(cl_int), &num_model_matches },
1619 crops[0] = deshake_ctx->
crop_y;
1620 crops[1] = crops[2] = deshake_ctx->
crop_uv;
1624 dst = (cl_mem)cropped_frame->
data[p];
1625 transformed = (cl_mem)transformed_frame->data[p];
1639 &crop_upscale_event,
1640 { sizeof(cl_mem), &transformed },
1641 { sizeof(cl_mem), &dst },
1642 { sizeof(cl_float2), &crops[p].top_left },
1643 { sizeof(cl_float2), &crops[p].bottom_right },
1731 int num_inliers = 0;
1735 size_t global_work[2];
1736 size_t harris_global_work[2];
1737 size_t grid_32_global_work[2];
1738 int grid_32_h, grid_32_w;
1739 size_t local_work[2];
1743 cl_event grayscale_event, harris_response_event, refine_features_event,
1744 brief_event, match_descriptors_event, read_buf_event;
1765 grid_32_global_work[0] /= 32;
1766 grid_32_global_work[1] /= 32;
1771 if (deshake_ctx->
is_yuv) {
1774 src = (cl_mem)input_frame->
data[0];
1782 {
sizeof(cl_mem), &
src },
1783 {
sizeof(cl_mem), &deshake_ctx->
grayscale }
1788 deshake_ctx->command_queue,
1789 deshake_ctx->kernel_harris_response,
1792 &harris_response_event,
1793 { sizeof(cl_mem), &deshake_ctx->grayscale },
1794 { sizeof(cl_mem), &deshake_ctx->harris_buf }
1798 deshake_ctx->command_queue,
1799 deshake_ctx->kernel_refine_features,
1800 grid_32_global_work,
1802 &refine_features_event,
1803 { sizeof(cl_mem), &deshake_ctx->grayscale },
1804 { sizeof(cl_mem), &deshake_ctx->harris_buf },
1805 { sizeof(cl_mem), &deshake_ctx->refined_features },
1806 { sizeof(cl_int), &deshake_ctx->refine_features }
1810 deshake_ctx->command_queue,
1811 deshake_ctx->kernel_brief_descriptors,
1812 grid_32_global_work,
1815 { sizeof(cl_mem), &deshake_ctx->grayscale },
1816 { sizeof(cl_mem), &deshake_ctx->refined_features },
1817 { sizeof(cl_mem), &deshake_ctx->descriptors },
1818 { sizeof(cl_mem), &deshake_ctx->brief_pattern}
1825 goto no_motion_data;
1829 deshake_ctx->command_queue,
1830 deshake_ctx->kernel_match_descriptors,
1831 grid_32_global_work,
1833 &match_descriptors_event,
1834 { sizeof(cl_mem), &deshake_ctx->prev_refined_features },
1835 { sizeof(cl_mem), &deshake_ctx->refined_features },
1836 { sizeof(cl_mem), &deshake_ctx->descriptors },
1837 { sizeof(cl_mem), &deshake_ctx->prev_descriptors },
1838 { sizeof(cl_mem), &deshake_ctx->matches }
1841 cle = clEnqueueReadBuffer(
1842 deshake_ctx->command_queue,
1843 deshake_ctx->matches,
1847 deshake_ctx->matches_host,
1856 if (num_vectors < 10) {
1869 if (deshake_ctx->abs_motion.data_end_offset == -1) {
1870 deshake_ctx->abs_motion.data_end_offset =
1874 goto no_motion_data;
1879 deshake_ctx->matches_contig_host,
1887 goto no_motion_data;
1890 for (
int i = 0;
i < num_vectors;
i++) {
1891 if (deshake_ctx->matches_contig_host[
i].should_consider) {
1892 deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[
i];
1899 deshake_ctx->inliers,
1905 goto no_motion_data;
1914 deshake_ctx->abs_motion.ringbuffers[
i],
1925 if (deshake_ctx->debug_on) {
1926 if (!deshake_ctx->is_yuv) {
1945 for (
int i = 0;
i < num_vectors;
i++) {
1946 deshake_ctx->matches_contig_host[
i].should_consider = 0;
1948 debug_matches.num_model_matches = 0;
1950 if (deshake_ctx->debug_on) {
1952 "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n"
1961 temp = deshake_ctx->prev_descriptors;
1962 deshake_ctx->prev_descriptors = deshake_ctx->descriptors;
1963 deshake_ctx->descriptors =
temp;
1966 temp = deshake_ctx->prev_refined_features;
1967 deshake_ctx->prev_refined_features = deshake_ctx->refined_features;
1968 deshake_ctx->refined_features =
temp;
1970 if (deshake_ctx->debug_on) {
1971 if (num_vectors == 0) {
1972 debug_matches.matches =
NULL;
1976 if (!debug_matches.matches) {
1982 for (
int i = 0;
i < num_vectors;
i++) {
1983 debug_matches.matches[
i] = deshake_ctx->matches_contig_host[
i];
1985 debug_matches.num_matches = num_vectors;
1988 deshake_ctx->abs_motion.debug_matches,
1993 av_fifo_write(deshake_ctx->abs_motion.ringbuffers[
i], &new_vals[
i], 1);
1999 clFinish(deshake_ctx->command_queue);
2015 if (!deshake_ctx->
eof) {
2052 deshake_ctx->
eof = 1;
2056 if (deshake_ctx->
eof) {
2071 "Average kernel execution times:\n"
2072 "\t grayscale: %0.3f ms\n"
2073 "\t harris_response: %0.3f ms\n"
2074 "\t refine_features: %0.3f ms\n"
2075 "\tbrief_descriptors: %0.3f ms\n"
2076 "\tmatch_descriptors: %0.3f ms\n"
2077 "\t transform: %0.3f ms\n"
2078 "\t crop_upscale: %0.3f ms\n"
2079 "Average buffer read times:\n"
2080 "\t features buf: %0.3f ms\n",
2096 if (!deshake_ctx->
eof) {
2119 #define OFFSET(x) offsetof(DeshakeOpenCLContext, x)
2120 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
2124 "tripod",
"simulates a tripod by preventing any camera movement whatsoever "
2125 "from the original frame",
2129 "debug",
"turn on additional debugging information",
2133 "adaptive_crop",
"attempt to subtly crop borders to reduce mirrored content",
2137 "refine_features",
"refine feature point locations at a sub-pixel level",
2141 "smooth_strength",
"smoothing strength (0 attempts to adaptively determine optimal strength)",
2145 "smooth_window_multiplier",
"multiplier for number of frames to buffer for motion data",
2154 .
name =
"deshake_opencl",
2157 .priv_class = &deshake_opencl_class,