89 #define BRIEF_PATCH_SIZE 31
90 #define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2)
92 #define MATCHES_CONTIG_SIZE 2000
94 #define ROUNDED_UP_DIV(a, b) ((a + (b - 1)) / b)
305 return (
av_lfg_get(alfg) % (high - low)) + low;
311 return (
double)total_time / (
double)num_frames / 1000000.0;
323 double x1 = point_pairs[0].
p.
p1.s[0];
324 double y1 = point_pairs[0].
p.
p1.s[1];
325 double x2 = point_pairs[1].
p.
p1.s[0];
326 double y2 = point_pairs[1].
p.
p1.s[1];
327 double x3 = point_pairs[2].
p.
p1.s[0];
328 double y3 = point_pairs[2].
p.
p1.s[1];
331 double X1 = point_pairs[0].
p.
p2.s[0];
332 double Y1 = point_pairs[0].
p.
p2.s[1];
333 double X2 = point_pairs[1].
p.
p2.s[0];
334 double Y2 = point_pairs[1].
p.
p2.s[1];
335 double X3 = point_pairs[2].
p.
p2.s[0];
336 double Y3 = point_pairs[2].
p.
p2.s[1];
338 double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );
340 model[0] =
d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );
341 model[1] =
d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );
342 model[2] =
d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );
344 model[3] =
d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );
345 model[4] =
d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );
346 model[5] =
d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );
354 for (j = 0; j <
i; j++) {
355 double dx1 = points[j]->s[0] - points[
i]->s[0];
356 double dy1 = points[j]->s[1] - points[
i]->s[1];
358 for (k = 0; k < j; k++) {
359 double dx2 = points[k]->s[0] - points[
i]->s[0];
360 double dy2 = points[k]->s[1] - points[
i]->s[1];
365 if (
fabs(dx2*dy1 - dy2*dx1) <= 1.0) {
378 const cl_float2 *prev_points[] = {
379 &pairs_subset[0].
p.
p1,
380 &pairs_subset[1].
p.
p1,
381 &pairs_subset[2].
p.
p1
384 const cl_float2 *curr_points[] = {
385 &pairs_subset[0].
p.
p2,
386 &pairs_subset[1].
p.
p2,
387 &pairs_subset[2].
p.
p2
397 const int num_point_pairs,
402 int i = 0, j, iters = 0;
404 for (; iters < max_attempts; iters++) {
405 for (
i = 0;
i < 3 && iters < max_attempts;) {
409 idx_i = idx[
i] =
rand_in(0, num_point_pairs, alfg);
411 for (j = 0; j <
i; j++) {
412 if (idx_i == idx[j]) {
422 pairs_subset[
i] = point_pairs[idx[
i]];
432 return i == 3 && iters < max_attempts;
438 const int num_point_pairs,
442 double F0 = model[0],
F1 = model[1],
F2 = model[2];
443 double F3 = model[3], F4 = model[4], F5 = model[5];
445 for (
int i = 0;
i < num_point_pairs;
i++) {
446 const cl_float2 *
f = &point_pairs[
i].
p.
p1;
447 const cl_float2 *t = &point_pairs[
i].
p.
p2;
449 double a = F0*
f->s[0] +
F1*
f->s[1] +
F2 - t->s[0];
450 double b =
F3*
f->s[0] + F4*
f->s[1] + F5 - t->s[1];
462 const int num_point_pairs,
467 float t = (
float)(thresh * thresh);
468 int i, n = num_point_pairs, num_inliers = 0;
472 for (
i = 0;
i < n;
i++) {
500 confidence =
av_clipd(confidence, 0.0, 1.0);
501 num_outliers =
av_clipd(num_outliers, 0.0, 1.0);
504 num =
FFMAX(1.0 - confidence, DBL_MIN);
505 denom = 1.0 - pow(1.0 - num_outliers, 3);
506 if (denom < DBL_MIN) {
513 return denom >= 0 || -num >= max_iters * (-denom) ? max_iters : (
int)
round(num / denom);
522 const int num_point_pairs,
524 const double threshold,
526 const double confidence
529 double best_model[6], model[6];
532 int iter, niters =
FFMAX(max_iters, 1);
533 int good_count, max_good_count = 0;
536 if (num_point_pairs < 3) {
538 }
else if (num_point_pairs == 3) {
542 for (
int i = 0;
i < 3; ++
i) {
549 for (iter = 0; iter < niters; ++iter) {
550 int found =
get_subset(&deshake_ctx->
alfg, point_pairs, num_point_pairs, pairs_subset, 10000);
563 if (good_count >
FFMAX(max_good_count, 2)) {
564 for (
int mi = 0;
mi < 6; ++
mi) {
565 best_model[
mi] = model[
mi];
568 for (
int pi = 0; pi < 3; pi++) {
569 best_pairs[pi] = pairs_subset[pi];
572 max_good_count = good_count;
575 (
double)(num_point_pairs - good_count) / num_point_pairs,
581 if (max_good_count > 0) {
582 for (
int mi = 0;
mi < 6; ++
mi) {
583 model_out[
mi] = best_model[
mi];
586 for (
int pi = 0; pi < 3; ++pi) {
605 const int num_inliers,
609 float move_x_val = 0.01;
610 float move_y_val = 0.01;
612 float old_move_x_val = 0;
614 int last_changed = 0;
616 for (
int iters = 0; iters < 200; iters++) {
620 best_pairs[0].
p.
p2.s[0] += move_x_val;
622 best_pairs[0].
p.
p2.s[0] += move_y_val;
628 for (
int j = 0; j < num_inliers; j++) {
632 if (total_err < best_err) {
633 for (
int mi = 0;
mi < 6; ++
mi) {
634 model_out[
mi] = model[
mi];
637 best_err = total_err;
638 last_changed = iters;
642 best_pairs[0].
p.
p2.s[0] -= move_x_val;
644 best_pairs[0].
p.
p2.s[0] -= move_y_val;
647 if (iters - last_changed > 4) {
652 old_move_x_val = move_x_val;
660 if (old_move_x_val < 0) {
678 const int num_inliers,
683 float best_err = FLT_MAX;
684 double best_model[6], model[6];
687 for (
int i = 0;
i < max_iters;
i++) {
689 int found =
get_subset(&deshake_ctx->
alfg, inliers, num_inliers, pairs_subset, 10000);
702 for (
int j = 0; j < num_inliers; j++) {
706 if (total_err < best_err) {
707 for (
int mi = 0;
mi < 6; ++
mi) {
708 best_model[
mi] = model[
mi];
711 for (
int pi = 0; pi < 3; pi++) {
712 best_pairs[pi] = pairs_subset[pi];
715 best_err = total_err;
719 for (
int mi = 0;
mi < 6; ++
mi) {
720 model_out[
mi] = best_model[
mi];
723 for (
int pi = 0; pi < 3; ++pi) {
729 optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);
750 memset(&
ret, 0,
sizeof(
ret));
752 ret.translation.s[0] = e;
753 ret.translation.s[1] =
f;
756 if (
a != 0 ||
b != 0) {
762 ret.skew.s[0] = atan((
a *
c +
b *
d) / (
r *
r));
764 }
else if (
c != 0 ||
d != 0) {
765 double s = sqrt(
c *
c +
d *
d);
771 ret.skew.s[1] = atan((
a *
c +
b *
d) / (
s *
s));
785 for (
int i = 0;
i < size_y; ++
i) {
786 for (
int j = 0; j < size_x; ++j) {
805 return 1.0f /
expf(((
float)x * (
float)x) / (2.0
f * sigma * sigma));
813 int window_half = length / 2;
815 for (
int i = 0;
i < length; ++
i) {
819 gauss_kernel[
i] =
val;
823 for (
int i = 0;
i < length; ++
i) {
824 gauss_kernel[
i] /= gauss_sum;
851 int clip_start, clip_end, offset_clipped;
895 float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,
896 percent_of_max, inverted_percent;
898 float large_sigma = 40.0f;
899 float small_sigma = 2.0f;
903 best_sigma = (large_sigma - 0.5f) * deshake_ctx->
smooth_percent + 0.5f;
915 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
917 new_large_s += old * gauss_kernel[j];
921 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
923 new_small_s += old * gauss_kernel[j];
926 diff_between =
fabsf(new_large_s - new_small_s);
927 percent_of_max = diff_between / max_val;
928 inverted_percent = 1 - percent_of_max;
929 best_sigma = large_sigma *
powf(inverted_percent, 40);
933 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
935 new_best += old * gauss_kernel[j];
963 float center_s_w, center_s_h;
975 center_s_w = center_w - center_s.s[0];
976 center_s_h = center_h - center_s.s[1];
979 x_shift + center_s_w,
980 y_shift + center_s_h,
996 float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;
1002 float ar_h = frame_height / frame_width;
1003 float ar_w = frame_width / frame_height;
1039 adjusted_width = new_height * ar_w;
1042 if (adjusted_x >= crop->
top_left.s[0]) {
1045 adjusted_height = new_width * ar_h;
1046 adjusted_y = crop->
bottom_right.s[1] - adjusted_height;
1062 if (
ctx->gauss_kernel)
1065 if (
ctx->ransac_err)
1068 if (
ctx->matches_host)
1071 if (
ctx->matches_contig_host)
1102 if (
ctx->debug_on) {
1120 cl_ulong8 zeroed_ulong8;
1122 cl_image_format grayscale_format;
1123 cl_image_desc grayscale_desc;
1124 cl_command_queue_properties queue_props;
1146 const int descriptor_buf_size = image_grid_32 * (
BREIFN / 8);
1147 const int features_buf_size = image_grid_32 *
sizeof(cl_float2);
1159 ctx->curr_frame = 0;
1161 memset(&zeroed_ulong8, 0,
sizeof(cl_ulong8));
1164 if (!
ctx->gauss_kernel) {
1170 if (!
ctx->ransac_err) {
1179 if (!
ctx->abs_motion.ringbuffers[
i]) {
1185 if (
ctx->debug_on) {
1187 ctx->smooth_window / 2,
1191 if (!
ctx->abs_motion.debug_matches) {
1197 ctx->abs_motion.curr_frame_offset = 0;
1198 ctx->abs_motion.data_start_offset = -1;
1199 ctx->abs_motion.data_end_offset = -1;
1202 if (!pattern_host) {
1208 if (!
ctx->matches_host) {
1214 if (!
ctx->matches_contig_host) {
1220 if (!
ctx->inliers) {
1230 for (
int j = 0; j < 2; ++j) {
1235 pattern_host[
i] = pair;
1238 for (
int i = 0;
i < 14;
i++) {
1239 if (
ctx->sw_format == disallowed_formats[
i]) {
1251 ctx->sw_format = hw_frames_ctx->sw_format;
1257 if (
ctx->debug_on) {
1258 queue_props = CL_QUEUE_PROFILING_ENABLE;
1262 ctx->command_queue = clCreateCommandQueue(
1263 ctx->ocf.hwctx->context,
1264 ctx->ocf.hwctx->device_id,
1281 grayscale_format.image_channel_order = CL_R;
1282 grayscale_format.image_channel_data_type = CL_FLOAT;
1284 grayscale_desc = (cl_image_desc) {
1285 .image_type = CL_MEM_OBJECT_IMAGE2D,
1286 .image_width = outlink->
w,
1287 .image_height = outlink->
h,
1289 .image_array_size = 0,
1290 .image_row_pitch = 0,
1291 .image_slice_pitch = 0,
1292 .num_mip_levels = 0,
1297 ctx->grayscale = clCreateImage(
1298 ctx->ocf.hwctx->context,
1314 CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1324 if (
ctx->debug_on) {
1329 ctx->initialized = 1;
1343 "\tframe moved from: %f x, %f y\n"
1344 "\t to: %f x, %f y\n"
1345 "\t rotated from: %f degrees\n"
1346 "\t to: %f degrees\n"
1347 "\t scaled from: %f x, %f y\n"
1348 "\t to: %f x, %f y\n"
1350 "\tframe moved by: %f x, %f y\n"
1351 "\t rotated by: %f degrees\n"
1352 "\t scaled by: %f x, %f y\n",
1379 float transform_y[9];
1381 float transform_uv[9];
1383 float transform_crop_y[9];
1385 float transform_crop_uv[9];
1386 float transform_debug_rgb[9];
1387 size_t global_work[2];
1389 cl_mem
src, transformed, dst;
1392 cl_event transform_event, crop_upscale_event;
1394 cl_int num_model_matches;
1396 const float center_w = (
float)input_frame->
width / 2;
1397 const float center_h = (
float)input_frame->
height / 2;
1403 const float center_w_chroma = (
float)chroma_width / 2;
1404 const float center_h_chroma = (
float)chroma_height / 2;
1406 const float luma_w_over_chroma_w = ((
float)input_frame->
width / (
float)chroma_width);
1407 const float luma_h_over_chroma_h = ((
float)input_frame->
height / (
float)chroma_height);
1415 #if FF_API_PKT_DURATION
1514 if (!cropped_frame) {
1520 if (!transformed_frame) {
1528 for (
int p = 0; p <
FF_ARRAY_ELEMS(transformed_frame->data); p++) {
1530 src = (cl_mem)input_frame->
data[p];
1531 transformed = (cl_mem)transformed_frame->data[p];
1546 { sizeof(cl_mem), &src },
1547 { sizeof(cl_mem), &transformed },
1548 { sizeof(cl_mem), &transforms[p] },
1585 transformed = (cl_mem)transformed_frame->data[0];
1592 {
sizeof(cl_mem), &transformed },
1595 {
sizeof(cl_int), &num_model_matches },
1626 crops[0] = deshake_ctx->
crop_y;
1627 crops[1] = crops[2] = deshake_ctx->
crop_uv;
1631 dst = (cl_mem)cropped_frame->
data[p];
1632 transformed = (cl_mem)transformed_frame->data[p];
1646 &crop_upscale_event,
1647 { sizeof(cl_mem), &transformed },
1648 { sizeof(cl_mem), &dst },
1649 { sizeof(cl_float2), &crops[p].top_left },
1650 { sizeof(cl_float2), &crops[p].bottom_right },
1738 int num_inliers = 0;
1742 size_t global_work[2];
1743 size_t harris_global_work[2];
1744 size_t grid_32_global_work[2];
1745 int grid_32_h, grid_32_w;
1746 size_t local_work[2];
1750 cl_event grayscale_event, harris_response_event, refine_features_event,
1751 brief_event, match_descriptors_event, read_buf_event;
1772 grid_32_global_work[0] /= 32;
1773 grid_32_global_work[1] /= 32;
1778 if (deshake_ctx->
is_yuv) {
1781 src = (cl_mem)input_frame->
data[0];
1789 {
sizeof(cl_mem), &
src },
1790 {
sizeof(cl_mem), &deshake_ctx->
grayscale }
1795 deshake_ctx->command_queue,
1796 deshake_ctx->kernel_harris_response,
1799 &harris_response_event,
1800 { sizeof(cl_mem), &deshake_ctx->grayscale },
1801 { sizeof(cl_mem), &deshake_ctx->harris_buf }
1805 deshake_ctx->command_queue,
1806 deshake_ctx->kernel_refine_features,
1807 grid_32_global_work,
1809 &refine_features_event,
1810 { sizeof(cl_mem), &deshake_ctx->grayscale },
1811 { sizeof(cl_mem), &deshake_ctx->harris_buf },
1812 { sizeof(cl_mem), &deshake_ctx->refined_features },
1813 { sizeof(cl_int), &deshake_ctx->refine_features }
1817 deshake_ctx->command_queue,
1818 deshake_ctx->kernel_brief_descriptors,
1819 grid_32_global_work,
1822 { sizeof(cl_mem), &deshake_ctx->grayscale },
1823 { sizeof(cl_mem), &deshake_ctx->refined_features },
1824 { sizeof(cl_mem), &deshake_ctx->descriptors },
1825 { sizeof(cl_mem), &deshake_ctx->brief_pattern}
1832 goto no_motion_data;
1836 deshake_ctx->command_queue,
1837 deshake_ctx->kernel_match_descriptors,
1838 grid_32_global_work,
1840 &match_descriptors_event,
1841 { sizeof(cl_mem), &deshake_ctx->prev_refined_features },
1842 { sizeof(cl_mem), &deshake_ctx->refined_features },
1843 { sizeof(cl_mem), &deshake_ctx->descriptors },
1844 { sizeof(cl_mem), &deshake_ctx->prev_descriptors },
1845 { sizeof(cl_mem), &deshake_ctx->matches }
1848 cle = clEnqueueReadBuffer(
1849 deshake_ctx->command_queue,
1850 deshake_ctx->matches,
1854 deshake_ctx->matches_host,
1863 if (num_vectors < 10) {
1876 if (deshake_ctx->abs_motion.data_end_offset == -1) {
1877 deshake_ctx->abs_motion.data_end_offset =
1881 goto no_motion_data;
1886 deshake_ctx->matches_contig_host,
1894 goto no_motion_data;
1897 for (
int i = 0;
i < num_vectors;
i++) {
1898 if (deshake_ctx->matches_contig_host[
i].should_consider) {
1899 deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[
i];
1906 deshake_ctx->inliers,
1912 goto no_motion_data;
1921 deshake_ctx->abs_motion.ringbuffers[
i],
1932 if (deshake_ctx->debug_on) {
1933 if (!deshake_ctx->is_yuv) {
1952 for (
int i = 0;
i < num_vectors;
i++) {
1953 deshake_ctx->matches_contig_host[
i].should_consider = 0;
1955 debug_matches.num_model_matches = 0;
1957 if (deshake_ctx->debug_on) {
1959 "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n"
1968 temp = deshake_ctx->prev_descriptors;
1969 deshake_ctx->prev_descriptors = deshake_ctx->descriptors;
1970 deshake_ctx->descriptors =
temp;
1973 temp = deshake_ctx->prev_refined_features;
1974 deshake_ctx->prev_refined_features = deshake_ctx->refined_features;
1975 deshake_ctx->refined_features =
temp;
1977 if (deshake_ctx->debug_on) {
1978 if (num_vectors == 0) {
1979 debug_matches.matches =
NULL;
1983 if (!debug_matches.matches) {
1989 for (
int i = 0;
i < num_vectors;
i++) {
1990 debug_matches.matches[
i] = deshake_ctx->matches_contig_host[
i];
1992 debug_matches.num_matches = num_vectors;
1995 deshake_ctx->abs_motion.debug_matches,
2000 av_fifo_write(deshake_ctx->abs_motion.ringbuffers[
i], &new_vals[
i], 1);
2006 clFinish(deshake_ctx->command_queue);
2022 if (!deshake_ctx->
eof) {
2059 deshake_ctx->
eof = 1;
2063 if (deshake_ctx->
eof) {
2078 "Average kernel execution times:\n"
2079 "\t grayscale: %0.3f ms\n"
2080 "\t harris_response: %0.3f ms\n"
2081 "\t refine_features: %0.3f ms\n"
2082 "\tbrief_descriptors: %0.3f ms\n"
2083 "\tmatch_descriptors: %0.3f ms\n"
2084 "\t transform: %0.3f ms\n"
2085 "\t crop_upscale: %0.3f ms\n"
2086 "Average buffer read times:\n"
2087 "\t features buf: %0.3f ms\n",
2103 if (!deshake_ctx->
eof) {
2126 #define OFFSET(x) offsetof(DeshakeOpenCLContext, x)
2127 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
2131 "tripod",
"simulates a tripod by preventing any camera movement whatsoever "
2132 "from the original frame",
2136 "debug",
"turn on additional debugging information",
2140 "adaptive_crop",
"attempt to subtly crop borders to reduce mirrored content",
2144 "refine_features",
"refine feature point locations at a sub-pixel level",
2148 "smooth_strength",
"smoothing strength (0 attempts to adaptively determine optimal strength)",
2152 "smooth_window_multiplier",
"multiplier for number of frames to buffer for motion data",
2161 .
name =
"deshake_opencl",
2164 .priv_class = &deshake_opencl_class,