/* Side length of the square BRIEF patch (NOTE(review): presumably in pixels
 * -- confirm against the descriptor kernel). */
#define BRIEF_PATCH_SIZE 31
/* Half of the patch side length; integer division, so 31 / 2 == 15. */
#define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2)

/* NOTE(review): presumably the capacity, in entries, of the contiguous
 * matches buffer -- confirm against the allocation site. */
#define MATCHES_CONTIG_SIZE 2000

/*
 * Integer division of a by b, rounded up instead of truncated
 * (intended for non-negative a and positive b).
 *
 * Both arguments are parenthesized in the expansion so that expression
 * arguments such as `w + pad` or `1 << n` keep their intended grouping.
 * Note that b is evaluated twice: do not pass expressions with side effects.
 */
#define ROUNDED_UP_DIV(a, b) (((a) + ((b) - 1)) / (b))
306 return (
av_lfg_get(alfg) % (high - low)) + low;
312 return (
double)total_time / (
double)num_frames / 1000000.0;
324 double x1 = point_pairs[0].
p.
p1.s[0];
325 double y1 = point_pairs[0].
p.
p1.s[1];
326 double x2 = point_pairs[1].
p.
p1.s[0];
327 double y2 = point_pairs[1].
p.
p1.s[1];
328 double x3 = point_pairs[2].
p.
p1.s[0];
329 double y3 = point_pairs[2].
p.
p1.s[1];
332 double X1 = point_pairs[0].
p.
p2.s[0];
333 double Y1 = point_pairs[0].
p.
p2.s[1];
334 double X2 = point_pairs[1].
p.
p2.s[0];
335 double Y2 = point_pairs[1].
p.
p2.s[1];
336 double X3 = point_pairs[2].
p.
p2.s[0];
337 double Y3 = point_pairs[2].
p.
p2.s[1];
339 double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );
341 model[0] =
d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );
342 model[1] =
d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );
343 model[2] =
d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );
345 model[3] =
d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );
346 model[4] =
d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );
347 model[5] =
d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );
355 for (j = 0; j <
i; j++) {
356 double dx1 = points[j]->s[0] - points[
i]->s[0];
357 double dy1 = points[j]->s[1] - points[
i]->s[1];
359 for (k = 0; k < j; k++) {
360 double dx2 = points[k]->s[0] - points[
i]->s[0];
361 double dy2 = points[k]->s[1] - points[
i]->s[1];
366 if (
fabs(dx2*dy1 - dy2*dx1) <= 1.0) {
379 const cl_float2 *prev_points[] = {
380 &pairs_subset[0].
p.
p1,
381 &pairs_subset[1].
p.
p1,
382 &pairs_subset[2].
p.
p1
385 const cl_float2 *curr_points[] = {
386 &pairs_subset[0].
p.
p2,
387 &pairs_subset[1].
p.
p2,
388 &pairs_subset[2].
p.
p2
398 const int num_point_pairs,
403 int i = 0, j, iters = 0;
405 for (; iters < max_attempts; iters++) {
406 for (
i = 0;
i < 3 && iters < max_attempts;) {
410 idx_i = idx[
i] =
rand_in(0, num_point_pairs, alfg);
412 for (j = 0; j <
i; j++) {
413 if (idx_i == idx[j]) {
423 pairs_subset[
i] = point_pairs[idx[
i]];
433 return i == 3 && iters < max_attempts;
439 const int num_point_pairs,
443 double F0 = model[0],
F1 = model[1],
F2 = model[2];
444 double F3 = model[3], F4 = model[4], F5 = model[5];
446 for (
int i = 0;
i < num_point_pairs;
i++) {
447 const cl_float2 *
f = &point_pairs[
i].
p.
p1;
448 const cl_float2 *t = &point_pairs[
i].
p.
p2;
450 double a = F0*
f->s[0] +
F1*
f->s[1] +
F2 - t->s[0];
451 double b =
F3*
f->s[0] + F4*
f->s[1] + F5 - t->s[1];
463 const int num_point_pairs,
468 float t = (
float)(thresh * thresh);
469 int i, n = num_point_pairs, num_inliers = 0;
473 for (
i = 0;
i < n;
i++) {
501 confidence =
av_clipd(confidence, 0.0, 1.0);
502 num_outliers =
av_clipd(num_outliers, 0.0, 1.0);
505 num =
FFMAX(1.0 - confidence, DBL_MIN);
506 denom = 1.0 - pow(1.0 - num_outliers, 3);
507 if (denom < DBL_MIN) {
514 return denom >= 0 || -num >= max_iters * (-denom) ? max_iters : (
int)
round(num / denom);
523 const int num_point_pairs,
525 const double threshold,
527 const double confidence
530 double best_model[6], model[6];
533 int iter, niters =
FFMAX(max_iters, 1);
534 int good_count, max_good_count = 0;
537 if (num_point_pairs < 3) {
539 }
else if (num_point_pairs == 3) {
543 for (
int i = 0;
i < 3; ++
i) {
550 for (iter = 0; iter < niters; ++iter) {
551 int found =
get_subset(&deshake_ctx->
alfg, point_pairs, num_point_pairs, pairs_subset, 10000);
564 if (good_count >
FFMAX(max_good_count, 2)) {
565 for (
int mi = 0;
mi < 6; ++
mi) {
566 best_model[
mi] = model[
mi];
569 for (
int pi = 0; pi < 3; pi++) {
570 best_pairs[pi] = pairs_subset[pi];
573 max_good_count = good_count;
576 (
double)(num_point_pairs - good_count) / num_point_pairs,
582 if (max_good_count > 0) {
583 for (
int mi = 0;
mi < 6; ++
mi) {
584 model_out[
mi] = best_model[
mi];
587 for (
int pi = 0; pi < 3; ++pi) {
606 const int num_inliers,
610 float move_x_val = 0.01;
611 float move_y_val = 0.01;
613 float old_move_x_val = 0;
615 int last_changed = 0;
617 for (
int iters = 0; iters < 200; iters++) {
621 best_pairs[0].
p.
p2.s[0] += move_x_val;
623 best_pairs[0].
p.
p2.s[0] += move_y_val;
629 for (
int j = 0; j < num_inliers; j++) {
633 if (total_err < best_err) {
634 for (
int mi = 0;
mi < 6; ++
mi) {
635 model_out[
mi] = model[
mi];
638 best_err = total_err;
639 last_changed = iters;
643 best_pairs[0].
p.
p2.s[0] -= move_x_val;
645 best_pairs[0].
p.
p2.s[0] -= move_y_val;
648 if (iters - last_changed > 4) {
653 old_move_x_val = move_x_val;
661 if (old_move_x_val < 0) {
679 const int num_inliers,
684 float best_err = FLT_MAX;
685 double best_model[6], model[6];
688 for (
int i = 0;
i < max_iters;
i++) {
690 int found =
get_subset(&deshake_ctx->
alfg, inliers, num_inliers, pairs_subset, 10000);
703 for (
int j = 0; j < num_inliers; j++) {
707 if (total_err < best_err) {
708 for (
int mi = 0;
mi < 6; ++
mi) {
709 best_model[
mi] = model[
mi];
712 for (
int pi = 0; pi < 3; pi++) {
713 best_pairs[pi] = pairs_subset[pi];
716 best_err = total_err;
720 for (
int mi = 0;
mi < 6; ++
mi) {
721 model_out[
mi] = best_model[
mi];
724 for (
int pi = 0; pi < 3; ++pi) {
730 optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);
751 memset(&
ret, 0,
sizeof(
ret));
753 ret.translation.s[0] = e;
754 ret.translation.s[1] =
f;
757 if (
a != 0 ||
b != 0) {
763 ret.skew.s[0] = atan((
a *
c +
b *
d) / (
r *
r));
765 }
else if (
c != 0 ||
d != 0) {
766 double s = sqrt(
c *
c +
d *
d);
772 ret.skew.s[1] = atan((
a *
c +
b *
d) / (
s *
s));
786 for (
int i = 0;
i < size_y; ++
i) {
787 for (
int j = 0; j < size_x; ++j) {
806 return 1.0f /
expf(((
float)x * (
float)x) / (2.0
f * sigma * sigma));
814 int window_half = length / 2;
816 for (
int i = 0;
i < length; ++
i) {
820 gauss_kernel[
i] =
val;
824 for (
int i = 0;
i < length; ++
i) {
825 gauss_kernel[
i] /= gauss_sum;
852 int clip_start, clip_end, offset_clipped;
896 float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,
897 percent_of_max, inverted_percent;
899 float large_sigma = 40.0f;
900 float small_sigma = 2.0f;
904 best_sigma = (large_sigma - 0.5f) * deshake_ctx->
smooth_percent + 0.5f;
916 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
918 new_large_s += old * gauss_kernel[j];
922 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
924 new_small_s += old * gauss_kernel[j];
927 diff_between =
fabsf(new_large_s - new_small_s);
928 percent_of_max = diff_between / max_val;
929 inverted_percent = 1 - percent_of_max;
930 best_sigma = large_sigma *
powf(inverted_percent, 40);
934 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
936 new_best += old * gauss_kernel[j];
964 float center_s_w, center_s_h;
976 center_s_w = center_w - center_s.s[0];
977 center_s_h = center_h - center_s.s[1];
980 x_shift + center_s_w,
981 y_shift + center_s_h,
997 float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;
1003 float ar_h = frame_height / frame_width;
1004 float ar_w = frame_width / frame_height;
1040 adjusted_width = new_height * ar_w;
1043 if (adjusted_x >= crop->
top_left.s[0]) {
1046 adjusted_height = new_width * ar_h;
1047 adjusted_y = crop->
bottom_right.s[1] - adjusted_height;
1063 if (
ctx->gauss_kernel)
1066 if (
ctx->ransac_err)
1069 if (
ctx->matches_host)
1072 if (
ctx->matches_contig_host)
1103 if (
ctx->debug_on) {
1121 cl_ulong8 zeroed_ulong8;
1123 cl_image_format grayscale_format;
1124 cl_image_desc grayscale_desc;
1125 cl_command_queue_properties queue_props;
1147 const int descriptor_buf_size = image_grid_32 * (
BREIFN / 8);
1148 const int features_buf_size = image_grid_32 *
sizeof(cl_float2);
1160 ctx->curr_frame = 0;
1162 memset(&zeroed_ulong8, 0,
sizeof(cl_ulong8));
1165 if (!
ctx->gauss_kernel) {
1171 if (!
ctx->ransac_err) {
1180 if (!
ctx->abs_motion.ringbuffers[
i]) {
1186 if (
ctx->debug_on) {
1188 ctx->smooth_window / 2,
1192 if (!
ctx->abs_motion.debug_matches) {
1198 ctx->abs_motion.curr_frame_offset = 0;
1199 ctx->abs_motion.data_start_offset = -1;
1200 ctx->abs_motion.data_end_offset = -1;
1203 if (!pattern_host) {
1209 if (!
ctx->matches_host) {
1215 if (!
ctx->matches_contig_host) {
1221 if (!
ctx->inliers) {
1231 for (
int j = 0; j < 2; ++j) {
1236 pattern_host[
i] = pair;
1239 for (
int i = 0;
i < 14;
i++) {
1240 if (
ctx->sw_format == disallowed_formats[
i]) {
1252 ctx->sw_format = hw_frames_ctx->sw_format;
1258 if (
ctx->debug_on) {
1259 queue_props = CL_QUEUE_PROFILING_ENABLE;
1263 ctx->command_queue = clCreateCommandQueue(
1264 ctx->ocf.hwctx->context,
1265 ctx->ocf.hwctx->device_id,
1282 grayscale_format.image_channel_order = CL_R;
1283 grayscale_format.image_channel_data_type = CL_FLOAT;
1285 grayscale_desc = (cl_image_desc) {
1286 .image_type = CL_MEM_OBJECT_IMAGE2D,
1287 .image_width = outlink->
w,
1288 .image_height = outlink->
h,
1290 .image_array_size = 0,
1291 .image_row_pitch = 0,
1292 .image_slice_pitch = 0,
1293 .num_mip_levels = 0,
1298 ctx->grayscale = clCreateImage(
1299 ctx->ocf.hwctx->context,
1315 CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1325 if (
ctx->debug_on) {
1330 ctx->initialized = 1;
1344 "\tframe moved from: %f x, %f y\n"
1345 "\t to: %f x, %f y\n"
1346 "\t rotated from: %f degrees\n"
1347 "\t to: %f degrees\n"
1348 "\t scaled from: %f x, %f y\n"
1349 "\t to: %f x, %f y\n"
1351 "\tframe moved by: %f x, %f y\n"
1352 "\t rotated by: %f degrees\n"
1353 "\t scaled by: %f x, %f y\n",
1380 float transform_y[9];
1382 float transform_uv[9];
1384 float transform_crop_y[9];
1386 float transform_crop_uv[9];
1387 float transform_debug_rgb[9];
1388 size_t global_work[2];
1390 cl_mem
src, transformed, dst;
1393 cl_event transform_event, crop_upscale_event;
1395 cl_int num_model_matches;
1397 const float center_w = (
float)input_frame->
width / 2;
1398 const float center_h = (
float)input_frame->
height / 2;
1404 const float center_w_chroma = (
float)chroma_width / 2;
1405 const float center_h_chroma = (
float)chroma_height / 2;
1407 const float luma_w_over_chroma_w = ((
float)input_frame->
width / (
float)chroma_width);
1408 const float luma_h_over_chroma_h = ((
float)input_frame->
height / (
float)chroma_height);
1508 if (!cropped_frame) {
1514 if (!transformed_frame) {
1522 for (
int p = 0; p <
FF_ARRAY_ELEMS(transformed_frame->data); p++) {
1524 src = (cl_mem)input_frame->
data[p];
1525 transformed = (cl_mem)transformed_frame->data[p];
1540 { sizeof(cl_mem), &src },
1541 { sizeof(cl_mem), &transformed },
1542 { sizeof(cl_mem), &transforms[p] },
1579 transformed = (cl_mem)transformed_frame->data[0];
1586 {
sizeof(cl_mem), &transformed },
1589 {
sizeof(cl_int), &num_model_matches },
1620 crops[0] = deshake_ctx->
crop_y;
1621 crops[1] = crops[2] = deshake_ctx->
crop_uv;
1625 dst = (cl_mem)cropped_frame->
data[p];
1626 transformed = (cl_mem)transformed_frame->data[p];
1640 &crop_upscale_event,
1641 { sizeof(cl_mem), &transformed },
1642 { sizeof(cl_mem), &dst },
1643 { sizeof(cl_float2), &crops[p].top_left },
1644 { sizeof(cl_float2), &crops[p].bottom_right },
1732 int num_inliers = 0;
1736 size_t global_work[2];
1737 size_t harris_global_work[2];
1738 size_t grid_32_global_work[2];
1739 int grid_32_h, grid_32_w;
1740 size_t local_work[2];
1744 cl_event grayscale_event, harris_response_event, refine_features_event,
1745 brief_event, match_descriptors_event, read_buf_event;
1766 grid_32_global_work[0] /= 32;
1767 grid_32_global_work[1] /= 32;
1772 if (deshake_ctx->
is_yuv) {
1775 src = (cl_mem)input_frame->
data[0];
1783 {
sizeof(cl_mem), &
src },
1784 {
sizeof(cl_mem), &deshake_ctx->
grayscale }
1789 deshake_ctx->command_queue,
1790 deshake_ctx->kernel_harris_response,
1793 &harris_response_event,
1794 { sizeof(cl_mem), &deshake_ctx->grayscale },
1795 { sizeof(cl_mem), &deshake_ctx->harris_buf }
1799 deshake_ctx->command_queue,
1800 deshake_ctx->kernel_refine_features,
1801 grid_32_global_work,
1803 &refine_features_event,
1804 { sizeof(cl_mem), &deshake_ctx->grayscale },
1805 { sizeof(cl_mem), &deshake_ctx->harris_buf },
1806 { sizeof(cl_mem), &deshake_ctx->refined_features },
1807 { sizeof(cl_int), &deshake_ctx->refine_features }
1811 deshake_ctx->command_queue,
1812 deshake_ctx->kernel_brief_descriptors,
1813 grid_32_global_work,
1816 { sizeof(cl_mem), &deshake_ctx->grayscale },
1817 { sizeof(cl_mem), &deshake_ctx->refined_features },
1818 { sizeof(cl_mem), &deshake_ctx->descriptors },
1819 { sizeof(cl_mem), &deshake_ctx->brief_pattern}
1826 goto no_motion_data;
1830 deshake_ctx->command_queue,
1831 deshake_ctx->kernel_match_descriptors,
1832 grid_32_global_work,
1834 &match_descriptors_event,
1835 { sizeof(cl_mem), &deshake_ctx->prev_refined_features },
1836 { sizeof(cl_mem), &deshake_ctx->refined_features },
1837 { sizeof(cl_mem), &deshake_ctx->descriptors },
1838 { sizeof(cl_mem), &deshake_ctx->prev_descriptors },
1839 { sizeof(cl_mem), &deshake_ctx->matches }
1842 cle = clEnqueueReadBuffer(
1843 deshake_ctx->command_queue,
1844 deshake_ctx->matches,
1848 deshake_ctx->matches_host,
1857 if (num_vectors < 10) {
1870 if (deshake_ctx->abs_motion.data_end_offset == -1) {
1871 deshake_ctx->abs_motion.data_end_offset =
1875 goto no_motion_data;
1880 deshake_ctx->matches_contig_host,
1888 goto no_motion_data;
1891 for (
int i = 0;
i < num_vectors;
i++) {
1892 if (deshake_ctx->matches_contig_host[
i].should_consider) {
1893 deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[
i];
1900 deshake_ctx->inliers,
1906 goto no_motion_data;
1915 deshake_ctx->abs_motion.ringbuffers[
i],
1926 if (deshake_ctx->debug_on) {
1927 if (!deshake_ctx->is_yuv) {
1946 for (
int i = 0;
i < num_vectors;
i++) {
1947 deshake_ctx->matches_contig_host[
i].should_consider = 0;
1949 debug_matches.num_model_matches = 0;
1951 if (deshake_ctx->debug_on) {
1953 "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n"
1962 temp = deshake_ctx->prev_descriptors;
1963 deshake_ctx->prev_descriptors = deshake_ctx->descriptors;
1964 deshake_ctx->descriptors =
temp;
1967 temp = deshake_ctx->prev_refined_features;
1968 deshake_ctx->prev_refined_features = deshake_ctx->refined_features;
1969 deshake_ctx->refined_features =
temp;
1971 if (deshake_ctx->debug_on) {
1972 if (num_vectors == 0) {
1973 debug_matches.matches =
NULL;
1977 if (!debug_matches.matches) {
1983 for (
int i = 0;
i < num_vectors;
i++) {
1984 debug_matches.matches[
i] = deshake_ctx->matches_contig_host[
i];
1986 debug_matches.num_matches = num_vectors;
1989 deshake_ctx->abs_motion.debug_matches,
1994 av_fifo_write(deshake_ctx->abs_motion.ringbuffers[
i], &new_vals[
i], 1);
2000 clFinish(deshake_ctx->command_queue);
2016 if (!deshake_ctx->
eof) {
2021 if (!
frame->hw_frames_ctx)
2053 deshake_ctx->
eof = 1;
2057 if (deshake_ctx->
eof) {
2072 "Average kernel execution times:\n"
2073 "\t grayscale: %0.3f ms\n"
2074 "\t harris_response: %0.3f ms\n"
2075 "\t refine_features: %0.3f ms\n"
2076 "\tbrief_descriptors: %0.3f ms\n"
2077 "\tmatch_descriptors: %0.3f ms\n"
2078 "\t transform: %0.3f ms\n"
2079 "\t crop_upscale: %0.3f ms\n"
2080 "Average buffer read times:\n"
2081 "\t features buf: %0.3f ms\n",
2097 if (!deshake_ctx->
eof) {
/* Byte offset of member x inside DeshakeOpenCLContext, for the option table. */
#define OFFSET(x) offsetof(DeshakeOpenCLContext, x)
/*
 * Option flags shared by the entries of the option table: the bitwise OR of
 * the filtering-parameter and video-parameter flags. The expansion is
 * parenthesized so it stays a single operand when combined with other
 * operators (e.g. `FLAGS & mask`).
 */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
2125 "tripod",
"simulates a tripod by preventing any camera movement whatsoever "
2126 "from the original frame",
2130 "debug",
"turn on additional debugging information",
2134 "adaptive_crop",
"attempt to subtly crop borders to reduce mirrored content",
2138 "refine_features",
"refine feature point locations at a sub-pixel level",
2142 "smooth_strength",
"smoothing strength (0 attempts to adaptively determine optimal strength)",
2146 "smooth_window_multiplier",
"multiplier for number of frames to buffer for motion data",
2155 .
name =
"deshake_opencl",
2158 .priv_class = &deshake_opencl_class,