[FFmpeg-devel] [PATCH 2/2] swresample/resample: optimize exact_rational=on:linear_interp=on case
Muhammad Faiz
mfcc64 at gmail.com
Thu Nov 24 10:52:28 EET 2016
Split dsp.resample into dsp.resample_common and dsp.resample_linear,
and call the faster resample_common even when linear_interp=on,
provided that c->frac and c->dst_incr_mod are both zero.
This speeds up resampling when exact_rational and linear_interp are both
enabled, because exact_rational forces c->frac and c->dst_incr_mod to
be zero whenever soft compensation does not happen.
Benchmark with exact_rational=on:linear_interp=on:
        old       new
real    8.432s    5.097s
user    7.679s    4.989s
sys     0.125s    0.107s
Signed-off-by: Muhammad Faiz <mfcc64 at gmail.com>
---
libswresample/arm/resample_init.c | 6 ++----
libswresample/resample.c | 7 ++++++-
libswresample/resample.h | 6 ++++--
libswresample/resample_dsp.c | 12 ++++++++----
libswresample/x86/resample_init.c | 32 ++++++++++++++++----------------
5 files changed, 36 insertions(+), 27 deletions(-)
diff --git a/libswresample/arm/resample_init.c b/libswresample/arm/resample_init.c
index 003fafd..e334a27 100644
--- a/libswresample/arm/resample_init.c
+++ b/libswresample/arm/resample_init.c
@@ -111,12 +111,10 @@ av_cold void swri_resample_dsp_arm_init(ResampleContext *c)
switch(c->format) {
case AV_SAMPLE_FMT_FLTP:
- if (!c->linear)
- c->dsp.resample = ff_resample_common_float_neon;
+ c->dsp.resample_common = ff_resample_common_float_neon;
break;
case AV_SAMPLE_FMT_S16P:
- if (!c->linear)
- c->dsp.resample = ff_resample_common_s16_neon;
+ c->dsp.resample_common = ff_resample_common_s16_neon;
break;
}
}
diff --git a/libswresample/resample.c b/libswresample/resample.c
index 8635bf1..e65a57a 100644
--- a/libswresample/resample.c
+++ b/libswresample/resample.c
@@ -496,7 +496,12 @@ static int swri_resample(ResampleContext *c,
dst_size = FFMIN(dst_size, delta_n);
if (dst_size > 0) {
- *consumed = c->dsp.resample(c, dst, src, dst_size, update_ctx);
+ /* resample_linear and resample_common should have same behavior
+ * when frac and dst_incr_mod are zero */
+ if (c->linear && (c->frac || c->dst_incr_mod))
+ *consumed = c->dsp.resample_linear(c, dst, src, dst_size, update_ctx);
+ else
+ *consumed = c->dsp.resample_common(c, dst, src, dst_size, update_ctx);
} else {
*consumed = 0;
}
diff --git a/libswresample/resample.h b/libswresample/resample.h
index 7fe9b97..946f5cc 100644
--- a/libswresample/resample.h
+++ b/libswresample/resample.h
@@ -53,8 +53,10 @@ typedef struct ResampleContext {
struct {
void (*resample_one)(void *dst, const void *src,
int n, int64_t index, int64_t incr);
- int (*resample)(struct ResampleContext *c, void *dst,
- const void *src, int n, int update_ctx);
+ int (*resample_common)(struct ResampleContext *c, void *dst,
+ const void *src, int n, int update_ctx);
+ int (*resample_linear)(struct ResampleContext *c, void *dst,
+ const void *src, int n, int update_ctx);
} dsp;
} ResampleContext;
diff --git a/libswresample/resample_dsp.c b/libswresample/resample_dsp.c
index 41369f3..6ffbb87 100644
--- a/libswresample/resample_dsp.c
+++ b/libswresample/resample_dsp.c
@@ -48,19 +48,23 @@ void swri_resample_dsp_init(ResampleContext *c)
switch(c->format){
case AV_SAMPLE_FMT_S16P:
c->dsp.resample_one = resample_one_int16;
- c->dsp.resample = c->linear ? resample_linear_int16 : resample_common_int16;
+ c->dsp.resample_common = resample_common_int16;
+ c->dsp.resample_linear = resample_linear_int16;
break;
case AV_SAMPLE_FMT_S32P:
c->dsp.resample_one = resample_one_int32;
- c->dsp.resample = c->linear ? resample_linear_int32 : resample_common_int32;
+ c->dsp.resample_common = resample_common_int32;
+ c->dsp.resample_linear = resample_linear_int32;
break;
case AV_SAMPLE_FMT_FLTP:
c->dsp.resample_one = resample_one_float;
- c->dsp.resample = c->linear ? resample_linear_float : resample_common_float;
+ c->dsp.resample_common = resample_common_float;
+ c->dsp.resample_linear = resample_linear_float;
break;
case AV_SAMPLE_FMT_DBLP:
c->dsp.resample_one = resample_one_double;
- c->dsp.resample = c->linear ? resample_linear_double : resample_common_double;
+ c->dsp.resample_common = resample_common_double;
+ c->dsp.resample_linear = resample_linear_double;
break;
}
diff --git a/libswresample/x86/resample_init.c b/libswresample/x86/resample_init.c
index 9d7d5cf..e515762 100644
--- a/libswresample/x86/resample_init.c
+++ b/libswresample/x86/resample_init.c
@@ -50,40 +50,40 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
switch(c->format){
case AV_SAMPLE_FMT_S16P:
if (ARCH_X86_32 && EXTERNAL_MMXEXT(mm_flags)) {
- c->dsp.resample = c->linear ? ff_resample_linear_int16_mmxext
- : ff_resample_common_int16_mmxext;
+ c->dsp.resample_linear = ff_resample_linear_int16_mmxext;
+ c->dsp.resample_common = ff_resample_common_int16_mmxext;
}
if (EXTERNAL_SSE2(mm_flags)) {
- c->dsp.resample = c->linear ? ff_resample_linear_int16_sse2
- : ff_resample_common_int16_sse2;
+ c->dsp.resample_linear = ff_resample_linear_int16_sse2;
+ c->dsp.resample_common = ff_resample_common_int16_sse2;
}
if (EXTERNAL_XOP(mm_flags)) {
- c->dsp.resample = c->linear ? ff_resample_linear_int16_xop
- : ff_resample_common_int16_xop;
+ c->dsp.resample_linear = ff_resample_linear_int16_xop;
+ c->dsp.resample_common = ff_resample_common_int16_xop;
}
break;
case AV_SAMPLE_FMT_FLTP:
if (EXTERNAL_SSE(mm_flags)) {
- c->dsp.resample = c->linear ? ff_resample_linear_float_sse
- : ff_resample_common_float_sse;
+ c->dsp.resample_linear = ff_resample_linear_float_sse;
+ c->dsp.resample_common = ff_resample_common_float_sse;
}
if (EXTERNAL_AVX_FAST(mm_flags)) {
- c->dsp.resample = c->linear ? ff_resample_linear_float_avx
- : ff_resample_common_float_avx;
+ c->dsp.resample_linear = ff_resample_linear_float_avx;
+ c->dsp.resample_common = ff_resample_common_float_avx;
}
if (EXTERNAL_FMA3_FAST(mm_flags)) {
- c->dsp.resample = c->linear ? ff_resample_linear_float_fma3
- : ff_resample_common_float_fma3;
+ c->dsp.resample_linear = ff_resample_linear_float_fma3;
+ c->dsp.resample_common = ff_resample_common_float_fma3;
}
if (EXTERNAL_FMA4(mm_flags)) {
- c->dsp.resample = c->linear ? ff_resample_linear_float_fma4
- : ff_resample_common_float_fma4;
+ c->dsp.resample_linear = ff_resample_linear_float_fma4;
+ c->dsp.resample_common = ff_resample_common_float_fma4;
}
break;
case AV_SAMPLE_FMT_DBLP:
if (EXTERNAL_SSE2(mm_flags)) {
- c->dsp.resample = c->linear ? ff_resample_linear_double_sse2
- : ff_resample_common_double_sse2;
+ c->dsp.resample_linear = ff_resample_linear_double_sse2;
+ c->dsp.resample_common = ff_resample_common_double_sse2;
}
break;
}
--
2.5.0
More information about the ffmpeg-devel
mailing list