[FFmpeg-devel] [PATCH] checkasm: add (private) kperf timing for macOS
Martin Storsjö
martin at martin.st
Tue Apr 13 15:45:01 EEST 2021
On Tue, 13 Apr 2021, Josh Dekker wrote:
> Signed-off-by: Josh Dekker <josh at itanimul.li>
> ---
> configure | 2 +
> tests/checkasm/Makefile | 1 +
> tests/checkasm/checkasm.c | 19 ++++-
> tests/checkasm/checkasm.h | 10 ++-
> tests/checkasm/macos_kperf.c | 143 +++++++++++++++++++++++++++++++++++
> tests/checkasm/macos_kperf.h | 23 ++++++
> 6 files changed, 195 insertions(+), 3 deletions(-)
> create mode 100644 tests/checkasm/macos_kperf.c
> create mode 100644 tests/checkasm/macos_kperf.h
>
> diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> index ef6645e3a2..4127081d74 100644
> --- a/tests/checkasm/checkasm.h
> +++ b/tests/checkasm/checkasm.h
> @@ -31,6 +31,8 @@
> #include <sys/ioctl.h>
> #include <asm/unistd.h>
> #include <linux/perf_event.h>
> +#elif CONFIG_MACOS_KPERF
> +#include "macos_kperf.h"
> #endif
>
> #include "libavutil/avstring.h"
> @@ -224,7 +226,7 @@ typedef struct CheckasmPerf {
> int iterations;
> } CheckasmPerf;
>
> -#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF
> +#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF || CONFIG_MACOS_KPERF
>
> #if CONFIG_LINUX_PERF
> #define PERF_START(t) do { \
> @@ -235,6 +237,12 @@ typedef struct CheckasmPerf {
> ioctl(sysfd, PERF_EVENT_IOC_DISABLE, 0); \
> read(sysfd, &t, sizeof(t)); \
> } while (0)
> +#elif CONFIG_MACOS_KPERF
> +#define PERF_START(t) do { \
> + t = 0; \
> + ff_kperf_cycles(&t); \
> +} while (0)
> +#define PERF_STOP(t) ff_kperf_cycles(&t)
> #else
I find this implicit subtraction in ff_kperf_cycles() highly surprising
and hard to reason about.
E.g. if I do t=0; followed by ff_kperf_cycles(&t) three times - what value
do I have in 't'? With the current code, the answer is "the difference
between the absolute current timer minus the number of cycles it took
between the first and second call", and that doesn't sound like something
that is useful to anybody, ever.
Just make the function return the value as-is and do the subtraction here.
> #define PERF_START(t) t = AV_READ_TIME()
> #define PERF_STOP(t) t = AV_READ_TIME() - t
> diff --git a/tests/checkasm/macos_kperf.c b/tests/checkasm/macos_kperf.c
> new file mode 100644
> index 0000000000..e6ae316608
> --- /dev/null
> +++ b/tests/checkasm/macos_kperf.c
> @@ -0,0 +1,143 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +
> +#include "macos_kperf.h"
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <dlfcn.h>
> +
> +#define KPERF_LIST \
> + F(int, kpc_get_counting, void) \
> + F(int, kpc_force_all_ctrs_set, int) \
> + F(int, kpc_set_counting, uint32_t) \
> + F(int, kpc_set_thread_counting, uint32_t) \
> + F(int, kpc_set_config, uint32_t, void *) \
> + F(int, kpc_get_config, uint32_t, void *) \
> + F(int, kpc_set_period, uint32_t, void *) \
> + F(int, kpc_get_period, uint32_t, void *) \
> + F(uint32_t, kpc_get_counter_count, uint32_t) \
> + F(uint32_t, kpc_get_config_count, uint32_t) \
> + F(int, kperf_sample_get, int *) \
> + F(int, kpc_get_thread_counters, int, unsigned int, void *)
> +
> +#define F(ret, name, ...) \
> + typedef ret name##proc(__VA_ARGS__); \
> + static name##proc *name = NULL;
> +KPERF_LIST
> +#undef F
> +
> +#define CFGWORD_EL0A32EN_MASK (0x10000)
> +#define CFGWORD_EL0A64EN_MASK (0x20000)
> +#define CFGWORD_EL1EN_MASK (0x40000)
> +#define CFGWORD_EL3EN_MASK (0x80000)
> +#define CFGWORD_ALLMODES_MASK (0xf0000)
> +
> +#define CPMU_NONE 0
> +#define CPMU_CORE_CYCLE 0x02
> +#define CPMU_INST_A64 0x8c
> +#define CPMU_INST_BRANCH 0x8d
> +#define CPMU_SYNC_DC_LOAD_MISS 0xbf
> +#define CPMU_SYNC_DC_STORE_MISS 0xc0
> +#define CPMU_SYNC_DTLB_MISS 0xc1
> +#define CPMU_SYNC_ST_HIT_YNGR_LD 0xc4
> +#define CPMU_SYNC_BR_ANY_MISP 0xcb
> +#define CPMU_FED_IC_MISS_DEM 0xd3
> +#define CPMU_FED_ITLB_MISS 0xd4
> +
> +#define KPC_CLASS_FIXED_MASK (1 << 0)
> +#define KPC_CLASS_CONFIGURABLE_MASK (1 << 1)
> +#define KPC_CLASS_POWER_MASK (1 << 2)
> +#define KPC_CLASS_RAWPMU_MASK (1 << 3)
> +
> +#define COUNTERS_COUNT 10
> +#define CONFIG_COUNT 8
Is this value actually needed for anything, other than verifying the
return value of kpc_get_config_count? It looks to me like one could omit
it. (Then again, I know this API is undocumented and all, so it's not
really known how it's supposed to be used.)
> +#define KPC_MASK (KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_FIXED_MASK)
> +
> +int ff_kperf_setup()
> +{
> + uint64_t config[COUNTERS_COUNT] = {0};
> + config[0] = CPMU_CORE_CYCLE | CFGWORD_EL0A64EN_MASK;
> + // config[3] = CPMU_INST_BRANCH | CFGWORD_EL0A64EN_MASK;
> + // config[4] = CPMU_SYNC_BR_ANY_MISP | CFGWORD_EL0A64EN_MASK;
> + // config[5] = CPMU_INST_A64 | CFGWORD_EL0A64EN_MASK;
> +
> + if (kpc_set_config(KPC_MASK, config)) {
> + fprintf(stderr, "kperf: kpc_set_config failed\n");
> + return -1;
> + }
> +
> + if (kpc_force_all_ctrs_set(1)) {
> + fprintf(stderr, "kperf: kpc_force_all_ctrs_set failed\n");
> + return -1;
> + }
> +
> + if (kpc_set_counting(KPC_MASK)) {
> + fprintf(stderr, "kperf: kpc_set_counting failed\n");
> + return -1;
> + }
> +
> + if (kpc_set_thread_counting(KPC_MASK)) {
> + fprintf(stderr, "kperf: kpc_set_thread_counting failed\n");
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +int ff_kperf_init()
> +{
> + void *kperf = dlopen("/System/Library/PrivateFrameworks/kperf.framework/Versions/A/kperf", RTLD_LAZY);
> + if (!kperf) {
> + fprintf(stderr, "kperf: kperf = %p\n", kperf);
If it failed, then this will always print (null), so passing the variable
to it right now is kinda pointless. A more useful error might be available
via dlerror().
> + return -1;
> + }
> +
> +#define F(ret, name, ...) \
> + name = (name##proc *)(dlsym(kperf, #name)); \
> + if (!name) { \
> + fprintf(stderr, "kperf: %s = %p\n", #name, (void *)name); \
The second parameter here will always be null too
> + return -1; \
> + }
> + KPERF_LIST
> +#undef F
> +
> + if (kpc_get_counter_count(KPC_MASK) != COUNTERS_COUNT) {
This is also in the field of unknown API usage, but I guess in one sense,
one could consider also allowing it to work if counter_count >= 1 or
however many we actually use. But I guess we'd need to enlarge the stack
buffers a bit more in that case, to allow an actual value up to whatever
we've allocated.
> + fprintf(stderr, "kperf: wrong fixed counters count\n");
> + return -1;
> + }
> +
> + if (kpc_get_config_count(KPC_MASK) != CONFIG_COUNT) {
> + fprintf(stderr, "kperf: wrong fixed config count\n");
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +int ff_kperf_cycles(uint64_t *cycles)
> +{
> + uint64_t counters[COUNTERS_COUNT];
> + if (kpc_get_thread_counters(0, COUNTERS_COUNT, counters)) {
> + return -1;
> + }
> +
> + if (cycles)
> + *cycles = counters[0] - *cycles;
As point out in checkasm.h, move the subtraction there; having it here
makes the function very strange to reason about.
// Martin
More information about the ffmpeg-devel
mailing list