[FFmpeg-devel] [PATCH 01/13] hwcontext_vulkan: add a new mechanism to expose used queue families
Lynne
dev at lynne.ee
Thu Aug 8 00:33:26 EEST 2024
The issue with the old mechanism is that we had to introduce new
API each time we needed a new queue family, and all the queue families
were functionally fixed to a given purpose.
Nvidia's GPUs are able to handle video encoding and compute on the
same queue, which results in a speedup when pre-processing is required.
Also, this enables us to expose optical flow queues for frame interpolation.
---
libavutil/hwcontext_vulkan.c | 85 ++++++++++++++++++++++++++++--------
libavutil/hwcontext_vulkan.h | 25 +++++++++++
2 files changed, 93 insertions(+), 17 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index da377aa1a4..33d856ddd3 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1423,12 +1423,13 @@ static void unlock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t
static int vulkan_device_init(AVHWDeviceContext *ctx)
{
- int err;
+ int err = 0;
uint32_t qf_num;
VulkanDevicePriv *p = ctx->hwctx;
AVVulkanDeviceContext *hwctx = &p->p;
FFVulkanFunctions *vk = &p->vkctx.vkfn;
- VkQueueFamilyProperties *qf;
+ VkQueueFamilyProperties2 *qf;
+ VkQueueFamilyVideoPropertiesKHR *qf_vid;
int graph_index, comp_index, tx_index, enc_index, dec_index;
/* Set device extension flags */
@@ -1474,38 +1475,53 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
return AVERROR_EXTERNAL;
}
- qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
+ qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties2));
if (!qf)
return AVERROR(ENOMEM);
- vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
+ qf_vid = av_malloc_array(qf_num, sizeof(VkQueueFamilyVideoPropertiesKHR));
+ if (!qf_vid) {
+ av_free(qf);
+ return AVERROR(ENOMEM);
+ }
+
+ for (uint32_t i = 0; i < qf_num; i++) {
+ qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
+ };
+ qf[i] = (VkQueueFamilyProperties2) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
+ .pNext = &qf_vid[i],
+ };
+ }
+
+ vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &qf_num, qf);
p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex));
if (!p->qf_mutex) {
- av_free(qf);
- return AVERROR(ENOMEM);
+ err = AVERROR(ENOMEM);
+ goto end;
}
p->nb_tot_qfs = qf_num;
for (uint32_t i = 0; i < qf_num; i++) {
- p->qf_mutex[i] = av_calloc(qf[i].queueCount, sizeof(**p->qf_mutex));
+ p->qf_mutex[i] = av_calloc(qf[i].queueFamilyProperties.queueCount,
+ sizeof(**p->qf_mutex));
if (!p->qf_mutex[i]) {
- av_free(qf);
- return AVERROR(ENOMEM);
+ err = AVERROR(ENOMEM);
+ goto end;
}
- for (uint32_t j = 0; j < qf[i].queueCount; j++) {
+ for (uint32_t j = 0; j < qf[i].queueFamilyProperties.queueCount; j++) {
err = pthread_mutex_init(&p->qf_mutex[i][j], NULL);
if (err != 0) {
av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed : %s\n",
av_err2str(err));
- av_free(qf);
- return AVERROR(err);
+ err = AVERROR(err);
+ goto end;
}
}
}
- av_free(qf);
-
graph_index = hwctx->nb_graphics_queues ? hwctx->queue_family_index : -1;
comp_index = hwctx->nb_comp_queues ? hwctx->queue_family_comp_index : -1;
tx_index = hwctx->nb_tx_queues ? hwctx->queue_family_tx_index : -1;
@@ -1517,13 +1533,15 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
if (ctx_qf < 0 && required) { \
av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing" \
" in the context!\n", type); \
- return AVERROR(EINVAL); \
+ err = AVERROR(EINVAL); \
+ goto end; \
} else if (fidx < 0 || ctx_qf < 0) { \
break; \
} else if (ctx_qf >= qf_num) { \
av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
type, ctx_qf, qf_num); \
- return AVERROR(EINVAL); \
+ err = AVERROR(EINVAL); \
+ goto end; \
} \
\
av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)" \
@@ -1550,6 +1568,36 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
#undef CHECK_QUEUE
+ /* Update the new queue family fields. If non-zero already,
+ * it means API users have set it. */
+ if (!hwctx->nb_qf) {
+#define ADD_QUEUE(ctx_qf, qc, flag) \
+ do { \
+ if (ctx_qf != -1) { \
+ hwctx->qf[hwctx->nb_qf++] = (AVVulkanDeviceQueueFamily) { \
+ .idx = ctx_qf, \
+ .num = qc, \
+ .flags = flag, \
+ }; \
+ } \
+ } while (0)
+
+ ADD_QUEUE(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT);
+ ADD_QUEUE(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT);
+ ADD_QUEUE(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT);
+ ADD_QUEUE(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
+ ADD_QUEUE(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
+#undef ADD_QUEUE
+ }
+
+ for (int i = 0; i < hwctx->nb_qf; i++) {
+ if (!hwctx->qf[i].video_caps &&
+ hwctx->qf[i].flags & (VK_QUEUE_VIDEO_DECODE_BIT_KHR |
+ VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) {
+ hwctx->qf[i].video_caps = qf_vid[hwctx->qf[i].idx].videoCodecOperations;
+ }
+ }
+
if (!hwctx->lock_queue)
hwctx->lock_queue = lock_queue;
if (!hwctx->unlock_queue)
@@ -1565,7 +1613,10 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
ff_vk_qf_init(&p->vkctx, &p->compute_qf, VK_QUEUE_COMPUTE_BIT);
ff_vk_qf_init(&p->vkctx, &p->transfer_qf, VK_QUEUE_TRANSFER_BIT);
- return 0;
+end:
+ av_free(qf_vid);
+ av_free(qf);
+ return err;
}
static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index cbbd2390c1..e9e42015f7 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -30,6 +30,20 @@
typedef struct AVVkFrame AVVkFrame;
+typedef struct AVVulkanDeviceQueueFamily {
+ /* Queue family index */
+ int idx;
+ /* Number of queues in the queue family in use */
+ int num;
+ /* Queue family capabilities. Must be non-zero.
+ * Flags may be removed to indicate the queue family may not be used
+ * for a given purpose. */
+ VkQueueFlagBits flags;
+ /* Vulkan implementations are allowed to list multiple video queues
+ * which differ in what they can encode or decode. */
+ VkVideoCodecOperationFlagBitsKHR video_caps;
+} AVVulkanDeviceQueueFamily;
+
/**
* @file
* API-specific header for AV_HWDEVICE_TYPE_VULKAN.
@@ -151,6 +165,17 @@ typedef struct AVVulkanDeviceContext {
* Similar to lock_queue(), unlocks a queue. Must only be called after locking.
*/
void (*unlock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index);
+
+ /**
+ * Queue families used. Must be preferentially ordered. List may contain
+ * duplicates.
+ *
+ * For compatibility reasons, all the enabled queue families listed above
+ * (queue_family_(tx/comp/encode/decode)_index) must also be included in
+ * this list until they're removed after deprecation.
+ */
+ AVVulkanDeviceQueueFamily qf[32];
+ int nb_qf;
} AVVulkanDeviceContext;
/**
--
2.45.2.753.g447d99e1c3b
More information about the ffmpeg-devel
mailing list