19 #define VK_NO_PROTOTYPES
20 #define VK_ENABLE_BETA_EXTENSIONS
24 #include <versionhelpers.h>
46 #include <drm_fourcc.h>
49 #include <va/va_drmcommon.h>
/* Shorthand for checking a CUDA driver-API call: forwards to
 * FF_CUDA_CHECK_DL with the local 'cuda_cu' context and dynamically
 * loaded 'cu' function table. */
57 #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
85 VkPhysicalDeviceProperties2
props;
86 VkPhysicalDeviceMemoryProperties
mprops;
87 VkPhysicalDeviceExternalMemoryHostPropertiesEXT
hprops;
/* Grows the string array 'list' by one slot and appends a strdup'd copy
 * of 'val'. On any allocation failure sets the caller's 'err' to
 * AVERROR(ENOMEM) and jumps to the caller's 'fail' label.
 * Uses a temporary for the realloc result so the original array is not
 * leaked when av_realloc_array() fails — the fail path can then still
 * free the already-added entries with RELEASE_PROPS(). */
#define ADD_VAL_TO_LIST(list, count, val)                                      \
    do {                                                                       \
        void *ftmp = av_realloc_array(list, sizeof(*list), count + 1);         \
        if (!ftmp) {                                                           \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
        list = ftmp;                                                           \
        count++;                                                               \
        list[count - 1] = av_strdup(val);                                      \
        if (!list[count - 1]) {                                                \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
    } while (0)
/* Frees each string in 'props' and then the array itself; a NULL array
 * is a no-op. Wrapped in do { } while (0) so the macro behaves as a
 * single statement (avoids the dangling-else hazard of a bare 'if'). */
#define RELEASE_PROPS(props, count)                                            \
    do {                                                                       \
        if (props) {                                                           \
            for (int i = 0; i < count; i++)                                    \
                av_free((void *)((props)[i]));                                 \
            av_free((void *)(props));                                          \
        }                                                                      \
    } while (0)
165 static const struct {
183 {
AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
188 {
AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
193 {
AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
198 {
AV_PIX_FMT_YUVA420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
199 {
AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
201 {
AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
203 {
AV_PIX_FMT_YUVA422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
204 {
AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
205 {
AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
206 {
AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
208 {
AV_PIX_FMT_YUVA444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
209 {
AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
210 {
AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
211 {
AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
232 {
AV_PIX_FMT_GBRAP, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
233 {
AV_PIX_FMT_GBRAP16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
234 {
AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
235 {
AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
248 const VkBaseInStructure *in = chain;
250 if (in->sType == stype)
261 VkBaseOutStructure *
out = chain;
284 VkFormatFeatureFlags
flags;
285 VkFormatProperties2 prop = {
286 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
288 vk->GetPhysicalDeviceFormatProperties2(hwctx->
phys_dev, fmt[
i], &prop);
289 flags =
linear ? prop.formatProperties.linearTilingFeatures :
290 prop.formatProperties.optimalTilingFeatures;
303 static const char *lib_names[] = {
306 #elif defined(__APPLE__)
317 p->
libvulkan = dlopen(lib_names[
i], RTLD_NOW | RTLD_LOCAL);
354 { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
355 { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM },
/* Expands to a switch case that returns the enumerator's name as a
 * string literal (via the preprocessor stringize operator). */
370 #define CASE(VAL) case VAL: return #VAL
376 CASE(VK_EVENT_RESET);
378 CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
379 CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
380 CASE(VK_ERROR_INITIALIZATION_FAILED);
381 CASE(VK_ERROR_DEVICE_LOST);
382 CASE(VK_ERROR_MEMORY_MAP_FAILED);
383 CASE(VK_ERROR_LAYER_NOT_PRESENT);
384 CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
385 CASE(VK_ERROR_FEATURE_NOT_PRESENT);
386 CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
387 CASE(VK_ERROR_TOO_MANY_OBJECTS);
388 CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
389 CASE(VK_ERROR_FRAGMENTED_POOL);
390 CASE(VK_ERROR_SURFACE_LOST_KHR);
391 CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
392 CASE(VK_SUBOPTIMAL_KHR);
393 CASE(VK_ERROR_OUT_OF_DATE_KHR);
394 CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
395 CASE(VK_ERROR_VALIDATION_FAILED_EXT);
396 CASE(VK_ERROR_INVALID_SHADER_NV);
397 CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
398 CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
399 CASE(VK_ERROR_NOT_PERMITTED_EXT);
400 CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
401 CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
402 CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
403 default:
return "Unknown error";
409 VkDebugUtilsMessageTypeFlagsEXT messageType,
410 const VkDebugUtilsMessengerCallbackDataEXT *
data,
417 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l =
AV_LOG_VERBOSE;
break;
418 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: l =
AV_LOG_INFO;
break;
419 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l =
AV_LOG_WARNING;
break;
420 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: l =
AV_LOG_ERROR;
break;
425 for (
int i = 0;
i <
data->cmdBufLabelCount;
i++)
432 const char *
const **dst, uint32_t *num,
int debug)
435 const char **extension_names =
NULL;
439 int err = 0, found, extensions_found = 0;
442 int optional_exts_num;
443 uint32_t sup_ext_count;
444 char *user_exts_str =
NULL;
446 VkExtensionProperties *sup_ext;
456 if (!user_exts_str) {
461 vk->EnumerateInstanceExtensionProperties(
NULL, &sup_ext_count,
NULL);
462 sup_ext =
av_malloc_array(sup_ext_count,
sizeof(VkExtensionProperties));
465 vk->EnumerateInstanceExtensionProperties(
NULL, &sup_ext_count, sup_ext);
473 if (!user_exts_str) {
478 vk->EnumerateDeviceExtensionProperties(hwctx->
phys_dev,
NULL,
479 &sup_ext_count,
NULL);
480 sup_ext =
av_malloc_array(sup_ext_count,
sizeof(VkExtensionProperties));
483 vk->EnumerateDeviceExtensionProperties(hwctx->
phys_dev,
NULL,
484 &sup_ext_count, sup_ext);
487 for (
int i = 0;
i < optional_exts_num;
i++) {
488 tstr = optional_exts[
i].
name;
490 for (
int j = 0; j < sup_ext_count; j++) {
491 if (!strcmp(tstr, sup_ext[j].extensionName)) {
505 tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
507 for (
int j = 0; j < sup_ext_count; j++) {
508 if (!strcmp(tstr, sup_ext[j].extensionName)) {
526 char *save, *token =
av_strtok(user_exts_str,
"+", &save);
529 for (
int j = 0; j < sup_ext_count; j++) {
530 if (!strcmp(token, sup_ext[j].extensionName)) {
546 *dst = extension_names;
547 *num = extensions_found;
561 const char *
const **dst, uint32_t *num,
564 static const char default_layer[] = {
"VK_LAYER_KHRONOS_validation" };
566 int found = 0, err = 0;
570 uint32_t sup_layer_count;
571 VkLayerProperties *sup_layers;
574 char *user_layers_str =
NULL;
577 const char **enabled_layers =
NULL;
578 uint32_t enabled_layers_count = 0;
581 int debug = debug_opt && strtol(debug_opt->
value,
NULL, 10);
584 if (debug_opt && !debug)
587 vk->EnumerateInstanceLayerProperties(&sup_layer_count,
NULL);
588 sup_layers =
av_malloc_array(sup_layer_count,
sizeof(VkLayerProperties));
591 vk->EnumerateInstanceLayerProperties(&sup_layer_count, sup_layers);
594 for (
int i = 0;
i < sup_layer_count;
i++)
600 for (
int i = 0;
i < sup_layer_count;
i++) {
601 if (!strcmp(default_layer, sup_layers[
i].layerName)) {
616 if (!user_layers_str) {
621 token =
av_strtok(user_layers_str,
"+", &save);
624 if (!strcmp(default_layer, token)) {
634 for (
int j = 0; j < sup_layer_count; j++) {
635 if (!strcmp(token, sup_layers[j].layerName)) {
645 "Validation Layer \"%s\" not support.\n", token);
657 *dst = enabled_layers;
658 *num = enabled_layers_count;
672 int err = 0, debug_mode = 0;
677 VkApplicationInfo application_info = {
678 .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
679 .pEngineName =
"libavutil",
680 .apiVersion = VK_API_VERSION_1_2,
685 VkInstanceCreateInfo inst_props = {
686 .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
687 .pApplicationInfo = &application_info,
703 &inst_props.enabledLayerCount, &debug_mode);
709 &inst_props.enabledExtensionCount, debug_mode);
716 ret = vk->CreateInstance(&inst_props, hwctx->
alloc, &hwctx->
inst);
719 if (
ret != VK_SUCCESS) {
733 VkDebugUtilsMessengerCreateInfoEXT dbg = {
734 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
735 .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
736 VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
737 VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
738 VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
739 .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
740 VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
741 VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
746 vk->CreateDebugUtilsMessengerEXT(hwctx->
inst, &dbg,
753 RELEASE_PROPS(inst_props.ppEnabledLayerNames, inst_props.enabledLayerCount);
769 case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU:
return "integrated";
770 case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:
return "discrete";
771 case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:
return "virtual";
772 case VK_PHYSICAL_DEVICE_TYPE_CPU:
return "software";
773 default:
return "unknown";
780 int err = 0, choice = -1;
785 VkPhysicalDevice *devices =
NULL;
786 VkPhysicalDeviceIDProperties *idp =
NULL;
787 VkPhysicalDeviceProperties2 *prop =
NULL;
790 ret = vk->EnumeratePhysicalDevices(hwctx->
inst, &num,
NULL);
791 if (
ret != VK_SUCCESS || !num) {
800 ret = vk->EnumeratePhysicalDevices(hwctx->
inst, &num, devices);
801 if (
ret != VK_SUCCESS) {
821 for (
int i = 0;
i < num;
i++) {
822 idp[
i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
823 prop[
i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
824 prop[
i].pNext = &idp[
i];
826 vk->GetPhysicalDeviceProperties2(devices[
i], &prop[
i]);
828 prop[
i].properties.deviceName,
830 prop[
i].properties.deviceID);
834 for (
int i = 0;
i < num;
i++) {
835 if (!strncmp(idp[
i].deviceUUID, select->
uuid, VK_UUID_SIZE)) {
843 }
else if (select->
name) {
845 for (
int i = 0;
i < num;
i++) {
846 if (strstr(prop[
i].properties.deviceName, select->
name)) {
857 for (
int i = 0;
i < num;
i++) {
858 if (select->
pci_device == prop[
i].properties.deviceID) {
869 for (
int i = 0;
i < num;
i++) {
870 if (select->
vendor_id == prop[
i].properties.vendorID) {
880 if (select->
index < num) {
881 choice = select->
index;
893 choice, prop[choice].properties.deviceName,
895 prop[choice].properties.deviceID);
908 VkQueueFlagBits
flags)
911 uint32_t min_score = UINT32_MAX;
913 for (
int i = 0;
i < num_qf;
i++) {
914 const VkQueueFlagBits qflags = qf[
i].queueFlags;
915 if (qflags &
flags) {
916 uint32_t score =
av_popcount(qflags) + qf[
i].timestampValidBits;
917 if (score < min_score) {
925 qf[
index].timestampValidBits++;
934 VkQueueFamilyProperties *qf =
NULL;
938 int graph_index, comp_index, tx_index, enc_index, dec_index;
941 vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->
phys_dev, &num,
NULL);
953 vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->
phys_dev, &num, qf);
956 for (
int i = 0;
i < num;
i++) {
958 ((qf[
i].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ?
" graphics" :
"",
959 ((qf[
i].queueFlags) & VK_QUEUE_COMPUTE_BIT) ?
" compute" :
"",
960 ((qf[
i].queueFlags) & VK_QUEUE_TRANSFER_BIT) ?
" transfer" :
"",
961 ((qf[
i].queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ?
" encode" :
"",
962 ((qf[
i].queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ?
" decode" :
"",
963 ((qf[
i].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ?
" sparse" :
"",
964 ((qf[
i].queueFlags) & VK_QUEUE_PROTECTED_BIT) ?
" protected" :
"",
969 qf[
i].timestampValidBits = 0;
992 #define SETUP_QUEUE(qf_idx) \
995 int qc = qf[fidx].queueCount; \
996 VkDeviceQueueCreateInfo *pc; \
998 if (fidx == graph_index) { \
999 hwctx->queue_family_index = fidx; \
1000 hwctx->nb_graphics_queues = qc; \
1003 if (fidx == comp_index) { \
1004 hwctx->queue_family_comp_index = fidx; \
1005 hwctx->nb_comp_queues = qc; \
1008 if (fidx == tx_index) { \
1009 hwctx->queue_family_tx_index = fidx; \
1010 hwctx->nb_tx_queues = qc; \
1013 if (fidx == enc_index) { \
1014 hwctx->queue_family_encode_index = fidx; \
1015 hwctx->nb_encode_queues = qc; \
1018 if (fidx == dec_index) { \
1019 hwctx->queue_family_decode_index = fidx; \
1020 hwctx->nb_decode_queues = qc; \
1024 pc = av_realloc((void *)cd->pQueueCreateInfos, \
1025 sizeof(*pc) * (cd->queueCreateInfoCount + 1)); \
1028 return AVERROR(ENOMEM); \
1030 cd->pQueueCreateInfos = pc; \
1031 pc = &pc[cd->queueCreateInfoCount]; \
1033 weights = av_malloc(qc * sizeof(float)); \
1036 return AVERROR(ENOMEM); \
1039 memset(pc, 0, sizeof(*pc)); \
1040 pc->sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; \
1041 pc->queueFamilyIndex = fidx; \
1042 pc->queueCount = qc; \
1043 pc->pQueuePriorities = weights; \
1045 for (int i = 0; i < qc; i++) \
1046 weights[i] = 1.0f / qc; \
1048 cd->queueCreateInfoCount++; \
1065 int queue_family_index,
int num_queues)
1073 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
1074 .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
1075 .queueFamilyIndex = queue_family_index,
1077 VkCommandBufferAllocateInfo cbuf_create = {
1078 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
1079 .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
1080 .commandBufferCount = num_queues,
1088 if (
ret != VK_SUCCESS) {
1098 cbuf_create.commandPool = cmd->
pool;
1101 ret = vk->AllocateCommandBuffers(hwctx->
act_dev, &cbuf_create, cmd->
bufs);
1102 if (
ret != VK_SUCCESS) {
1113 for (
int i = 0;
i < num_queues;
i++) {
1115 vk->GetDeviceQueue(hwctx->
act_dev, queue_family_index,
i, &q->
queue);
1134 vk->WaitForFences(hwctx->
act_dev, 1, &q->
fence, VK_TRUE, UINT64_MAX);
1181 VkCommandBufferBeginInfo cmd_start = {
1182 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
1183 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
1188 VkFenceCreateInfo fence_spawn = {
1189 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
1193 if (
ret != VK_SUCCESS) {
1199 vk->WaitForFences(hwctx->
act_dev, 1, &q->
fence, VK_TRUE, UINT64_MAX);
1207 if (
ret != VK_SUCCESS) {
1222 if (!deps || !nb_deps)
1232 for (
int i = 0;
i < nb_deps;
i++) {
1247 VkSubmitInfo *s_info,
AVVkFrame *
f,
int synchronous)
1255 if (
ret != VK_SUCCESS) {
1263 s_info->commandBufferCount = 1;
1266 if (
ret != VK_SUCCESS) {
1274 for (
int i = 0;
i < s_info->signalSemaphoreCount;
i++)
1281 vk->WaitForFences(hwctx->
act_dev, 1, &q->
fence, VK_TRUE, UINT64_MAX);
1301 vk->DestroyDebugUtilsMessengerEXT(hwctx->
inst, p->
debug_ctx,
1305 vk->DestroyInstance(hwctx->
inst, hwctx->
alloc);
1324 VkPhysicalDeviceVulkan12Features dev_features_1_2 = {
1325 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
1327 VkPhysicalDeviceVulkan11Features dev_features_1_1 = {
1328 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
1329 .pNext = &dev_features_1_2,
1331 VkPhysicalDeviceFeatures2 dev_features = {
1332 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
1333 .pNext = &dev_features_1_1,
1336 VkDeviceCreateInfo dev_info = {
1337 .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
1341 hwctx->
device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1356 vk->GetPhysicalDeviceFeatures2(hwctx->
phys_dev, &dev_features);
/* Copies one VkPhysicalDeviceFeatures member from the locally queried
 * 'dev_features' into the destination features2 struct, so only
 * explicitly selected device features are enabled. */
1359 #define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.features.NAME;
1369 if (!dev_features_1_2.timelineSemaphore) {
1381 &dev_info.enabledExtensionCount, 0))) {
1382 for (
int i = 0;
i < dev_info.queueCreateInfoCount;
i++)
1383 av_free((
void *)dev_info.pQueueCreateInfos[
i].pQueuePriorities);
1384 av_free((
void *)dev_info.pQueueCreateInfos);
1391 for (
int i = 0;
i < dev_info.queueCreateInfoCount;
i++)
1392 av_free((
void *)dev_info.pQueueCreateInfos[
i].pQueuePriorities);
1393 av_free((
void *)dev_info.pQueueCreateInfos);
1395 if (
ret != VK_SUCCESS) {
1398 for (
int i = 0;
i < dev_info.enabledExtensionCount;
i++)
1399 av_free((
void *)dev_info.ppEnabledExtensionNames[
i]);
1400 av_free((
void *)dev_info.ppEnabledExtensionNames);
1430 int graph_index, comp_index, tx_index, enc_index, dec_index;
1449 p->
props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1451 p->
hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;
1453 vk->GetPhysicalDeviceProperties2(hwctx->
phys_dev, &p->
props);
1455 p->
props.properties.deviceName);
1458 p->
props.properties.limits.optimalBufferCopyRowPitchAlignment);
1460 p->
props.properties.limits.minMemoryMapAlignment);
1463 p->
hprops.minImportedHostPointerAlignment);
1468 vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->
phys_dev, &queue_num,
NULL);
1480 #define CHECK_QUEUE(type, required, fidx, ctx_qf, qc) \
1482 if (ctx_qf < 0 && required) { \
1483 av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing" \
1484 " in the context!\n", type); \
1485 return AVERROR(EINVAL); \
1486 } else if (fidx < 0 || ctx_qf < 0) { \
1488 } else if (ctx_qf >= queue_num) { \
1489 av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
1490 type, ctx_qf, queue_num); \
1491 return AVERROR(EINVAL); \
1494 av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)" \
1495 " for%s%s%s%s%s\n", \
1497 ctx_qf == graph_index ? " graphics" : "", \
1498 ctx_qf == comp_index ? " compute" : "", \
1499 ctx_qf == tx_index ? " transfers" : "", \
1500 ctx_qf == enc_index ? " encode" : "", \
1501 ctx_qf == dec_index ? " decode" : ""); \
1502 graph_index = (ctx_qf == graph_index) ? -1 : graph_index; \
1503 comp_index = (ctx_qf == comp_index) ? -1 : comp_index; \
1504 tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \
1505 enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \
1506 dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \
1507 p->qfs[p->num_qfs++] = ctx_qf; \
1519 vk->GetPhysicalDeviceMemoryProperties(hwctx->
phys_dev, &p->
mprops);
1528 if (device && device[0]) {
1530 dev_select.
index = strtol(device, &end, 10);
1531 if (end == device) {
1532 dev_select.
index = 0;
1533 dev_select.
name = device;
1549 switch(src_ctx->
type) {
1555 const char *vendor = vaQueryVendorString(src_hwctx->
display);
1561 if (strstr(vendor,
"Intel"))
1562 dev_select.vendor_id = 0x8086;
1563 if (strstr(vendor,
"AMD"))
1564 dev_select.vendor_id = 0x1002;
1572 drmDevice *drm_dev_info;
1573 int err = drmGetDevice(src_hwctx->
fd, &drm_dev_info);
1579 if (drm_dev_info->bustype == DRM_BUS_PCI)
1580 dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;
1582 drmFreeDevice(&drm_dev_info);
1592 CudaFunctions *cu = cu_internal->
cuda_dl;
1594 int ret =
CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
1601 dev_select.has_uuid = 1;
1612 const void *hwconfig,
1644 constraints->
max_width = p->
props.properties.limits.maxImageDimension2D;
1645 constraints->
max_height = p->
props.properties.limits.maxImageDimension2D;
1658 VkMemoryPropertyFlagBits req_flags,
const void *alloc_extension,
1659 VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
1666 VkMemoryAllocateInfo alloc_info = {
1667 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
1668 .pNext = alloc_extension,
1669 .allocationSize = req->size,
1674 for (
int i = 0;
i < p->
mprops.memoryTypeCount;
i++) {
1675 const VkMemoryType *
type = &p->
mprops.memoryTypes[
i];
1678 if (!(req->memoryTypeBits & (1 <<
i)))
1682 if ((
type->propertyFlags & req_flags) != req_flags)
1686 if (req->size > p->
mprops.memoryHeaps[
type->heapIndex].size)
1700 alloc_info.memoryTypeIndex =
index;
1702 ret = vk->AllocateMemory(dev_hwctx->
act_dev, &alloc_info,
1703 dev_hwctx->
alloc, mem);
1704 if (
ret != VK_SUCCESS) {
1710 *mem_flags |= p->
mprops.memoryTypes[
index].propertyFlags;
1723 if (internal->cuda_fc_ref) {
1729 CudaFunctions *cu = cu_internal->
cuda_dl;
1732 if (internal->cu_sem[
i])
1733 CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[
i]));
1734 if (internal->cu_mma[
i])
1735 CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[
i]));
1736 if (internal->ext_mem[
i])
1737 CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[
i]));
1739 if (internal->ext_sem_handle[
i])
1740 CloseHandle(internal->ext_sem_handle[
i]);
1741 if (internal->ext_mem_handle[
i])
1742 CloseHandle(internal->ext_mem_handle[
i]);
1764 vk->DeviceWaitIdle(hwctx->
act_dev);
1778 void *alloc_pnext,
size_t alloc_pnext_stride)
1789 VkMemoryRequirements cont_memory_requirements = { 0 };
1791 int cont_mem_size = 0;
1797 VkImageMemoryRequirementsInfo2 req_desc = {
1798 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
1801 VkMemoryDedicatedAllocateInfo ded_alloc = {
1802 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
1803 .pNext = (
void *)(((uint8_t *)alloc_pnext) +
i*alloc_pnext_stride),
1805 VkMemoryDedicatedRequirements ded_req = {
1806 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
1808 VkMemoryRequirements2 req = {
1809 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
1813 vk->GetImageMemoryRequirements2(hwctx->
act_dev, &req_desc, &req);
1815 if (
f->tiling == VK_IMAGE_TILING_LINEAR)
1816 req.memoryRequirements.size =
FFALIGN(req.memoryRequirements.size,
1817 p->
props.properties.limits.minMemoryMapAlignment);
1820 if (ded_req.requiresDedicatedAllocation) {
1822 "device requires dedicated image allocation!\n");
1825 cont_memory_requirements = req.memoryRequirements;
1826 }
else if (cont_memory_requirements.memoryTypeBits !=
1827 req.memoryRequirements.memoryTypeBits) {
1829 "and %i, cannot allocate in a single region!\n",
1834 cont_mem_size_list[
i] =
FFALIGN(req.memoryRequirements.size,
1835 req.memoryRequirements.alignment);
1836 cont_mem_size += cont_mem_size_list[
i];
1841 use_ded_mem = ded_req.prefersDedicatedAllocation |
1842 ded_req.requiresDedicatedAllocation;
1844 ded_alloc.image =
f->img[
i];
1848 f->tiling == VK_IMAGE_TILING_LINEAR ?
1849 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
1850 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1851 use_ded_mem ? &ded_alloc : (
void *)ded_alloc.pNext,
1852 &
f->flags, &
f->mem[
i])))
1855 f->size[
i] = req.memoryRequirements.size;
1856 bind_info[
i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
1857 bind_info[
i].image =
f->img[
i];
1858 bind_info[
i].memory =
f->mem[
i];
1862 cont_memory_requirements.size = cont_mem_size;
1866 f->tiling == VK_IMAGE_TILING_LINEAR ?
1867 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
1868 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1869 (
void *)(((uint8_t *)alloc_pnext)),
1870 &
f->flags, &
f->mem[0])))
1873 f->size[0] = cont_memory_requirements.size;
1876 bind_info[
i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
1877 bind_info[
i].image =
f->img[
i];
1878 bind_info[
i].memory =
f->mem[0];
1879 bind_info[
i].memoryOffset =
offset;
1881 f->offset[
i] = bind_info[
i].memoryOffset;
1882 offset += cont_mem_size_list[
i];
1888 if (
ret != VK_SUCCESS) {
1907 uint32_t src_qf, dst_qf;
1908 VkImageLayout new_layout;
1909 VkAccessFlags new_access;
1917 VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
1918 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
1919 .pSignalSemaphoreValues = sem_sig_val,
1920 .signalSemaphoreValueCount =
planes,
1923 VkSubmitInfo s_info = {
1924 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
1925 .pNext = &s_timeline_sem_info,
1926 .pSignalSemaphores =
frame->sem,
1927 .signalSemaphoreCount =
planes,
1932 wait_st[
i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1933 sem_sig_val[
i] =
frame->sem_value[
i] + 1;
1938 new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1939 new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
1940 src_qf = VK_QUEUE_FAMILY_IGNORED;
1941 dst_qf = VK_QUEUE_FAMILY_IGNORED;
1944 new_layout = VK_IMAGE_LAYOUT_GENERAL;
1945 new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
1946 src_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
1947 dst_qf = VK_QUEUE_FAMILY_IGNORED;
1948 s_timeline_sem_info.pWaitSemaphoreValues =
frame->sem_value;
1949 s_timeline_sem_info.waitSemaphoreValueCount =
planes;
1950 s_info.pWaitSemaphores =
frame->sem;
1951 s_info.pWaitDstStageMask = wait_st;
1952 s_info.waitSemaphoreCount =
planes;
1955 new_layout = VK_IMAGE_LAYOUT_GENERAL;
1956 new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
1957 src_qf = VK_QUEUE_FAMILY_IGNORED;
1958 dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
1959 s_timeline_sem_info.pWaitSemaphoreValues =
frame->sem_value;
1960 s_timeline_sem_info.waitSemaphoreValueCount =
planes;
1961 s_info.pWaitSemaphores =
frame->sem;
1962 s_info.pWaitDstStageMask = wait_st;
1963 s_info.waitSemaphoreCount =
planes;
1974 img_bar[
i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
1975 img_bar[
i].srcAccessMask = 0x0;
1976 img_bar[
i].dstAccessMask = new_access;
1977 img_bar[
i].oldLayout =
frame->layout[
i];
1978 img_bar[
i].newLayout = new_layout;
1979 img_bar[
i].srcQueueFamilyIndex = src_qf;
1980 img_bar[
i].dstQueueFamilyIndex = dst_qf;
1981 img_bar[
i].image =
frame->img[
i];
1982 img_bar[
i].subresourceRange.levelCount = 1;
1983 img_bar[
i].subresourceRange.layerCount = 1;
1984 img_bar[
i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
1986 frame->layout[
i] = img_bar[
i].newLayout;
1987 frame->access[
i] = img_bar[
i].dstAccessMask;
1991 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1992 VK_PIPELINE_STAGE_TRANSFER_BIT,
1999 int frame_w,
int frame_h,
int plane)
2016 VkImageTiling tiling, VkImageUsageFlagBits
usage,
2029 VkExportSemaphoreCreateInfo ext_sem_info = {
2030 .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
2032 .handleTypes = IsWindows8OrGreater()
2033 ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
2034 : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
2036 .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
2040 VkSemaphoreTypeCreateInfo sem_type_info = {
2041 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
2043 .pNext = p->
extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info :
NULL,
2047 .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
2051 VkSemaphoreCreateInfo sem_spawn = {
2052 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
2053 .pNext = &sem_type_info,
2064 VkImageCreateInfo create_info = {
2065 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2066 .pNext = create_pnext,
2067 .imageType = VK_IMAGE_TYPE_2D,
2068 .format = img_fmts[
i],
2072 .flags = VK_IMAGE_CREATE_ALIAS_BIT,
2074 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
2076 .samples = VK_SAMPLE_COUNT_1_BIT,
2077 .pQueueFamilyIndices = p->
qfs,
2078 .queueFamilyIndexCount = p->
num_qfs,
2079 .sharingMode = p->
num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
2080 VK_SHARING_MODE_EXCLUSIVE,
2083 get_plane_wh(&create_info.extent.width, &create_info.extent.height,
2086 ret = vk->CreateImage(hwctx->
act_dev, &create_info,
2088 if (
ret != VK_SUCCESS) {
2096 ret = vk->CreateSemaphore(hwctx->
act_dev, &sem_spawn,
2098 if (
ret != VK_SUCCESS) {
2104 f->layout[
i] = create_info.initialLayout;
2106 f->sem_value[
i] = 0;
2122 VkExternalMemoryHandleTypeFlags *comp_handle_types,
2123 VkExternalMemoryHandleTypeFlagBits *iexp,
2124 VkExternalMemoryHandleTypeFlagBits
exp)
2132 const VkImageDrmFormatModifierListCreateInfoEXT *drm_mod_info =
2134 VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
2135 int has_mods = hwctx->
tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && drm_mod_info;
2138 VkExternalImageFormatProperties eprops = {
2139 .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
2141 VkImageFormatProperties2 props = {
2142 .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
2145 VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
2146 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
2148 .pQueueFamilyIndices = p->
qfs,
2149 .queueFamilyIndexCount = p->
num_qfs,
2150 .sharingMode = p->
num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
2151 VK_SHARING_MODE_EXCLUSIVE,
2153 VkPhysicalDeviceExternalImageFormatInfo enext = {
2154 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
2156 .pNext = has_mods ? &phy_dev_mod_info :
NULL,
2158 VkPhysicalDeviceImageFormatInfo2 pinfo = {
2159 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
2160 .pNext = !
exp ?
NULL : &enext,
2162 .type = VK_IMAGE_TYPE_2D,
2164 .usage = hwctx->
usage,
2165 .flags = VK_IMAGE_CREATE_ALIAS_BIT,
2168 nb_mods = has_mods ? drm_mod_info->drmFormatModifierCount : 1;
2169 for (
int i = 0;
i < nb_mods;
i++) {
2171 phy_dev_mod_info.drmFormatModifier = drm_mod_info->pDrmFormatModifiers[
i];
2173 ret = vk->GetPhysicalDeviceImageFormatProperties2(dev_hwctx->
phys_dev,
2176 if (
ret == VK_SUCCESS) {
2178 *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
2193 VkExternalMemoryHandleTypeFlags e = 0x0;
2195 VkExternalMemoryImageCreateInfo eiinfo = {
2196 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
2201 if (p->
extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY)
2203 ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
2204 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT);
2208 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
2212 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2216 eminfo[
i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
2218 eminfo[
i].handleTypes = e;
2222 eiinfo.handleTypes ? &eiinfo :
NULL);
2250 if (
fp->modifier_info) {
2251 if (
fp->modifier_info->pDrmFormatModifiers)
2252 av_freep(&
fp->modifier_info->pDrmFormatModifiers);
2269 const VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
2274 has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
2276 VK_IMAGE_TILING_OPTIMAL;
2288 VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
2291 if (has_modifiers && !modifier_info) {
2293 VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
2295 VkDrmFormatModifierPropertiesEXT *mod_props;
2296 uint64_t *modifiers;
2297 int modifier_count = 0;
2299 VkDrmFormatModifierPropertiesListEXT mod_props_list = {
2300 .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
2302 .drmFormatModifierCount = 0,
2303 .pDrmFormatModifierProperties =
NULL,
2305 VkFormatProperties2 prop = {
2306 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
2307 .pNext = &mod_props_list,
2311 vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->
phys_dev, fmt[0], &prop);
2313 if (!mod_props_list.drmFormatModifierCount) {
2314 av_log(hwfc,
AV_LOG_ERROR,
"There are no supported modifiers for the given sw_format\n");
2319 modifier_info =
av_mallocz(
sizeof(*modifier_info));
2323 modifier_info->pNext =
NULL;
2324 modifier_info->sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT;
2333 fp->modifier_info = modifier_info;
2336 modifiers =
av_mallocz(mod_props_list.drmFormatModifierCount *
2337 sizeof(*modifiers));
2341 modifier_info->pDrmFormatModifiers = modifiers;
2344 mod_props =
av_mallocz(mod_props_list.drmFormatModifierCount *
2345 sizeof(*mod_props));
2349 mod_props_list.pDrmFormatModifierProperties = mod_props;
2352 vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->
phys_dev, fmt[0], &prop);
2355 for (
int i = 0;
i < mod_props_list.drmFormatModifierCount;
i++) {
2356 if (!(mod_props[
i].drmFormatModifierTilingFeatures & hwctx->
usage))
2359 modifiers[modifier_count++] = mod_props[
i].drmFormatModifier;
2362 if (!modifier_count) {
2364 " the usage flags!\n");
2369 modifier_info->drmFormatModifierCount = modifier_count;
2455 !(
map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
2460 flush_ranges[
i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
2461 flush_ranges[
i].memory =
map->frame->mem[
i];
2462 flush_ranges[
i].size = VK_WHOLE_SIZE;
2467 if (
ret != VK_SUCCESS) {
2474 vk->UnmapMemory(hwctx->
act_dev,
map->frame->mem[
i]);
2483 int err, mapped_mem_count = 0, mem_planes = 0;
2502 if (!(
f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
2503 !(
f->tiling == VK_IMAGE_TILING_LINEAR)) {
2514 for (
int i = 0;
i < mem_planes;
i++) {
2515 ret = vk->MapMemory(hwctx->act_dev,
f->mem[
i], 0,
2516 VK_WHOLE_SIZE, 0, (
void **)&dst->
data[
i]);
2517 if (
ret != VK_SUCCESS) {
2533 !(
f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
2536 map_mem_ranges[
i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
2537 map_mem_ranges[
i].size = VK_WHOLE_SIZE;
2538 map_mem_ranges[
i].memory =
f->mem[
i];
2541 ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev,
planes,
2543 if (
ret != VK_SUCCESS) {
2552 VkImageSubresource
sub = {
2553 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
2555 VkSubresourceLayout
layout;
2556 vk->GetImageSubresourceLayout(hwctx->act_dev,
f->img[
i], &
sub, &
layout);
2571 for (
int i = 0;
i < mapped_mem_count;
i++)
2572 vk->UnmapMemory(hwctx->act_dev,
f->mem[
i]);
2587 VkSemaphoreWaitInfo wait_info = {
2588 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
2590 .pSemaphores =
f->sem,
2591 .pValues =
f->sem_value,
2592 .semaphoreCount =
planes,
2595 vk->WaitSemaphores(hwctx->
act_dev, &wait_info, UINT64_MAX);
2608 static const struct {
2609 uint32_t drm_fourcc;
2611 } vulkan_drm_format_map[] = {
2612 { DRM_FORMAT_R8, VK_FORMAT_R8_UNORM },
2613 { DRM_FORMAT_R16, VK_FORMAT_R16_UNORM },
2614 { DRM_FORMAT_GR88, VK_FORMAT_R8G8_UNORM },
2615 { DRM_FORMAT_RG88, VK_FORMAT_R8G8_UNORM },
2616 { DRM_FORMAT_GR1616, VK_FORMAT_R16G16_UNORM },
2617 { DRM_FORMAT_RG1616, VK_FORMAT_R16G16_UNORM },
2618 { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
2619 { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
2620 { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
2621 { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
2624 static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
2627 if (vulkan_drm_format_map[
i].drm_fourcc == drm_fourcc)
2628 return vulkan_drm_format_map[
i].vk_format;
2629 return VK_FORMAT_UNDEFINED;
2638 int bind_counts = 0;
2649 if (drm_to_vulkan_fmt(
desc->layers[
i].format) == VK_FORMAT_UNDEFINED) {
2651 desc->layers[
i].format);
2662 f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;
2664 for (
int i = 0;
i <
desc->nb_layers;
i++) {
2668 VkSemaphoreTypeCreateInfo sem_type_info = {
2669 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
2670 .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
2673 VkSemaphoreCreateInfo sem_spawn = {
2674 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
2675 .pNext = &sem_type_info,
2680 VkImageDrmFormatModifierExplicitCreateInfoEXT ext_img_mod_spec = {
2681 .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
2682 .drmFormatModifier =
desc->objects[0].format_modifier,
2683 .drmFormatModifierPlaneCount =
planes,
2684 .pPlaneLayouts = (
const VkSubresourceLayout *)&ext_img_layouts,
2686 VkExternalMemoryImageCreateInfo ext_img_spec = {
2687 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
2688 .pNext = &ext_img_mod_spec,
2689 .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
2691 VkImageCreateInfo create_info = {
2692 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2693 .pNext = &ext_img_spec,
2694 .imageType = VK_IMAGE_TYPE_2D,
2695 .format = drm_to_vulkan_fmt(
desc->layers[
i].format),
2700 .tiling =
f->tiling,
2701 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
2702 .usage = VK_IMAGE_USAGE_SAMPLED_BIT |
2703 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
2704 .samples = VK_SAMPLE_COUNT_1_BIT,
2705 .pQueueFamilyIndices = p->
qfs,
2706 .queueFamilyIndexCount = p->
num_qfs,
2707 .sharingMode = p->
num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
2708 VK_SHARING_MODE_EXCLUSIVE,
2712 VkExternalImageFormatProperties ext_props = {
2713 .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
2715 VkImageFormatProperties2 props_ret = {
2716 .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
2717 .pNext = &ext_props,
2719 VkPhysicalDeviceImageDrmFormatModifierInfoEXT props_drm_mod = {
2720 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
2721 .drmFormatModifier = ext_img_mod_spec.drmFormatModifier,
2722 .pQueueFamilyIndices = create_info.pQueueFamilyIndices,
2723 .queueFamilyIndexCount = create_info.queueFamilyIndexCount,
2724 .sharingMode = create_info.sharingMode,
2726 VkPhysicalDeviceExternalImageFormatInfo props_ext = {
2727 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
2728 .pNext = &props_drm_mod,
2729 .handleType = ext_img_spec.handleTypes,
2731 VkPhysicalDeviceImageFormatInfo2 fmt_props = {
2732 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
2733 .pNext = &props_ext,
2734 .format = create_info.format,
2735 .type = create_info.imageType,
2736 .tiling = create_info.tiling,
2737 .usage = create_info.usage,
2738 .flags = create_info.flags,
2742 ret = vk->GetPhysicalDeviceImageFormatProperties2(hwctx->
phys_dev,
2743 &fmt_props, &props_ret);
2744 if (
ret != VK_SUCCESS) {
2752 get_plane_wh(&create_info.extent.width, &create_info.extent.height,
2756 for (
int j = 0; j <
planes; j++) {
2757 ext_img_layouts[j].offset =
desc->layers[
i].planes[j].offset;
2758 ext_img_layouts[j].rowPitch =
desc->layers[
i].planes[j].pitch;
2759 ext_img_layouts[j].size = 0;
2760 ext_img_layouts[j].arrayPitch = 0;
2761 ext_img_layouts[j].depthPitch = 0;
2765 ret = vk->CreateImage(hwctx->
act_dev, &create_info,
2767 if (
ret != VK_SUCCESS) {
2774 ret = vk->CreateSemaphore(hwctx->
act_dev, &sem_spawn,
2776 if (
ret != VK_SUCCESS) {
2787 f->layout[
i] = create_info.initialLayout;
2789 f->sem_value[
i] = 0;
2792 for (
int i = 0;
i <
desc->nb_objects;
i++) {
2794 VkImageMemoryRequirementsInfo2 req_desc = {
2795 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
2798 VkMemoryDedicatedRequirements ded_req = {
2799 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
2801 VkMemoryRequirements2 req2 = {
2802 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
2807 VkMemoryFdPropertiesKHR fdmp = {
2808 .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
2810 VkImportMemoryFdInfoKHR idesc = {
2811 .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
2812 .fd = dup(
desc->objects[
i].fd),
2813 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
2815 VkMemoryDedicatedAllocateInfo ded_alloc = {
2816 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
2818 .image = req_desc.image,
2822 ret = vk->GetMemoryFdPropertiesKHR(hwctx->
act_dev,
2823 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
2825 if (
ret != VK_SUCCESS) {
2833 vk->GetImageMemoryRequirements2(hwctx->
act_dev, &req_desc, &req2);
2836 req2.memoryRequirements.memoryTypeBits = fdmp.memoryTypeBits;
2839 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
2840 (ded_req.prefersDedicatedAllocation ||
2841 ded_req.requiresDedicatedAllocation) ?
2842 &ded_alloc : ded_alloc.pNext,
2843 &
f->flags, &
f->mem[
i]);
2849 f->size[
i] = req2.memoryRequirements.size;
2852 for (
int i = 0;
i <
desc->nb_layers;
i++) {
2854 for (
int j = 0; j <
planes; j++) {
2855 VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
2856 j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
2857 VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
2859 plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
2861 plane_info[bind_counts].planeAspect = aspect;
2863 bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
2865 bind_info[bind_counts].image =
f->img[
i];
2866 bind_info[bind_counts].memory =
f->mem[
desc->layers[
i].planes[j].object_index];
2869 bind_info[bind_counts].memoryOffset = 0;
2876 ret = vk->BindImageMemory2(hwctx->
act_dev, bind_counts, bind_info);
2877 if (
ret != VK_SUCCESS) {
2893 for (
int i = 0;
i <
desc->nb_layers;
i++) {
2897 for (
int i = 0;
i <
desc->nb_objects;
i++)
2911 if ((err = vulkan_map_from_drm_frame_desc(hwfc, &
f,
src)))
2915 dst->
data[0] = (uint8_t *)
f;
2920 &vulkan_unmap_from_drm,
f);
2942 VASurfaceID surface_id = (VASurfaceID)(uintptr_t)
src->data[3];
2948 vaSyncSurface(vaapi_ctx->display, surface_id);
2956 err = vulkan_map_from_drm(dst_fc, dst,
tmp,
flags);
2989 CudaFunctions *cu = cu_internal->
cuda_dl;
2990 CUarray_format cufmt =
desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
2991 CU_AD_FORMAT_UNSIGNED_INT8;
2996 if (!dst_int || !dst_int->cuda_fc_ref) {
3004 if (!dst_int->cuda_fc_ref) {
3010 CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
3015 .NumChannels = 1 + ((
planes == 2) &&
i),
3023 CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
3024 .type = IsWindows8OrGreater()
3025 ? CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
3026 : CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT,
3027 .size = dst_f->
size[
i],
3029 VkMemoryGetWin32HandleInfoKHR export_info = {
3030 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
3031 .memory = dst_f->
mem[
i],
3032 .handleType = IsWindows8OrGreater()
3033 ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
3034 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
3036 VkSemaphoreGetWin32HandleInfoKHR sem_export = {
3037 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR,
3038 .semaphore = dst_f->
sem[
i],
3039 .handleType = IsWindows8OrGreater()
3040 ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
3041 : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
3043 CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
3047 ret = vk->GetMemoryWin32HandleKHR(hwctx->
act_dev, &export_info,
3048 &ext_desc.handle.win32.handle);
3049 if (
ret != VK_SUCCESS) {
3055 dst_int->ext_mem_handle[
i] = ext_desc.handle.win32.handle;
3057 CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
3058 .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
3059 .size = dst_f->
size[
i],
3061 VkMemoryGetFdInfoKHR export_info = {
3062 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
3063 .memory = dst_f->
mem[
i],
3064 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
3066 VkSemaphoreGetFdInfoKHR sem_export = {
3067 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
3068 .semaphore = dst_f->
sem[
i],
3069 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
3071 CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
3075 ret = vk->GetMemoryFdKHR(hwctx->
act_dev, &export_info,
3076 &ext_desc.handle.fd);
3077 if (
ret != VK_SUCCESS) {
3085 ret =
CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[
i], &ext_desc));
3088 close(ext_desc.handle.fd);
3095 tex_desc.arrayDesc.Width = p_w;
3096 tex_desc.arrayDesc.Height = p_h;
3098 ret =
CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[
i],
3099 dst_int->ext_mem[
i],
3106 ret =
CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[
i],
3107 dst_int->cu_mma[
i], 0));
3114 ret = vk->GetSemaphoreWin32HandleKHR(hwctx->
act_dev, &sem_export,
3115 &ext_sem_desc.handle.win32.handle);
3117 ret = vk->GetSemaphoreFdKHR(hwctx->
act_dev, &sem_export,
3118 &ext_sem_desc.handle.fd);
3120 if (
ret != VK_SUCCESS) {
3127 dst_int->ext_sem_handle[
i] = ext_sem_desc.handle.win32.handle;
3130 ret =
CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[
i],
3134 close(ext_sem_desc.handle.fd);
3164 CudaFunctions *cu = cu_internal->
cuda_dl;
3174 err =
CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
3178 err = vulkan_export_to_cuda(hwfc,
src->hw_frames_ctx, dst);
3187 s_w_par[
i].params.fence.value = dst_f->
sem_value[
i] + 0;
3188 s_s_par[
i].params.fence.value = dst_f->
sem_value[
i] + 1;
3191 err =
CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
3192 planes, cuda_dev->stream));
3197 CUDA_MEMCPY2D cpy = {
3198 .srcMemoryType = CU_MEMORYTYPE_DEVICE,
3199 .srcDevice = (CUdeviceptr)
src->data[
i],
3200 .srcPitch =
src->linesize[
i],
3203 .dstMemoryType = CU_MEMORYTYPE_ARRAY,
3204 .dstArray = dst_int->cu_array[
i],
3210 cpy.WidthInBytes = p_w *
desc->comp[
i].step;
3213 err =
CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
3218 err =
CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
3219 planes, cuda_dev->stream));
3246 switch (
src->format) {
3251 return vulkan_map_from_vaapi(hwfc, dst,
src,
flags);
3257 return vulkan_map_from_drm(hwfc, dst,
src,
flags);
3267 typedef struct VulkanDRMMapping {
3282 static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
3285 if (vulkan_drm_format_map[
i].vk_format == vkfmt)
3286 return vulkan_drm_format_map[
i].drm_fourcc;
3287 return DRM_FORMAT_INVALID;
3302 VkImageDrmFormatModifierPropertiesEXT drm_mod = {
3303 .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
3305 VkSemaphoreWaitInfo wait_info = {
3306 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
3308 .semaphoreCount =
planes,
3320 wait_info.pSemaphores =
f->sem;
3321 wait_info.pValues =
f->sem_value;
3323 vk->WaitSemaphores(hwctx->
act_dev, &wait_info, UINT64_MAX);
3329 ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->
act_dev,
f->img[0],
3331 if (
ret != VK_SUCCESS) {
3337 for (
int i = 0; (
i <
planes) && (
f->mem[
i]);
i++) {
3338 VkMemoryGetFdInfoKHR export_info = {
3339 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
3340 .memory =
f->mem[
i],
3341 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
3344 ret = vk->GetMemoryFdKHR(hwctx->
act_dev, &export_info,
3346 if (
ret != VK_SUCCESS) {
3359 VkSubresourceLayout
layout;
3360 VkImageSubresource
sub = {
3361 .aspectMask = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT,
3365 drm_desc->
layers[
i].
format = vulkan_fmt_to_drm(plane_vkfmt);
3376 if (
f->tiling == VK_IMAGE_TILING_OPTIMAL)
3389 dst->
data[0] = (uint8_t *)drm_desc;
3437 return vulkan_map_to_drm(hwfc, dst,
src,
flags);
3443 return vulkan_map_to_vaapi(hwfc, dst,
src,
flags);
3486 VkBufferUsageFlags
usage, VkMemoryPropertyFlagBits
flags,
3487 size_t size, uint32_t req_memory_bits,
int host_mapped,
3488 void *create_pnext,
void *alloc_pnext)
3497 VkBufferCreateInfo buf_spawn = {
3498 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
3499 .pNext = create_pnext,
3502 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
3505 VkBufferMemoryRequirementsInfo2 req_desc = {
3506 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
3508 VkMemoryDedicatedAllocateInfo ded_alloc = {
3509 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
3510 .pNext = alloc_pnext,
3512 VkMemoryDedicatedRequirements ded_req = {
3513 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
3515 VkMemoryRequirements2 req = {
3516 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
3527 if (
ret != VK_SUCCESS) {
3534 req_desc.buffer = vkbuf->
buf;
3536 vk->GetBufferMemoryRequirements2(hwctx->
act_dev, &req_desc, &req);
3539 use_ded_mem = ded_req.prefersDedicatedAllocation |
3540 ded_req.requiresDedicatedAllocation;
3542 ded_alloc.buffer = vkbuf->
buf;
3545 if (req_memory_bits)
3546 req.memoryRequirements.memoryTypeBits &= req_memory_bits;
3549 use_ded_mem ? &ded_alloc : (
void *)ded_alloc.pNext,
3555 if (
ret != VK_SUCCESS) {
3577 int nb_buffers,
int invalidate)
3584 int invalidate_count = 0;
3586 for (
int i = 0;
i < nb_buffers;
i++) {
3592 VK_WHOLE_SIZE, 0, (
void **)&mem[
i]);
3593 if (
ret != VK_SUCCESS) {
3603 for (
int i = 0;
i < nb_buffers;
i++) {
3605 const VkMappedMemoryRange ival_buf = {
3606 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
3607 .memory = vkbuf->
mem,
3608 .size = VK_WHOLE_SIZE,
3617 if (vkbuf->
flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3620 invalidate_ctx[invalidate_count++] = ival_buf;
3623 if (invalidate_count) {
3624 ret = vk->InvalidateMappedMemoryRanges(hwctx->
act_dev, invalidate_count,
3626 if (
ret != VK_SUCCESS)
3635 int nb_buffers,
int flush)
3643 int flush_count = 0;
3646 for (
int i = 0;
i < nb_buffers;
i++) {
3648 const VkMappedMemoryRange flush_buf = {
3649 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
3650 .memory = vkbuf->
mem,
3651 .size = VK_WHOLE_SIZE,
3654 if (vkbuf->
flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3657 flush_ctx[flush_count++] = flush_buf;
3662 ret = vk->FlushMappedMemoryRanges(hwctx->
act_dev, flush_count, flush_ctx);
3663 if (
ret != VK_SUCCESS) {
3670 for (
int i = 0;
i < nb_buffers;
i++) {
3683 const int *buf_stride,
int w,
3704 VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
3705 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
3706 .pWaitSemaphoreValues =
frame->sem_value,
3707 .pSignalSemaphoreValues = sem_signal_values,
3708 .waitSemaphoreValueCount =
planes,
3709 .signalSemaphoreValueCount =
planes,
3712 VkSubmitInfo s_info = {
3713 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
3714 .pNext = &s_timeline_sem_info,
3715 .pSignalSemaphores =
frame->sem,
3716 .pWaitSemaphores =
frame->sem,
3717 .pWaitDstStageMask = sem_wait_dst,
3718 .signalSemaphoreCount =
planes,
3719 .waitSemaphoreCount =
planes,
3723 sem_signal_values[
i] =
frame->sem_value[
i] + 1;
3730 VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
3731 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
3732 VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
3733 VK_ACCESS_TRANSFER_WRITE_BIT;
3735 sem_wait_dst[
i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
3738 if ((
frame->layout[
i] == new_layout) && (
frame->access[
i] & new_access))
3741 img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
3742 img_bar[bar_num].srcAccessMask = 0x0;
3743 img_bar[bar_num].dstAccessMask = new_access;
3744 img_bar[bar_num].oldLayout =
frame->layout[
i];
3745 img_bar[bar_num].newLayout = new_layout;
3746 img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
3747 img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
3748 img_bar[bar_num].image =
frame->img[
i];
3749 img_bar[bar_num].subresourceRange.levelCount = 1;
3750 img_bar[bar_num].subresourceRange.layerCount = 1;
3751 img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
3753 frame->layout[
i] = img_bar[bar_num].newLayout;
3754 frame->access[
i] = img_bar[bar_num].dstAccessMask;
3760 vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
3761 VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
3762 0,
NULL, 0,
NULL, bar_num, img_bar);
3767 VkBufferImageCopy buf_reg = {
3768 .bufferOffset = buf_offsets[
i],
3769 .bufferRowLength = buf_stride[
i] /
desc->comp[
i].step,
3770 .imageSubresource.layerCount = 1,
3771 .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
3772 .imageOffset = { 0, 0, 0, },
3778 buf_reg.bufferImageHeight = p_h;
3779 buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };
3782 vk->CmdCopyImageToBuffer(cmd_buf,
frame->img[
i],
frame->layout[
i],
3783 vkbuf->
buf, 1, &buf_reg);
3785 vk->CmdCopyBufferToImage(cmd_buf, vkbuf->
buf,
frame->img[
i],
3786 frame->layout[
i], 1, &buf_reg);
3837 if (swf->width > hwfc->
width || swf->height > hwfc->
height)
3841 if (
f->tiling == VK_IMAGE_TILING_LINEAR &&
3842 f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
3846 map->format = swf->format;
3861 VkExternalMemoryBufferCreateInfo create_desc = {
3862 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
3863 .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
3866 VkImportMemoryHostPointerInfoEXT import_desc = {
3867 .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
3868 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
3871 VkMemoryHostPointerPropertiesEXT p_props = {
3872 .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
3875 get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height,
i);
3880 if (map_host && swf->linesize[
i] > 0) {
3882 offs = (uintptr_t)swf->data[
i] % p->
hprops.minImportedHostPointerAlignment;
3883 import_desc.pHostPointer = swf->data[
i] - offs;
3888 p->
hprops.minImportedHostPointerAlignment);
3890 ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->
act_dev,
3891 import_desc.handleType,
3892 import_desc.pHostPointer,
3895 if (
ret == VK_SUCCESS) {
3897 buf_offsets[
i] = offs;
3901 if (!host_mapped[
i])
3905 from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT :
3906 VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
3907 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
3908 req_size, p_props.memoryTypeBits, host_mapped[
i],
3909 host_mapped[
i] ? &create_desc :
NULL,
3910 host_mapped[
i] ? &import_desc :
NULL);
3924 get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height,
i);
3927 (
const uint8_t *)swf->data[
i], swf->linesize[
i],
3938 swf->width, swf->height, swf->format,
from);
3949 get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height,
i);
3952 (
const uint8_t *)
tmp.data[
i],
tmp.linesize[
i],
3973 switch (
src->format) {
3977 if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
3978 (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
3983 return vulkan_transfer_data_from_cuda(hwfc, dst,
src);
3986 if (
src->hw_frames_ctx)
4009 CudaFunctions *cu = cu_internal->
cuda_dl;
4019 err =
CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
4032 s_w_par[
i].params.fence.value = dst_f->
sem_value[
i] + 0;
4033 s_s_par[
i].params.fence.value = dst_f->
sem_value[
i] + 1;
4036 err =
CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
4037 planes, cuda_dev->stream));
4042 CUDA_MEMCPY2D cpy = {
4043 .dstMemoryType = CU_MEMORYTYPE_DEVICE,
4044 .dstDevice = (CUdeviceptr)dst->
data[
i],
4048 .srcMemoryType = CU_MEMORYTYPE_ARRAY,
4049 .srcArray = dst_int->cu_array[
i],
4055 cpy.WidthInBytes =
w *
desc->comp[
i].step;
4058 err =
CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
4063 err =
CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
4064 planes, cuda_dev->stream));
4095 if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
4096 (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
4101 return vulkan_transfer_data_to_cuda(hwfc, dst,
src);