#include <drm_fourcc.h>
#include <va/va_drmcommon.h>

#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)

    VkPhysicalDeviceProperties2 props;
    VkPhysicalDeviceMemoryProperties mprops;
    VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops;
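/* Queue count lookup with fallbacks: a compute request falls back to the
 * graphics family when no dedicated compute family exists, and a transfer
 * request falls back to compute, then graphics. */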
#define GET_QUEUE_COUNT(hwctx, graph, comp, tx) (                   \
    graph ?  hwctx->nb_graphics_queues :                            \
    comp  ? (hwctx->nb_comp_queues ?                                \
             hwctx->nb_comp_queues : hwctx->nb_graphics_queues) :   \
    tx    ? (hwctx->nb_tx_queues ? hwctx->nb_tx_queues :            \
             (hwctx->nb_comp_queues ?                               \
              hwctx->nb_comp_queues : hwctx->nb_graphics_queues)) : \
    0                                                               \
)

#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \
                                vkGetInstanceProcAddr(inst, #name)

#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT      | \
                             VK_IMAGE_USAGE_STORAGE_BIT      | \
                             VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \
                             VK_IMAGE_USAGE_TRANSFER_DST_BIT)
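/* Appends a strdup'd copy of val to a dynamically grown string list; both
 * the realloc and the strdup failure paths set err to AVERROR(ENOMEM) and
 * jump to the caller's fail label. */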
#define ADD_VAL_TO_LIST(list, count, val)                   \
    do {                                                    \
        list = av_realloc_array(list, sizeof(*list), ++count); \
        if (!list) {                                        \
            err = AVERROR(ENOMEM);                          \
            goto fail;                                      \
        }                                                   \
        list[count - 1] = av_strdup(val);                   \
        if (!list[count - 1]) {                             \
            err = AVERROR(ENOMEM);                          \
            goto fail;                                      \
        }                                                   \
    } while(0)
static const struct {
    enum AVPixelFormat pixfmt;
    const VkFormat vkfmts[3];
} vk_pixfmt_map[] = {
    { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },

    { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
};
    VkFormatFeatureFlags flags;
    VkFormatProperties2 prop = {
        .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
    };
    vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
    flags = linear ? prop.formatProperties.linearTilingFeatures :
                     prop.formatProperties.optimalTilingFeatures;
#define CASE(VAL) case VAL: return #VAL
    CASE(VK_EVENT_RESET);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_NOT_PERMITTED_EXT);
    CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
    CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
    CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
    default: return "Unknown error";
                           VkDebugUtilsMessageTypeFlagsEXT messageType,
                           const VkDebugUtilsMessengerCallbackDataEXT *data,

    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:    l = AV_LOG_INFO;    break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:   l = AV_LOG_ERROR;   break;

    for (int i = 0; i < data->cmdBufLabelCount; i++)
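/* Builds the list of enabled instance or device extensions: optional
 * extensions known to lavu are enabled whenever the implementation reports
 * them, the debug-utils extension is added in debug mode, and any
 * user-supplied "+"-separated extension string is validated against the
 * supported set. */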
                            const char * const **dst, uint32_t *num, int debug)

    const char **extension_names = NULL;

    int err = 0, found, extensions_found = 0;

    int optional_exts_num;
    uint32_t sup_ext_count;
    char *user_exts_str = NULL;

    VkExtensionProperties *sup_ext;

        if (!user_exts_str) {

        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));

        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
        if (!user_exts_str) {

        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));

        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, sup_ext);
    for (int i = 0; i < optional_exts_num; i++) {
        tstr = optional_exts[i].name;

        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {

        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;

        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {

        char *save, *token = av_strtok(user_exts_str, "+", &save);

            for (int j = 0; j < sup_ext_count; j++) {
                if (!strcmp(token, sup_ext[j].extensionName)) {

    *dst = extension_names;
    *num = extensions_found;
    for (int i = 0; i < extensions_found; i++)
        av_free((void *)extension_names[i]);
    const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10);
    VkApplicationInfo application_info = {
        .sType            = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pEngineName      = "libavutil",
        .apiVersion       = VK_API_VERSION_1_1,
    };
    VkInstanceCreateInfo inst_props = {
        .sType            = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &application_info,
    };

    err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames,
                           &inst_props.enabledExtensionCount, debug_mode);

        static const char *layers[] = { "VK_LAYER_KHRONOS_validation" };
        inst_props.ppEnabledLayerNames = layers;
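/* The validation layer enabled above reports through the debug-utils
 * messenger created right after the instance. */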
    ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);

    if (ret != VK_SUCCESS) {

    for (int i = 0; i < inst_props.enabledExtensionCount; i++)
        av_free((void *)inst_props.ppEnabledExtensionNames[i]);
    av_free((void *)inst_props.ppEnabledExtensionNames);
        VkDebugUtilsMessengerCreateInfoEXT dbg = {
            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT    |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
            .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT     |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT  |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
        };

        pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
    case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
    case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:   return "discrete";
    case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:    return "virtual";
    case VK_PHYSICAL_DEVICE_TYPE_CPU:            return "software";
    default:                                     return "unknown";
    int err = 0, choice = -1;

    VkPhysicalDevice *devices = NULL;
    VkPhysicalDeviceIDProperties *idp = NULL;
    VkPhysicalDeviceProperties2 *prop = NULL;

    ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL);
    if (ret != VK_SUCCESS || !num) {

    ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices);
    if (ret != VK_SUCCESS) {

    for (int i = 0; i < num; i++) {
        idp[i].sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
        prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
        prop[i].pNext = &idp[i];

        vkGetPhysicalDeviceProperties2(devices[i], &prop[i]);

                prop[i].properties.deviceName,

                prop[i].properties.deviceID);
        for (int i = 0; i < num; i++) {
            if (!strncmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {

    } else if (select->name) {

        for (int i = 0; i < num; i++) {
            if (strstr(prop[i].properties.deviceName, select->name)) {

        for (int i = 0; i < num; i++) {
            if (select->pci_device == prop[i].properties.deviceID) {

        for (int i = 0; i < num; i++) {
            if (select->vendor_id == prop[i].properties.vendorID) {

        if (select->index < num) {
            choice = select->index;
    VkQueueFamilyProperties *qs = NULL;

    int graph_index = -1, comp_index = -1, tx_index = -1;
    VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;

    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);

    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs);

#define SEARCH_FLAGS(expr, out)                             \
    for (int i = 0; i < num; i++) {                         \
        const VkQueueFlagBits flags = qs[i].queueFlags;     \

                 (i != comp_index), tx_index)
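/* ADD_QUEUE logs the chosen family and appends a VkDeviceQueueCreateInfo
 * covering all of its queues; each family is added at most once, with the
 * graphics family doubling as compute/transfer when no dedicated family
 * exists. */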
#define ADD_QUEUE(fidx, graph, comp, tx)                                                 \
    av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (total queues: %i) for %s%s%s\n", \
           fidx, qs[fidx].queueCount, graph ? "graphics " : "",                          \
           comp ? "compute " : "", tx ? "transfers " : "");                              \
    av_log(ctx, AV_LOG_VERBOSE, "    QF %i flags: %s%s%s%s\n", fidx,                     \
           ((qs[fidx].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? "(graphics) " : "",         \
           ((qs[fidx].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? "(compute) " : "",           \
           ((qs[fidx].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? "(transfers) " : "",        \
           ((qs[fidx].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : "");    \
    pc[cd->queueCreateInfoCount].queueFamilyIndex = fidx;                                \
    pc[cd->queueCreateInfoCount].queueCount = qs[fidx].queueCount;                       \
    weights = av_malloc(qs[fidx].queueCount * sizeof(float));                            \
    pc[cd->queueCreateInfoCount].pQueuePriorities = weights;                             \
    if (!weights)                                                                        \
        goto fail;                                                                       \
    for (int i = 0; i < qs[fidx].queueCount; i++)                                        \
        weights[i] = 1.0f;                                                               \
    cd->queueCreateInfoCount++;
    ADD_QUEUE(graph_index, 1, comp_index < 0, tx_index < 0 && comp_index < 0)

    if (comp_index != -1) {
        ADD_QUEUE(comp_index, 0, 1, tx_index < 0)

    if (tx_index != -1) {
                           int queue_family_index, int num_queues)

        .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags            = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = queue_family_index,
    };
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = num_queues,
    };

    if (ret != VK_SUCCESS) {

    cbuf_create.commandPool = cmd->pool;

    ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, cmd->bufs);
    if (ret != VK_SUCCESS) {

    for (int i = 0; i < num_queues; i++) {

        vkGetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);

    vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

        VkFenceCreateInfo fence_spawn = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        };

        if (ret != VK_SUCCESS) {

        vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);

    if (ret != VK_SUCCESS) {

    if (!deps || !nb_deps)
        return 0;

    for (int i = 0; i < nb_deps; i++) {
                           VkSubmitInfo *s_info, int synchronous)

    if (ret != VK_SUCCESS) {

    s_info->commandBufferCount = 1;

    if (ret != VK_SUCCESS) {

        vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
                                            hwctx->alloc);

    vkDestroyInstance(hwctx->inst, hwctx->alloc);
    VkPhysicalDeviceFeatures dev_features = { 0 };
    VkDeviceQueueCreateInfo queue_create_info[3] = {
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
    };

    VkDeviceCreateInfo dev_info = {
        .sType                = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .pQueueCreateInfos    = queue_create_info,
        .queueCreateInfoCount = 0,
    };

    hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;

    vkGetPhysicalDeviceFeatures(hwctx->phys_dev, &dev_features);

#define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.NAME;

                               &dev_info.enabledExtensionCount, 0))) {
        av_free((void *)queue_create_info[0].pQueuePriorities);
        av_free((void *)queue_create_info[1].pQueuePriorities);
        av_free((void *)queue_create_info[2].pQueuePriorities);

    av_free((void *)queue_create_info[0].pQueuePriorities);
    av_free((void *)queue_create_info[1].pQueuePriorities);
    av_free((void *)queue_create_info[2].pQueuePriorities);

    if (ret != VK_SUCCESS) {

    for (int i = 0; i < dev_info.enabledExtensionCount; i++)
        av_free((void *)dev_info.ppEnabledExtensionNames[i]);
    av_free((void *)dev_info.ppEnabledExtensionNames);
    p->props.sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;

    p->hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;

           p->props.properties.deviceName);

           p->props.properties.limits.optimalBufferCopyRowPitchAlignment);

           p->props.properties.limits.minMemoryMapAlignment);

           p->hprops.minImportedHostPointerAlignment);

    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);

#define CHECK_QUEUE(type, n)                                                             \
    if (n >= queue_num) {                                                                \
        av_log(ctx, AV_LOG_ERROR, "Invalid %s queue index %i (device has %i queues)!\n", \
               type, n, queue_num);                                                      \
        return AVERROR(EINVAL);                                                          \
    }

    vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
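/* Typical user-side creation path (a sketch using the public lavu API; the
 * device string "0" below is illustrative):
 *
 *     AVBufferRef *dev = NULL;
 *     int err = av_hwdevice_ctx_create(&dev, AV_HWDEVICE_TYPE_VULKAN,
 *                                      "0", NULL, 0);
 *
 * The device string is parsed below: a number selects by enumeration index,
 * anything else is matched against the device name. */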
    if (device && device[0]) {
        char *end = NULL;
        dev_select.index = strtol(device, &end, 10);
        if (end == device) {
            dev_select.index = 0;
            dev_select.name  = device;
    switch(src_ctx->type) {

        const char *vendor = vaQueryVendorString(src_hwctx->display);

        if (strstr(vendor, "Intel"))
            dev_select.vendor_id = 0x8086;
        if (strstr(vendor, "AMD"))
            dev_select.vendor_id = 0x1002;

        drmDevice *drm_dev_info;
        int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);

        if (drm_dev_info->bustype == DRM_BUS_PCI)
            dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;

        drmFreeDevice(&drm_dev_info);

        CudaFunctions *cu = cu_internal->cuda_dl;

        int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,

        dev_select.has_uuid = 1;
                                     const void *hwconfig,

    constraints->max_width  = p->props.properties.limits.maxImageDimension2D;
    constraints->max_height = p->props.properties.limits.maxImageDimension2D;
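/* Generic device-memory allocator: walk the device's memory types, skip
 * those not permitted by the resource's memoryTypeBits, pick the first one
 * carrying all requested property flags, and report the chosen type's full
 * property flags back through mem_flags. */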
                     VkMemoryPropertyFlagBits req_flags, const void *alloc_extension,
                     VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)

    VkMemoryAllocateInfo alloc_info = {
        .sType          = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext          = alloc_extension,
        .allocationSize = req->size,
    };

    for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
        /* The memory type must be allowed by the resource's requirements */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type's flags must include all requested properties */
        if ((p->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
            continue;

    alloc_info.memoryTypeIndex = index;

    ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info,
                           dev_hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {

    *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;
    if (internal->cuda_fc_ref) {

        CudaFunctions *cu = cu_internal->cuda_dl;

            if (internal->cu_sem[i])
                CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
            if (internal->cu_mma[i])
                CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
            if (internal->ext_mem[i])
                CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
                          void *alloc_pnext, size_t alloc_pnext_stride)

        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
        };

        vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);

        if (f->tiling == VK_IMAGE_TILING_LINEAR)
            req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size,
                                                  p->props.properties.limits.minMemoryMapAlignment);

        use_ded_mem = ded_req.prefersDedicatedAllocation |
                      ded_req.requiresDedicatedAllocation;
        if (use_ded_mem)
            ded_alloc.image = f->img[i];

                             f->tiling == VK_IMAGE_TILING_LINEAR ?
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
                             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                             use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                             &f->flags, &f->mem[i])))

        f->size[i] = req.memoryRequirements.size;
        bind_info[i].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
        bind_info[i].image  = f->img[i];
        bind_info[i].memory = f->mem[i];

    if (ret != VK_SUCCESS) {
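/* prepare_frame transitions every plane of a new frame to a known state in
 * one barrier submission: transfer src/dst for internal copies, or GENERAL
 * plus a queue family transfer to EXTERNAL for exported frames. The
 * resulting layout and access flags are written back into the AVVkFrame. */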
    VkImageLayout new_layout;
    VkAccessFlags new_access;

    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pSignalSemaphores    = frame->sem,
        .signalSemaphoreCount = planes,
    };

        wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

        new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        dst_qf     = VK_QUEUE_FAMILY_IGNORED;

        new_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_READ_BIT;
        dst_qf     = VK_QUEUE_FAMILY_IGNORED;

        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
        dst_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
        s_info.pWaitSemaphores    = frame->sem;
        s_info.pWaitDstStageMask  = wait_st;
        s_info.waitSemaphoreCount = planes;

        img_bar[i].sType               = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        img_bar[i].srcAccessMask       = 0x0;
        img_bar[i].dstAccessMask       = new_access;
        img_bar[i].oldLayout           = frame->layout[i];
        img_bar[i].newLayout           = new_layout;
        img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        img_bar[i].dstQueueFamilyIndex = dst_qf;
        img_bar[i].image               = frame->img[i];
        img_bar[i].subresourceRange.levelCount = 1;
        img_bar[i].subresourceRange.layerCount = 1;
        img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;

        frame->layout[i] = img_bar[i].newLayout;
        frame->access[i] = img_bar[i].dstAccessMask;

                         VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT,
                        VkImageTiling tiling, VkImageUsageFlagBits usage,

    VkExportSemaphoreCreateInfo ext_sem_info = {
        .sType       = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
        .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
    };

    VkSemaphoreCreateInfo sem_spawn = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
    };

        VkImageCreateInfo image_create_info = {
            .sType         = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .pNext         = create_pnext,
            .imageType     = VK_IMAGE_TYPE_2D,
            .format        = img_fmts[i],
            .extent.width  = p_w,
            .extent.height = p_h,
            .flags         = VK_IMAGE_CREATE_ALIAS_BIT,
            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
            .samples       = VK_SAMPLE_COUNT_1_BIT,
            .pQueueFamilyIndices   = p->qfs,
            .queueFamilyIndexCount = p->num_qfs,
            .sharingMode   = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                              VK_SHARING_MODE_EXCLUSIVE,
        };

        ret = vkCreateImage(hwctx->act_dev, &image_create_info,

        if (ret != VK_SUCCESS) {

        ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,

        if (ret != VK_SUCCESS) {

        f->layout[i] = image_create_info.initialLayout;
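/* try_export_flags asks the implementation whether images with this frames
 * context's format, tiling and usage can be exported as the given external
 * memory handle type, accumulating the compatible handle types on success. */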
                             VkExternalMemoryHandleTypeFlags *comp_handle_types,
                             VkExternalMemoryHandleTypeFlagBits *iexp,
                             VkExternalMemoryHandleTypeFlagBits exp)

    VkExternalImageFormatProperties eprops = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
    };
    VkImageFormatProperties2 props = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
    };
    VkPhysicalDeviceExternalImageFormatInfo enext = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
    };
    VkPhysicalDeviceImageFormatInfo2 pinfo = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
        .pNext = !exp ? NULL : &enext,
        .type  = VK_IMAGE_TYPE_2D,
        .usage = hwctx->usage,
        .flags = VK_IMAGE_CREATE_ALIAS_BIT,
    };

    ret = vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,

    if (ret == VK_SUCCESS) {
        *iexp |= exp;
        *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
    VkExternalMemoryHandleTypeFlags e = 0x0;

    VkExternalMemoryImageCreateInfo eiinfo = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,

                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);

                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

        eminfo[i].sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
        eminfo[i].handleTypes = e;

                       eiinfo.handleTypes ? &eiinfo : NULL);

               VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
        !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {

            flush_ranges[i].sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
            flush_ranges[i].memory = map->frame->mem[i];
            flush_ranges[i].size   = VK_WHOLE_SIZE;

        if (ret != VK_SUCCESS) {

        vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]);
    int err, mapped_mem_count = 0;

    if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
        !(f->tiling == VK_IMAGE_TILING_LINEAR)) {

        ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0,
                          VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
        if (ret != VK_SUCCESS) {

        !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {

            map_mem_ranges[i].sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
            map_mem_ranges[i].size   = VK_WHOLE_SIZE;
            map_mem_ranges[i].memory = f->mem[i];

        ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, planes,

        if (ret != VK_SUCCESS) {

        VkImageSubresource sub = {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
        };
        VkSubresourceLayout layout;
        vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);

    for (int i = 0; i < mapped_mem_count; i++)
        vkUnmapMemory(hwctx->act_dev, f->mem[i]);
static const struct {
    uint32_t drm_fourcc;
    VkFormat vk_format;
} vulkan_drm_format_map[] = {
    { DRM_FORMAT_R8,       VK_FORMAT_R8_UNORM       },
    { DRM_FORMAT_R16,      VK_FORMAT_R16_UNORM      },
    { DRM_FORMAT_GR88,     VK_FORMAT_R8G8_UNORM     },
    { DRM_FORMAT_RG88,     VK_FORMAT_R8G8_UNORM     },
    { DRM_FORMAT_GR1616,   VK_FORMAT_R16G16_UNORM   },
    { DRM_FORMAT_RG1616,   VK_FORMAT_R16G16_UNORM   },
    { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
    { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
    { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
    { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
};

static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
{
    for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
        if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
            return vulkan_drm_format_map[i].vk_format;
    return VK_FORMAT_UNDEFINED;
}
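/* DMA-BUF import: each DRM layer becomes a VkImage whose tiling either
 * carries the explicit DRM format modifier (when the modifier extension is
 * available) or falls back to linear/optimal; every DRM object is then
 * imported through a dup'd fd and bound to the image planes with
 * vkBindImageMemory2. */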
    int bind_counts = 0;

    VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;

    for (int i = 0; i < desc->nb_layers; i++) {
        if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {

                   desc->layers[i].format);

    f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
                desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
                VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;

    for (int i = 0; i < desc->nb_layers; i++) {

        VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
            .drmFormatModifier = desc->objects[0].format_modifier,
            .drmFormatModifierPlaneCount = planes,
            .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data,
        };

        VkExternalMemoryImageCreateInfo einfo = {
            .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
            .pNext       = has_modifiers ? &drm_info : NULL,
            .handleTypes = htype,
        };

        VkSemaphoreCreateInfo sem_spawn = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        };

        VkImageCreateInfo image_create_info = {
            .sType         = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .imageType     = VK_IMAGE_TYPE_2D,
            .format        = drm_to_vulkan_fmt(desc->layers[i].format),
            .extent.width  = p_w,
            .extent.height = p_h,
            .flags         = VK_IMAGE_CREATE_ALIAS_BIT,
            .tiling        = f->tiling,
            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
            .usage         = frames_hwctx->usage,
            .samples       = VK_SAMPLE_COUNT_1_BIT,
            .pQueueFamilyIndices   = p->qfs,
            .queueFamilyIndexCount = p->num_qfs,
            .sharingMode   = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                              VK_SHARING_MODE_EXCLUSIVE,
        };

        for (int j = 0; j < planes; j++) {
            plane_data[j].offset     = desc->layers[i].planes[j].offset;
            plane_data[j].rowPitch   = desc->layers[i].planes[j].pitch;
            plane_data[j].size       = 0;
            plane_data[j].arrayPitch = 0;
            plane_data[j].depthPitch = 0;
        }

        ret = vkCreateImage(hwctx->act_dev, &image_create_info,

        if (ret != VK_SUCCESS) {

        ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,

        if (ret != VK_SUCCESS) {

        f->layout[i] = image_create_info.initialLayout;
    for (int i = 0; i < desc->nb_objects; i++) {
        int use_ded_mem = 0;
        VkMemoryFdPropertiesKHR fdmp = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
        };
        VkMemoryRequirements req = {
            .size = desc->objects[i].size,
        };
        VkImportMemoryFdInfoKHR idesc = {
            .sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
            .handleType = htype,
            .fd         = dup(desc->objects[i].fd),
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
        };

        if (ret != VK_SUCCESS) {

        req.memoryTypeBits = fdmp.memoryTypeBits;

        if (desc->nb_layers == desc->nb_objects) {
            VkImageMemoryRequirementsInfo2 req_desc = {
                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            };
            VkMemoryDedicatedRequirements ded_req = {
                .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
            };
            VkMemoryRequirements2 req2 = {
                .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            };

            vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req2);

            use_ded_mem = ded_req.prefersDedicatedAllocation |
                          ded_req.requiresDedicatedAllocation;
            if (use_ded_mem)
                ded_alloc.image = f->img[i];
        }

        err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                        use_ded_mem ? &ded_alloc : ded_alloc.pNext,
                        &f->flags, &f->mem[i]);

        f->size[i] = desc->objects[i].size;
    for (int i = 0; i < desc->nb_layers; i++) {
        const int signal_p = has_modifiers && (planes > 1);

        for (int j = 0; j < planes; j++) {
            VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
                                           j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
                                                    VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;

            plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
            plane_info[bind_counts].planeAspect = aspect;

            bind_info[bind_counts].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
            bind_info[bind_counts].pNext  = signal_p ? &plane_info[bind_counts] : NULL;
            bind_info[bind_counts].image  = f->img[i];
            bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index];
            bind_info[bind_counts].memoryOffset = desc->layers[i].planes[j].offset;

            bind_counts++;
        }
    }

    ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
    if (ret != VK_SUCCESS) {
    for (int i = 0; i < desc->nb_layers; i++) {

    for (int i = 0; i < desc->nb_objects; i++)
    err = vulkan_map_from_drm_frame_desc(hwfc, &f,

                                &vulkan_unmap_from, map);
    VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];

    vaSyncSurface(vaapi_ctx->display, surface_id);

    err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
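/* CUDA interop: each plane's memory and semaphore is exported as an opaque
 * fd and imported on the CUDA side as external memory (mapped to a
 * mipmapped array) and an external semaphore, so copies can be ordered
 * against Vulkan work without a CPU-side sync. */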
    CudaFunctions *cu = cu_internal->cuda_dl;
    CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
                                                     CU_AD_FORMAT_UNSIGNED_INT8;

    if (!dst_int || !dst_int->cuda_fc_ref) {

        if (!dst_int->cuda_fc_ref) {

            CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
                .NumChannels = 1 + ((planes == 2) && i),
            };
            CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
                .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
                .size = dst_f->size[i],
            };
            VkMemoryGetFdInfoKHR export_info = {
                .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
                .memory     = dst_f->mem[i],
                .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
            };
            VkSemaphoreGetFdInfoKHR sem_export = {
                .sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
                .semaphore  = dst_f->sem[i],
                .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
            };
            CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
                .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
            };

            ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
                                       &ext_desc.handle.fd);
            if (ret != VK_SUCCESS) {

            ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc));

            ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
                                                                       dst_int->ext_mem[i],

            ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
                                                        dst_int->cu_mma[i], 0));

            ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
                                          &ext_sem_desc.handle.fd);
            if (ret != VK_SUCCESS) {

            ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
    CudaFunctions *cu = cu_internal->cuda_dl;

    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));

    ret = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);

    ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
                                                     planes, cuda_dev->stream));

        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
            .srcDevice     = (CUdeviceptr)src->data[i],
            .srcPitch      = src->linesize[i],

            .dstMemoryType = CU_MEMORYTYPE_ARRAY,
            .dstArray      = dst_int->cu_array[i],
        };

        ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));

    ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
                                                       planes, cuda_dev->stream));
    switch (src->format) {

        return vulkan_map_from_vaapi(hwfc, dst, src, flags);

        return vulkan_map_from_drm(hwfc, dst, src, flags);
typedef struct VulkanDRMMapping {

static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
{
    for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
        if (vulkan_drm_format_map[i].vk_format == vkfmt)
            return vulkan_drm_format_map[i].drm_fourcc;
    return DRM_FORMAT_INVALID;
}
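/* The reverse direction: exporting a Vulkan frame as a DRM descriptor by
 * querying the image's DRM format modifier, exporting each plane's memory
 * as a DMA-BUF fd, and, for non-optimally-tiled images, reading the plane
 * layout back via vkGetImageSubresourceLayout. */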
    VkImageDrmFormatModifierPropertiesEXT drm_mod = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
    };

    ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],

    if (ret != VK_SUCCESS) {

    for (int i = 0; (i < planes) && (f->mem[i]); i++) {
        VkMemoryGetFdInfoKHR export_info = {
            .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
            .memory     = f->mem[i],
            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
        };

        ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,

        if (ret != VK_SUCCESS) {

        VkSubresourceLayout layout;
        VkImageSubresource sub = {
                          VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
                          VK_IMAGE_ASPECT_COLOR_BIT,
        };

        drm_desc->layers[i].format = vulkan_fmt_to_drm(plane_vkfmt);

        if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
            continue;

        vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
        return vulkan_map_to_drm(hwfc, dst, src, flags);

        return vulkan_map_to_vaapi(hwfc, dst, src, flags);
                      VkMemoryPropertyFlagBits flags, void *create_pnext,

    VkBufferCreateInfo buf_spawn = {
        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext       = create_pnext,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
    };

    VkBufferMemoryRequirementsInfo2 req_desc = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
    };
    VkMemoryDedicatedAllocateInfo ded_alloc = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
        .pNext = alloc_pnext,
    };
    VkMemoryDedicatedRequirements ded_req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
    };
    VkMemoryRequirements2 req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
    };

        buf_spawn.size = height*(*stride);
        buf_spawn.size = FFALIGN(buf_spawn.size,
                                 p->props.properties.limits.minMemoryMapAlignment);

        buf_spawn.size = imp_size;

    if (ret != VK_SUCCESS) {

    req_desc.buffer = vkbuf->buf;

    vkGetBufferMemoryRequirements2(hwctx->act_dev, &req_desc, &req);

    use_ded_mem = ded_req.prefersDedicatedAllocation |
                  ded_req.requiresDedicatedAllocation;
    if (use_ded_mem)
        ded_alloc.buffer = vkbuf->buf;

                    use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,

    if (ret != VK_SUCCESS) {
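/* Buffer mapping helpers: host-coherent allocations can be accessed
 * directly, while non-coherent ones are invalidated before reading (map)
 * and flushed after writing (unmap) to keep host and device views of the
 * memory consistent. */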
                       int nb_buffers, int invalidate)

    int invalidate_count = 0;

    for (int i = 0; i < nb_buffers; i++) {

                          VK_WHOLE_SIZE, 0, (void **)&mem[i]);
        if (ret != VK_SUCCESS) {

    for (int i = 0; i < nb_buffers; i++) {

        const VkMappedMemoryRange ival_buf = {
            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = vkbuf->mem,
            .size   = VK_WHOLE_SIZE,
        };
        if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
            continue;
        invalidate_ctx[invalidate_count++] = ival_buf;
    }

    if (invalidate_count) {
        ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,

        if (ret != VK_SUCCESS)
                         int nb_buffers, int flush)

    int flush_count = 0;

    for (int i = 0; i < nb_buffers; i++) {

        const VkMappedMemoryRange flush_buf = {
            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = vkbuf->mem,
            .size   = VK_WHOLE_SIZE,
        };
        if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
            continue;
        flush_ctx[flush_count++] = flush_buf;
    }

        ret = vkFlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx);
        if (ret != VK_SUCCESS) {

    for (int i = 0; i < nb_buffers; i++) {
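/* transfer_image_buf records a single command buffer that transitions any
 * plane not already in the required layout and then issues one
 * buffer<->image copy per plane; the submission waits on and re-signals the
 * frame's semaphores so other users of the frame stay synchronized. */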
    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pSignalSemaphores    = frame->sem,
        .pWaitSemaphores      = frame->sem,
        .pWaitDstStageMask    = sem_wait_dst,
        .signalSemaphoreCount = planes,
        .waitSemaphoreCount   = planes,
    };

        VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
                                            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
                                            VK_ACCESS_TRANSFER_WRITE_BIT;

        sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

        /* When the layout and access flags already match, skip the barrier */
        if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access))
            continue;

        img_bar[bar_num].sType               = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        img_bar[bar_num].srcAccessMask       = 0x0;
        img_bar[bar_num].dstAccessMask       = new_access;
        img_bar[bar_num].oldLayout           = frame->layout[i];
        img_bar[bar_num].newLayout           = new_layout;
        img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        img_bar[bar_num].image               = frame->img[i];
        img_bar[bar_num].subresourceRange.levelCount = 1;
        img_bar[bar_num].subresourceRange.layerCount = 1;
        img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;

        frame->layout[i] = img_bar[bar_num].newLayout;
        frame->access[i] = img_bar[bar_num].dstAccessMask;

        vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                             0, NULL, 0, NULL, bar_num, img_bar);

        VkBufferImageCopy buf_reg = {
            .bufferRowLength = buf_stride[i] / desc->comp[i].step,
            .bufferImageHeight = p_h,
            .imageSubresource.layerCount = 1,
            .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .imageOffset = { 0, 0, 0, },
            .imageExtent = { p_w, p_h, 1, },
        };

        if (to_buf)
            vkCmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i],
                                   vkbuf->buf, 1, &buf_reg);
        else
            vkCmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i],
                                   frame->layout[i], 1, &buf_reg);
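/* Host-to-GPU upload path: a linear, host-visible image is simply mapped
 * and copied into; otherwise staging buffers are used, and source rows that
 * satisfy minImportedHostPointerAlignment are imported directly as buffer
 * backing to skip the extra copy. */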
    if (f->tiling == VK_IMAGE_TILING_LINEAR &&
        f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {

        map->format = src->format;

    int h = src->height;

        size_t p_size = FFABS(src->linesize[i]) * p_height;

        VkImportMemoryHostPointerInfoEXT import_desc = {
            .sType        = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
            .handleType   = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
            .pHostPointer = src->data[i],
        };

        host_mapped[i] = map_host && src->linesize[i] > 0 &&
                         !(p_size % p->hprops.minImportedHostPointerAlignment) &&
                         !(((uintptr_t)import_desc.pHostPointer) %
                           p->hprops.minImportedHostPointerAlignment);
        p_size = host_mapped[i] ? p_size : 0;

        err = create_buf(dev_ctx, &bufs[i], p_size, p_height, &tmp.linesize[i],
                         VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL,
                         host_mapped[i] ? &import_desc : NULL);

    int h = src->height;

                              src->width, src->height, src->format, 0);
    switch (src->format) {

        return vulkan_transfer_data_from_cuda(hwfc, dst, src);

        if (src->hw_frames_ctx)
    CudaFunctions *cu = cu_internal->cuda_dl;

    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));

        CUDA_MEMCPY2D cpy = {
            .dstMemoryType = CU_MEMORYTYPE_DEVICE,
            .dstDevice     = (CUdeviceptr)dst->data[i],

            .srcMemoryType = CU_MEMORYTYPE_ARRAY,
            .srcArray      = dst_int->cu_array[i],
        };

        ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
    if (f->tiling == VK_IMAGE_TILING_LINEAR &&
        f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {

        VkImportMemoryHostPointerInfoEXT import_desc = {
            .sType        = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
            .handleType   = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
            .pHostPointer = dst->data[i],
        };

        host_mapped[i] = map_host && dst->linesize[i] > 0 &&
                         !(p_size % p->hprops.minImportedHostPointerAlignment) &&
                         !(((uintptr_t)import_desc.pHostPointer) %
                           p->hprops.minImportedHostPointerAlignment);
        p_size = host_mapped[i] ? p_size : 0;

        err = create_buf(dev_ctx, &bufs[i], p_size, p_height,
                         &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL,
                         host_mapped[i] ? &import_desc : NULL);
        return vulkan_transfer_data_to_cuda(hwfc, dst, src);