#define FN_CREATING(ctx, type, shortname, array, num)                          \
static av_always_inline type *create_ ##shortname(ctx *dctx)                   \
    type **array, *sctx = av_mallocz(sizeof(*sctx));                           \
    array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\
    dctx->array = array;                                                       \
    dctx->array[dctx->num++] = sctx;                                           \
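/* The FN_CREATING macro above expands to a create_<shortname>() helper that
 * allocates a zeroed context, grows the owner's pointer array, and registers
 * the new entry so it can be freed on uninit. A sketch of a typical
 * instantiation (the context and type names here are assumptions, inferred
 * from the s->exec_ctx_num / s->samplers_num / s->pipelines_num counters that
 * the uninit code further below iterates over):
 *
 *     FN_CREATING(VulkanFilterContext, FFVkExecContext, exec_ctx,
 *                 exec_ctx, exec_ctx_num)
 *
 * after which create_exec_ctx(s) returns a tracked, zero-initialized context. */

/* Identity component mapping, the default swizzle for created image views. */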
    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
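/* Converts a VkResult code into the name of its enumerator, for logging. */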
#define CASE(VAL) case VAL: return #VAL
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_NOT_PERMITTED_EXT);
    default:
        return "Unknown error";
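/* Queue family selection: each case below resolves the requested capability
 * to the matching family index and its available queue count. */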
                   VkQueueFlagBits dev_family, int nb_queues)
    case VK_QUEUE_GRAPHICS_BIT:
    case VK_QUEUE_COMPUTE_BIT:
    case VK_QUEUE_TRANSFER_BIT:
    case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
    case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
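/* Device memory allocation: finds a memory type that satisfies both the
 * resource's memoryTypeBits and the requested property flags, then allocates
 * through the loaded Vulkan function table. */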
                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
    VkMemoryAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = alloc_extension,
    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
        req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);
    alloc_info.allocationSize = req->size;
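    /* The spec requires memory types to be sorted in "optimal" order, so the
     * first type matching both masks is the preferred one. */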
    for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
        if (!(req->memoryTypeBits & (1 << i)))
        if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
    alloc_info.memoryTypeIndex = index;
    ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
                             s->hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
    *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
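/* Buffer creation: creates a VkBuffer, queries its memory requirements
 * (including dedicated-allocation hints), then allocates and binds memory. */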
                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
    VkBufferCreateInfo buf_spawn = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
    VkBufferMemoryRequirementsInfo2 req_desc = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
    VkMemoryDedicatedAllocateInfo ded_alloc = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
    VkMemoryDedicatedRequirements ded_req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
    VkMemoryRequirements2 req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
    ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
    if (ret != VK_SUCCESS) {
    req_desc.buffer = buf->buf;
    vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);
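    /* Use a dedicated allocation if the driver prefers or requires one. */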
    use_ded_mem = ded_req.prefersDedicatedAllocation |
                  ded_req.requiresDedicatedAllocation;
    ded_alloc.buffer = buf->buf;
                          use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
    ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
    if (ret != VK_SUCCESS) {
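/* Buffer mapping: maps each buffer into host memory; for non-coherent memory
 * the mapped ranges are batched and invalidated in a single call so host
 * reads observe device writes. */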
                      int nb_buffers, int invalidate)
    VkMappedMemoryRange *inval_list = NULL;
    for (int i = 0; i < nb_buffers; i++) {
        ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0,
                            VK_WHOLE_SIZE, 0, (void **)&mem[i]);
        if (ret != VK_SUCCESS) {
    for (int i = 0; i < nb_buffers; i++) {
        const VkMappedMemoryRange ival_buf = {
            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = buf[i].mem,
            .size = VK_WHOLE_SIZE,
        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
                                     (++inval_count)*sizeof(*inval_list));
        inval_list[inval_count - 1] = ival_buf;
    ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
    if (ret != VK_SUCCESS) {
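/* Unmap path: flushes non-coherent ranges so device reads observe host
 * writes, then unmaps every buffer. */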
    VkMappedMemoryRange *flush_list = NULL;
    for (int i = 0; i < nb_buffers; i++) {
        const VkMappedMemoryRange flush_buf = {
            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = buf[i].mem,
            .size = VK_WHOLE_SIZE,
        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
                                     (++flush_count)*sizeof(*flush_list));
        flush_list[flush_count - 1] = flush_buf;
    ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
    if (ret != VK_SUCCESS) {
    for (int i = 0; i < nb_buffers; i++)
        vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem);
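/* Frees a buffer and its backing memory, tolerating NULL handles. */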
    if (buf->buf != VK_NULL_HANDLE)
        vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
    if (buf->mem != VK_NULL_HANDLE)
        vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
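/* Registers a push constant range for the given shader stage(s). */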
                            int offset, int size, VkShaderStageFlagBits stage)
    VkPushConstantRange *pc;
    memset(pc, 0, sizeof(*pc));
    pc->stageFlags = stage;
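/* Execution context setup: one resettable command pool for the queue family,
 * a primary command buffer per queue, and a device queue handle each. */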
        .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = qf->queue_family,
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = qf->nb_queues,
                                s->hwctx->alloc, &e->pool);
    if (ret != VK_SUCCESS) {
    cbuf_create.commandPool = e->pool;
    ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
    if (ret != VK_SUCCESS) {
    for (int i = 0; i < qf->nb_queues; i++) {
        vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
                           i % qf->actual_queues, &q->queue);
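/* Recording start: creates the per-queue fence on first use; otherwise waits
 * on it and resets it so the command buffer can safely be reused. */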
    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    VkFenceCreateInfo fence_spawn = {
        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
        if (ret != VK_SUCCESS) {
        vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
    if (ret != VK_SUCCESS) {
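/* Dependency tracking and submission: frames used by a submitted command
 * buffer are kept referenced until the queue's fence signals, and their
 * semaphores are chained through the timeline-semaphore submit info. */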
                       AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag)
                                    (q->nb_frame_deps + 1) * sizeof(*dst));
    if (!q->frame_deps[q->nb_frame_deps]) {
    VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
    VkSubmitInfo s_info = {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pNext = &s_timeline_sem_info,
        .commandBufferCount = 1,
        .pSignalSemaphores = e->sem_sig,
    if (ret != VK_SUCCESS) {
    if (ret != VK_SUCCESS) {
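/* Generic buffer-dependency helper, followed by filter-level init/config:
 * the device and frames contexts are taken from the input link and the
 * output dimensions are propagated to the outlink. */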
    if (!deps || !nb_deps)
    for (int i = 0; i < nb_deps; i++) {
    s->hwctx = s->device->hwctx;
    if (!inlink->hw_frames_ctx) {
               "hardware frames context on the input.\n");
                          s->hwctx->nb_enabled_dev_extensions);
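    /* Cache the physical device's properties and memory properties; both are
     * consulted by the allocation helpers above. */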
    vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &s->props);
    vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
    if (!s->output_width)
    if (!s->output_height)
    if (!s->device_ref) {
    outlink->w = s->output_width;
    outlink->h = s->output_height;
    if (!s->device_ref) {
    if (!output_frames_ref) {
    output_frames->width  = s->output_width;
    output_frames->height = s->output_height;
           "frames: %d.\n", err);
    outlink->w = s->output_width;
    outlink->h = s->output_height;
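/* Sampler creation: clamp-to-edge addressing on all axes; mipmap filtering
 * drops to nearest when unnormalized coordinates are requested. */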
    VkSamplerCreateInfo sampler_info = {
        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .minFilter = sampler_info.magFilter,
        .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
                                      VK_SAMPLER_MIPMAP_MODE_LINEAR,
        .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .addressModeV = sampler_info.addressModeU,
        .addressModeW = sampler_info.addressModeU,
        .anisotropyEnable = VK_FALSE,
        .compareOp = VK_COMPARE_OP_NEVER,
        .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
        .unnormalizedCoordinates = unnorm_coords,
    ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
                            s->hwctx->alloc, &sctx->sampler[0]);
    if (ret != VK_SUCCESS) {
    for (int i = 1; i < 4; i++)
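/* Picks the GLSL storage format for the output image: 16-bit float for
 * content deeper than 8 bits per component, 8-bit otherwise. */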
    const int high = desc->comp[0].depth > 8;
    return high ? "rgba16f" : "rgba8";
    vk->DestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc);
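/* Image view creation: a 2D color view with the requested format and
 * component mapping; the destruction callback above releases it later. */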
                           VkImageView *v, VkImage img, VkFormat fmt,
                           const VkComponentMapping map)
    VkImageViewCreateInfo imgview_spawn = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .viewType = VK_IMAGE_VIEW_TYPE_2D,
        .subresourceRange = {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
    VkResult ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn,
                                       s->hwctx->alloc, &iv->view);
    if (ret != VK_SUCCESS) {
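/* Shader setup: fills in the pipeline stage info and emits the GLSL prelude,
 * including the compute local workgroup size declaration. */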
                      const char *name, VkShaderStageFlags stage)
    shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
    shd->shader.stage = stage;
    GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
             "local_size_y = %i, local_size_z = %i) in;\n\n",
    const char *p = shd->src.str;
    const char *start = p;
    for (int i = 0; i < strlen(p); i++) {
    av_log(avctx, prio, "Shader %s: \n%s", shd->name, buf.str);
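/* Shader compilation: wraps the SPIR-V blob in a module create info and
 * creates the VkShaderModule used by the pipeline stage. */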
                         const char *entrypoint)
    VkShaderModuleCreateInfo shader_create;
    shd->shader.pName = entrypoint;
           shd->name, spirv_size);
    shader_create.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    shader_create.pNext    = NULL;
    shader_create.codeSize = spirv_size;
    shader_create.flags    = 0;
    shader_create.pCode    = (void *)spirv;
    ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
    if (ret != VK_SUCCESS) {
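/* Per-descriptor-type properties: the size of the struct used to update the
 * descriptor, its GLSL keyword, and flags for how it must be declared in a
 * shader (uniform, memory qualifier, dimension, buffer contents). */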
    [VK_DESCRIPTOR_TYPE_SAMPLER]                = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "texture",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "image",         1, 1, 1, 0, },
    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT]       = { sizeof(VkDescriptorImageInfo),  "subpassInput",  1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]         = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER]         = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "samplerBuffer", 1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
                             int num, int only_print_to_shader)
    VkDescriptorSetLayout *layout;
    if (only_print_to_shader)
        VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
        VkDescriptorSetLayoutBinding *desc_binding;
        desc_binding = av_mallocz(sizeof(*desc_binding)*num);
        for (int i = 0; i < num; i++) {
            desc_binding[i].binding            = i;
            desc_binding[i].descriptorType     = desc[i].type;
            desc_binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
            desc_binding[i].stageFlags         = desc[i].stages;
            desc_binding[i].pImmutableSamplers = desc[i].sampler ?
                                                 desc[i].sampler->sampler :
        desc_create_layout.sType        = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
        desc_create_layout.pBindings    = desc_binding;
        desc_create_layout.bindingCount = num;
        ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
        if (ret != VK_SUCCESS) {
        for (int i = 0; i < num; i++) {
        VkDescriptorUpdateTemplateCreateInfo *dt;
        VkDescriptorUpdateTemplateEntry *des_entries;
        des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
        for (int i = 0; i < num; i++) {
            des_entries[i].dstBinding      = i;
            des_entries[i].descriptorType  = desc[i].type;
            des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
            des_entries[i].dstArrayElement = 0;
            des_entries[i].offset          = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
        dt[i].sType                      = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
        dt[i].templateType               = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
        dt[i].descriptorSetLayout        = layout[i];
        dt[i].pDescriptorUpdateEntries   = des_entries;
        dt[i].descriptorUpdateEntryCount = num;
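    /* Emit the matching GLSL declarations for each descriptor, using the
     * properties table above for layout, qualifiers and element counts. */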
    for (int i = 0; i < num; i++) {
        if (desc[i].mem_layout)
        else if (desc[i].elems > 0)
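/* Pushes the updater structs' current contents into the descriptor sets via
 * the registered update templates. */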
    vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
    vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
                           VkShaderStageFlagBits stage, int offset,
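/* Pipeline layout init: creates the descriptor pool, allocates the sets and
 * builds the pipeline layout from the staged set layouts. */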
    VkDescriptorPoolCreateInfo pool_create_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
    ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
    if (ret != VK_SUCCESS) {
    VkDescriptorSetAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
    ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
    if (ret != VK_SUCCESS) {
    VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .pSetLayouts = (VkDescriptorSetLayout *)pl->desc_staging,
    ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
    if (ret != VK_SUCCESS) {
    VkDescriptorUpdateTemplateCreateInfo *dt;
        ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev,
                                                 dt, s->hwctx->alloc,
        if (ret != VK_SUCCESS) {
        av_free((void *)dt->pDescriptorUpdateEntries);
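/* Compute pipeline creation: locates the compute-stage shader among the
 * pipeline's shaders and creates the VkPipeline from it. */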
    VkComputePipelineCreateInfo pipe = {
        .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
    ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
    if (ret != VK_SUCCESS) {
    pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
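/* Teardown: wait out any in-flight work on the per-queue fences, then destroy
 * every Vulkan object created by the helpers above. */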
        vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
        vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc);
    vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
    vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
    vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
        vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
        vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i],
    vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
    av_free((void *)dt->pDescriptorUpdateEntries);
    for (int i = 0; i < s->exec_ctx_num; i++)
    for (int i = 0; i < s->samplers_num; i++) {
        vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0],
    for (int i = 0; i < s->pipelines_num; i++)
    s->scratch_size = 0;