    GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
    GLSLC(1, u8buf slice_state; );
    GLSLC(1, u8buf scratch_data; );
    GLSLC(1, u8buf out_data; );
    GLSLC(1, ivec2 sar; );
    GLSLC(1, uvec2 chroma_shift; );
    GLSLC(1, uint plane_state_size; );
    GLSLC(1, uint context_count; );
    GLSLC(1, uint32_t crcref; );
    GLSLC(1, uint32_t slice_size_max; );
    GLSLC(1, uint8_t bits_per_raw_sample; );
    GLSLC(1, uint8_t context_model; );
    GLSLC(1, uint8_t micro_version; );
    GLSLC(1, uint8_t force_pcm; );
    GLSLC(1, uint8_t key_frame; );
    GLSLC(1, uint8_t codec_planes; );
    GLSLC(1, uint8_t transparency; );
    GLSLC(1, uint8_t colorspace; );
    GLSLC(1, uint8_t pic_mode; );
    GLSLC(1, uint8_t ec; );
    GLSLC(1, uint8_t ppi; );
    GLSLC(1, uint8_t chunks; );
    GLSLC(1, uint8_t padding[2]; );
                                VK_SHADER_STAGE_COMPUTE_BIT);
                        AVFrame *enc_in, VkImageView *enc_in_views,
                        AVFrame **intermediate_frame, VkImageView *intermediate_views,
                        VkImageMemoryBarrier2 *img_bar, int *nb_img_bar,
                        VkBufferMemoryBarrier2 *buf_bar, int *nb_buf_bar,
                        FFVkBuffer *slice_data_buf, uint32_t slice_data_size)
    if (!(*intermediate_frame))

                                 *intermediate_frame, 0));

                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
                                    0, slice_data_size*f->slice_count,
                                    VK_FORMAT_UNDEFINED);

                                    enc_in, enc_in_views,
                                    VK_IMAGE_LAYOUT_GENERAL,

                                    *intermediate_frame, intermediate_views,
                                    VK_IMAGE_LAYOUT_GENERAL,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);
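    /* Submit the accumulated image and buffer barriers in one dependency */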
    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pImageMemoryBarriers = img_bar,
        .imageMemoryBarrierCount = *nb_img_bar,
        .pBufferMemoryBarriers = buf_bar,
        .bufferMemoryBarrierCount = *nb_buf_bar,
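    /* Remember the stage/access the slice data buffer was last used with,
     * so later barriers can chain off it */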
    slice_data_buf->stage  = buf_bar[0].dstStageMask;
    slice_data_buf->access = buf_bar[0].dstAccessMask;
        .offset       = 1 << f->bits_per_raw_sample,
        .bits         = f->bits_per_raw_sample,
        .transparency = f->transparency,

        memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int));
                            VK_SHADER_STAGE_COMPUTE_BIT,

                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_READ_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);
    size_t tmp_data_size;
    uint32_t plane_state_size;
    uint32_t slice_state_size;
    uint32_t slice_data_size;
    int has_inter = avctx->gop_size > 1;
    uint32_t context_count = f->context_count[f->context_model];
    VkImageView *enc_in_views = in_views;
    VkImageMemoryBarrier2 img_bar[37];
    VkBufferMemoryBarrier2 buf_bar[8];
    f->cur_enc_frame = pict;
    f->slice_count = f->max_slice_count;

                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                              VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
                              VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
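    /* Size the per-slice state: the per-plane context state scaled by the
     * context and plane counts, plus 256 bytes of per-slice data, rounded
     * up to an 8-byte alignment */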
    plane_state_size = 8;
    plane_state_size *= context_count;
    slice_state_size = plane_state_size*f->plane_count;

    slice_data_size = 256;
    slice_state_size += slice_data_size;
    slice_state_size = FFALIGN(slice_state_size, 8);
    if (!slice_data_ref) {
                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
                                  NULL, slice_state_size*f->slice_count,
                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));

                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                              VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
                              NULL, 2*f->slice_count*sizeof(uint64_t),
                              VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));

                              VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                              VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
                              maxsize < fv->max_heap_size ?
                              VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0x0));
                                VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));

                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_READ_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

                                    0, slice_data_size*f->slice_count,
                                    VK_FORMAT_UNDEFINED);

                                    enc_in, enc_in_views,
                                    VK_IMAGE_LAYOUT_GENERAL,
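    /* Make the slice data buffer available to the compute shader for
     * storage reads and writes */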
    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
        .srcStageMask = slice_data_buf->stage,
        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
        .srcAccessMask = slice_data_buf->access,
        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .buffer = slice_data_buf->buf,
        .size = VK_WHOLE_SIZE,
    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pImageMemoryBarriers = img_bar,
        .imageMemoryBarrierCount = nb_img_bar,
        .pBufferMemoryBarriers = buf_bar,
        .bufferMemoryBarrierCount = nb_buf_bar,

    slice_data_buf->stage  = buf_bar[0].dstStageMask;
    slice_data_buf->access = buf_bar[0].dstAccessMask;
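    /* Push-constant data matching the pushConstants layout declared above */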
        .scratch_data = tmp_data_buf->address,
        .out_data = out_data_buf->address,
        .bits_per_raw_sample = f->bits_per_raw_sample,
        .chroma_shift[0] = f->chroma_h_shift,
        .chroma_shift[1] = f->chroma_v_shift,
        .plane_state_size = plane_state_size,
        .context_count = context_count,
        .slice_size_max = out_data_buf->size / f->slice_count,
        .version = f->version,
        .micro_version = f->micro_version,
        .key_frame = f->key_frame,
        .codec_planes = f->plane_count,
        .transparency = f->transparency,
        .colorspace = f->colorspace,
                            VK_SHADER_STAGE_COMPUTE_BIT,

    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
        .srcStageMask = slice_data_buf->stage,
        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
        .srcAccessMask = slice_data_buf->access,
        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .buffer = slice_data_buf->buf,
        .size = slice_data_size*f->slice_count,
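    /* Run the state-reset pass when coding a keyframe (and always for
     * version > 3) */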
    if (f->key_frame || f->version > 3) {
                                            0, slice_data_size*f->slice_count,
                                            VK_FORMAT_UNDEFINED);

            .plane_state_size = plane_state_size,
            .codec_planes = f->plane_count,
            .key_frame = f->key_frame,

        for (int i = 0; i < f->quant_table_count; i++)

                                    VK_SHADER_STAGE_COMPUTE_BIT,
                                    0, sizeof(pd_reset), &pd_reset);
        vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pBufferMemoryBarriers = buf_bar,
            .bufferMemoryBarrierCount = nb_buf_bar,

        slice_data_buf->stage  = buf_bar[0].dstStageMask;
        slice_data_buf->access = buf_bar[0].dstAccessMask;
                               enc_in, enc_in_views,
                               &intermediate_frame, intermediate_views,
                               img_bar, &nb_img_bar, buf_bar, &nb_buf_bar,
                               slice_data_buf, slice_data_size));

        enc_in = intermediate_frame;
        enc_in_views = intermediate_views;
    if (f->key_frame || f->version > 3) {
        buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
            .srcStageMask = slice_data_buf->stage,
            .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
            .srcAccessMask = slice_data_buf->access,
            .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
                             VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .buffer = slice_data_buf->buf,
            .size = slice_data_buf->size - slice_data_size*f->slice_count,
            .offset = slice_data_size*f->slice_count,
        vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pImageMemoryBarriers = img_bar,
            .imageMemoryBarrierCount = nb_img_bar,
            .pBufferMemoryBarriers = buf_bar,
            .bufferMemoryBarrierCount = nb_buf_bar,

        slice_data_buf->stage  = buf_bar[0].dstStageMask;
        slice_data_buf->access = buf_bar[0].dstAccessMask;
                                    0, slice_data_size*f->slice_count,
                                    VK_FORMAT_UNDEFINED);

                                    enc_in, enc_in_views,
                                    VK_IMAGE_LAYOUT_GENERAL,

                                    0, results_data_buf->size,
                                    VK_FORMAT_UNDEFINED);

                            VK_SHADER_STAGE_COMPUTE_BIT,
                              VkBufferCopy *buf_regions, int nb_regions,

    VkBufferMemoryBarrier2 buf_bar[8];
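    /* Make the encoded output visible to the transfer stage before copying
     * it into the packet buffer */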
    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
        .srcStageMask = out_data_buf->stage,
        .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
        .srcAccessMask = out_data_buf->access,
        .dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .buffer = out_data_buf->buf,
        .size = VK_WHOLE_SIZE,
    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pBufferMemoryBarriers = buf_bar,
        .bufferMemoryBarrierCount = nb_buf_bar,

    out_data_buf->stage  = buf_bar[0].dstStageMask;
    out_data_buf->access = buf_bar[0].dstAccessMask;

    vk->CmdCopyBuffer(exec->buf,
                      out_data_buf->buf, pkt_data_buf->buf,
                      nb_regions, buf_regions);
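    /* If the memory is not host-coherent, invalidate the mapped range before
     * reading it on the CPU */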
    if (!(pkt_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
        VkMappedMemoryRange invalidate_data = {
            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = pkt_data_buf->mem,
            .size = VK_WHOLE_SIZE,
                                   1, &invalidate_data);

    if (!(results_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
        VkMappedMemoryRange invalidate_data = {
            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = results_data_buf->mem,
            .size = VK_WHOLE_SIZE,
                                   1, &invalidate_data);
    for (int i = 0; i < f->slice_count; i++) {
        sc = &((uint64_t *)results_data_buf->mapped_mem)[i*2];
               "src offset = %"PRIu64"\n",
                                 VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                                 VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
                                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);

    vk_frames = frames_ctx->hwctx;
    vk_frames->tiling    = VK_IMAGE_TILING_OPTIMAL;
    vk_frames->usage     = VK_IMAGE_USAGE_STORAGE_BIT;
    vk_frames->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
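    /* Emit GLSL type defines matching the sample bit depth */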
    GLSLF(0, #define TYPE int%i_t ,smp_bits);
    GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits);
    GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits);

    void *spv_opaque = NULL;

                      VK_SHADER_STAGE_COMPUTE_BIT,
                      (const char *[]) { "GL_EXT_buffer_reference",
                                         "GL_EXT_buffer_reference2" }, 2,
        .name        = "rangecoder_static_buf",
        .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        .mem_layout  = "scalar",
        .buf_content = "uint8_t zero_one_state[512];",

        .name        = "quant_buf",
        .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        .mem_layout  = "scalar",
        .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
                       "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",

        .name        = "slice_data_buf",
        .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        .buf_content = "SliceContext slice_ctx[1024];",

        .type        = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
        .mem_quali   = "readonly",
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
    void *spv_opaque = NULL;
    int wg_dim = FFMIN(fv->s.props.properties.limits.maxComputeWorkGroupSize[0], 1024);

                      VK_SHADER_STAGE_COMPUTE_BIT,
                      (const char *[]) { "GL_EXT_buffer_reference",
                                         "GL_EXT_buffer_reference2" }, 2,

    GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
    GLSLC(1, u8buf slice_state; );
    GLSLC(1, uint plane_state_size; );
    GLSLC(1, uint8_t codec_planes; );
    GLSLC(1, uint8_t key_frame; );
    GLSLC(1, uint8_t micro_version; );
    GLSLC(1, uint8_t padding[1]; );
                                VK_SHADER_STAGE_COMPUTE_BIT);
        .name        = "rangecoder_static_buf",
        .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        .mem_layout  = "scalar",
        .buf_content = "uint8_t zero_one_state[512];",

        .name        = "quant_buf",
        .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        .mem_layout  = "scalar",
        .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
                       "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",

        .name        = "slice_data_buf",
        .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        .mem_quali   = "readonly",
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        .buf_content = "SliceContext slice_ctx[1024];",
    void *spv_opaque = NULL;
    int wg_count = sqrt(fv->s.props.properties.limits.maxComputeWorkGroupInvocations);

               "pixel format for RCT buffer!\n");

                      VK_SHADER_STAGE_COMPUTE_BIT,
                      (const char *[]) { "GL_EXT_buffer_reference",
                                         "GL_EXT_buffer_reference2" }, 2,
                      wg_count, wg_count, 1,
    GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
    GLSLC(1, ivec4 fmt_lut; );
    GLSLC(1, uint8_t planar_rgb; );
    GLSLC(1, uint8_t color_planes; );
    GLSLC(1, uint8_t transparency; );
    GLSLC(1, uint8_t micro_version; );
    GLSLC(1, uint8_t padding[2]; );
                                VK_SHADER_STAGE_COMPUTE_BIT);
        .name        = "rangecoder_static_buf",
        .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        .mem_layout  = "scalar",
        .buf_content = "uint8_t zero_one_state[512];",

        .name        = "quant_buf",
        .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        .mem_layout  = "scalar",
        .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
                       "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",

        .name        = "slice_data_buf",
        .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        .mem_quali   = "readonly",
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        .buf_content = "SliceContext slice_ctx[1024];",

        .type        = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
        .mem_quali   = "readonly",
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,

        .type        = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
        .mem_quali   = "writeonly",
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
    void *spv_opaque = NULL;

                      VK_SHADER_STAGE_COMPUTE_BIT,
                      (const char *[]) { "GL_EXT_buffer_reference",
                                         "GL_EXT_buffer_reference2" }, 2,
        .name        = "rangecoder_static_buf",
        .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        .mem_layout  = "scalar",
        .buf_content = "uint8_t zero_one_state[512];",

        .name        = "quant_buf",
        .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        .mem_layout  = "scalar",
        .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
                       "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",

        .name        = "crc_ieee_buf",
        .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        .mem_layout  = "scalar",
        .buf_content = "uint32_t crc_ieee[256];",

        .name        = "slice_data_buf",
        .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        .buf_content = "SliceContext slice_ctx[1024];",

        .type        = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
        .mem_quali   = "readonly",
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,

        .name        = "results_data_buf",
        .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        .mem_quali   = "writeonly",
        .buf_content = "uint64_t slice_results[2048];",
    size_t maxsize, max_heap_size, max_host_size;

    if (f->bits_per_raw_sample > (f->version > 3 ? 16 : 8)) {
               "forcing range coder\n");

    if (f->version < 4 && avctx->gop_size > 1) {

    if (f->version == 4 && f->micro_version > 4)
        f->micro_version = 3;
    f->num_h_slices = w_sl;
    f->num_v_slices = h_sl;
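    /* Default to 32x32 slices; if only one dimension is given, derive the
     * other so the total stays within the standard's limit of 1024 slices */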
    if (f->num_h_slices <= 0 && f->num_v_slices <= 0) {
        f->num_h_slices = 32;
        f->num_v_slices = 32;
    } else if (f->num_h_slices && f->num_v_slices <= 0) {
        f->num_v_slices = 1024 / f->num_h_slices;
    } else if (f->num_v_slices && f->num_h_slices <= 0) {
        f->num_h_slices = 1024 / f->num_v_slices;

    f->num_h_slices = FFMIN(f->num_h_slices, avctx->width);

    if (f->num_h_slices * f->num_v_slices > 1024) {
               "by the standard is 1024\n",
               f->num_h_slices * f->num_v_slices);
    if (f->version < 4) {
        if (((f->chroma_h_shift > 0) && (avctx->width % (64 << f->chroma_h_shift))) ||
            ((f->chroma_v_shift > 0) && (avctx->height % (64 << f->chroma_v_shift)))) {
                   "dimensions is only supported in version 4 (-level 4)\n");

    if (f->version < 4) {
    for (int i = 0; i < fv->s.mprops.memoryHeapCount; i++) {
        if (fv->s.mprops.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)
        if (!(fv->s.mprops.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT))
            max_host_size = FFMAX(max_host_size,

    if (maxsize > fv->s.props_11.maxMemoryAllocationSize) {
               "than maximum device allocation (%zu), clipping\n",
               maxsize, fv->s.props_11.maxMemoryAllocationSize);
        maxsize = fv->s.props_11.maxMemoryAllocationSize;

    if (max_heap_size < maxsize) {
               "using host memory (slower)\n",
        max_heap_size = max_host_size - (max_host_size >> 1);

    max_heap_size = max_heap_size - (max_heap_size >> 3);

    av_log(avctx, AV_LOG_INFO,
           "Async buffers: %zuMiB per context, %zuMiB total, depth: %i\n",
           maxsize / (1024*1024),
    spv = ff_vk_spirv_init();

    if (!fv->is_rgb && f->bits_per_raw_sample > 8)

                                &fv->setup, 0, 0, 0,
                                VK_FORMAT_UNDEFINED));
                                VK_FORMAT_UNDEFINED));
                                VK_FORMAT_UNDEFINED));
                                VK_FORMAT_UNDEFINED));

    f->max_slice_count = f->num_h_slices * f->num_v_slices;
#define OFFSET(x) offsetof(VulkanEncodeFFv1Context, x)
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
        { .i64 = -1 }, -1, 1, VE },
        { .i64 = 0 }, 0, 1, VE },
        { .i64 = -1 }, -1, 2, VE, .unit = "qtable" },
        { .i64 = QTABLE_8BIT }, INT_MIN, INT_MAX, VE, .unit = "qtable" },
        { .i64 = -1 }, -1, 1024, VE },
        { .i64 = -1 }, -1, 1024, VE },
        { .i64 = 0 }, 0, 1, VE },
        { .i64 = 1 }, 1, INT_MAX, VE },
    .p.name         = "ffv1_vulkan",
    .p.wrapper_name = "vulkan",