29 #define RGB_LINECACHE 2
42 .queue_flags = VK_QUEUE_COMPUTE_BIT,
109 GLSLC(0,
layout(push_constant, scalar) uniform pushConstants { );
110 GLSLC(1, u8buf slice_data; );
111 GLSLC(1, u8buf slice_state; );
113 GLSLC(1, ivec4 fmt_lut; );
114 GLSLC(1, uvec2 img_size; );
115 GLSLC(1, uvec2 chroma_shift; );
117 GLSLC(1, uint plane_state_size; );
118 GLSLC(1, uint32_t crcref; );
119 GLSLC(1,
int rct_offset; );
121 GLSLC(1, uint8_t extend_lookup[8]; );
122 GLSLC(1, uint8_t bits_per_raw_sample; );
123 GLSLC(1, uint8_t quant_table_count; );
125 GLSLC(1, uint8_t micro_version; );
126 GLSLC(1, uint8_t key_frame; );
128 GLSLC(1, uint8_t codec_planes; );
129 GLSLC(1, uint8_t color_planes; );
130 GLSLC(1, uint8_t transparency; );
131 GLSLC(1, uint8_t planar_rgb; );
132 GLSLC(1, uint8_t colorspace; );
133 GLSLC(1, uint8_t ec; );
134 GLSLC(1, uint8_t golomb; );
135 GLSLC(1, uint8_t check_crc; );
136 GLSLC(1, uint8_t padding[3]; );
139 VK_SHADER_STAGE_COMPUTE_BIT);
166 for (
int i = 0;
i <
f->quant_table_count;
i++)
167 max_contexts =
FFMAX(
f->context_count[
i], max_contexts);
188 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
189 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
195 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
196 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
198 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
211 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
212 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
213 NULL, 2*
f->slice_count*
sizeof(uint32_t),
214 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
215 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
222 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
223 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
224 NULL, 2*
f->slice_count*
sizeof(uint32_t),
225 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
226 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
263 if (slices_buf && slices_buf->host_ref) {
265 data - slices_buf->mapped_mem);
301 int bits =
f->avctx->bits_per_raw_sample > 0 ?
f->avctx->bits_per_raw_sample : 8;
320 VkImageView *decode_dst_view = is_rgb ? rct_image_views : vp->
view.
out;
322 VkImageMemoryBarrier2 img_bar[37];
324 VkBufferMemoryBarrier2 buf_bar[8];
332 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
333 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
344 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
345 VK_PIPELINE_STAGE_2_CLEAR_BIT));
347 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
348 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
349 VK_ACCESS_2_TRANSFER_WRITE_BIT,
350 VK_IMAGE_LAYOUT_GENERAL,
351 VK_QUEUE_FAMILY_IGNORED);
360 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT));
371 buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
372 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
373 .srcStageMask = slice_state->stage,
374 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
375 .srcAccessMask = slice_state->access,
376 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
377 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
378 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
379 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
380 .buffer = slice_state->buf,
385 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
386 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
387 .pImageMemoryBarriers = img_bar,
388 .imageMemoryBarrierCount = nb_img_bar,
389 .pBufferMemoryBarriers = buf_bar,
390 .bufferMemoryBarrierCount = nb_buf_bar,
392 slice_state->stage = buf_bar[0].dstStageMask;
393 slice_state->access = buf_bar[0].dstAccessMask;
402 VK_FORMAT_UNDEFINED);
406 0, 2*
f->slice_count*
sizeof(uint32_t),
407 VK_FORMAT_UNDEFINED);
411 0, 2*
f->slice_count*
sizeof(uint32_t),
412 VK_FORMAT_UNDEFINED);
419 .img_size[0] =
f->picture.f->width,
420 .img_size[1] =
f->picture.f->height,
421 .chroma_shift[0] =
f->chroma_h_shift,
422 .chroma_shift[1] =
f->chroma_v_shift,
426 .rct_offset = 1 <<
bits,
428 .bits_per_raw_sample =
bits,
429 .quant_table_count =
f->quant_table_count,
430 .version =
f->version,
431 .micro_version =
f->micro_version,
434 .codec_planes =
f->plane_count,
435 .color_planes = color_planes,
436 .transparency =
f->transparency,
439 .colorspace =
f->colorspace,
444 for (
int i = 0;
i <
f->quant_table_count;
i++)
446 (
f->quant_tables[
i][4][127] != 0);
452 memcpy(pd.
fmt_lut, (
int [4]) { 2, 1, 0, 3 }, 4*
sizeof(
int));
457 VK_SHADER_STAGE_COMPUTE_BIT,
460 vk->CmdDispatch(exec->
buf,
f->num_h_slices,
f->num_v_slices, 1);
464 for (
int i = 0;
i < color_planes;
i++)
465 vk->CmdClearColorImage(exec->
buf, vkf->
img[
i], VK_IMAGE_LAYOUT_GENERAL,
466 &((VkClearColorValue) { 0 }),
467 1, &((VkImageSubresourceRange) {
468 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
475 reset_shader = &fv->
reset;
480 VK_FORMAT_UNDEFINED);
487 .codec_planes =
f->plane_count,
489 .version =
f->version,
490 .micro_version =
f->micro_version,
492 for (
int i = 0;
i <
f->quant_table_count;
i++)
496 VK_SHADER_STAGE_COMPUTE_BIT,
497 0,
sizeof(pd_reset), &pd_reset);
500 buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
501 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
502 .srcStageMask = slice_state->stage,
503 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
504 .srcAccessMask = slice_state->access,
505 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
506 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
507 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
508 .buffer = slice_state->buf,
512 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
513 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
514 .pImageMemoryBarriers = img_bar,
515 .imageMemoryBarrierCount = nb_img_bar,
516 .pBufferMemoryBarriers = buf_bar,
517 .bufferMemoryBarrierCount = nb_buf_bar,
519 slice_state->stage = buf_bar[0].dstStageMask;
520 slice_state->access = buf_bar[0].dstAccessMask;
524 vk->CmdDispatch(exec->
buf,
f->num_h_slices,
f->num_v_slices,
528 decode_shader = &fv->
decode;
533 VK_FORMAT_UNDEFINED);
535 decode_dst, decode_dst_view,
537 VK_IMAGE_LAYOUT_GENERAL,
542 0, 2*
f->slice_count*
sizeof(uint32_t),
543 VK_FORMAT_UNDEFINED);
548 VK_IMAGE_LAYOUT_GENERAL,
553 VK_SHADER_STAGE_COMPUTE_BIT,
557 buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
558 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
559 .srcStageMask = slice_state->stage,
560 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
561 .srcAccessMask = slice_state->access,
562 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
563 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
564 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
565 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
566 .buffer = slice_state->buf,
573 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
574 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
575 VK_ACCESS_SHADER_WRITE_BIT |
576 (!is_rgb ? VK_ACCESS_SHADER_READ_BIT : 0),
577 VK_IMAGE_LAYOUT_GENERAL,
578 VK_QUEUE_FAMILY_IGNORED);
581 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
582 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
583 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
584 VK_IMAGE_LAYOUT_GENERAL,
585 VK_QUEUE_FAMILY_IGNORED);
587 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
588 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
589 .pImageMemoryBarriers = img_bar,
590 .imageMemoryBarrierCount = nb_img_bar,
591 .pBufferMemoryBarriers = buf_bar,
592 .bufferMemoryBarrierCount = nb_buf_bar,
594 slice_state->stage = buf_bar[0].dstStageMask;
595 slice_state->access = buf_bar[0].dstAccessMask;
599 vk->CmdDispatch(exec->
buf,
f->num_h_slices,
f->num_v_slices, 1);
614 int smp_bits = use32bit ? 32 : 16;
622 GLSLF(0, #define
TYPE int%i_t ,smp_bits);
623 GLSLF(0, #define VTYPE2
i%ivec2 ,smp_bits);
624 GLSLF(0, #define VTYPE3
i%ivec3 ,smp_bits);
638 void *spv_opaque =
NULL;
641 VK_SHADER_STAGE_COMPUTE_BIT,
642 (
const char *[]) {
"GL_EXT_buffer_reference",
643 "GL_EXT_buffer_reference2" }, 2,
658 .
name =
"rangecoder_static_buf",
659 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
660 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
661 .mem_layout =
"scalar",
662 .buf_content =
"uint8_t zero_one_state[512];",
665 .name =
"crc_ieee_buf",
666 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
667 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
668 .mem_layout =
"scalar",
669 .buf_content =
"uint32_t crc_ieee[256];",
673 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
674 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
675 .mem_layout =
"scalar",
676 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
677 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
687 .
name =
"slice_data_buf",
688 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
689 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
690 .buf_content =
"SliceContext slice_ctx",
691 .buf_elems =
f->max_slice_count,
694 .name =
"slice_offsets_buf",
695 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
696 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
697 .mem_quali =
"readonly",
698 .buf_content =
"u32vec2 slice_offsets",
699 .buf_elems = 2*
f->max_slice_count,
702 .name =
"slice_status_buf",
703 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
704 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
705 .mem_quali =
"writeonly",
706 .buf_content =
"uint32_t slice_status",
707 .buf_elems = 2*
f->max_slice_count,
736 void *spv_opaque =
NULL;
737 int wg_dim =
FFMIN(
s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);
740 VK_SHADER_STAGE_COMPUTE_BIT,
741 (
const char *[]) {
"GL_EXT_buffer_reference",
742 "GL_EXT_buffer_reference2" }, 2,
752 GLSLC(0,
layout(push_constant, scalar) uniform pushConstants { );
754 GLSLC(1, u8buf slice_state; );
755 GLSLC(1, uint plane_state_size; );
756 GLSLC(1, uint8_t codec_planes; );
757 GLSLC(1, uint8_t key_frame; );
759 GLSLC(1, uint8_t micro_version; );
760 GLSLC(1, uint8_t padding[1]; );
763 VK_SHADER_STAGE_COMPUTE_BIT);
771 .
name =
"rangecoder_static_buf",
772 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
773 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
774 .mem_layout =
"scalar",
775 .buf_content =
"uint8_t zero_one_state[512];",
779 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
780 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
781 .mem_layout =
"scalar",
782 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
783 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
794 .
name =
"slice_data_buf",
795 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
796 .mem_quali =
"readonly",
797 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
798 .buf_content =
"SliceContext slice_ctx",
799 .buf_elems =
f->max_slice_count,
831 void *spv_opaque =
NULL;
834 s->driver_props.driverID == VK_DRIVER_ID_MESA_RADV;
837 VK_SHADER_STAGE_COMPUTE_BIT,
838 (
const char *[]) {
"GL_EXT_buffer_reference",
839 "GL_EXT_buffer_reference2" }, 2,
849 if (use_cached_reader)
863 .
name =
"rangecoder_static_buf",
864 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
865 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
866 .mem_layout =
"scalar",
867 .buf_content =
"uint8_t zero_one_state[512];",
871 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
872 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
873 .mem_layout =
"scalar",
874 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
875 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
887 .
name =
"slice_data_buf",
888 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
889 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
890 .buf_content =
"SliceContext slice_ctx",
891 .buf_elems =
f->max_slice_count,
895 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
900 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
903 .name =
"slice_status_buf",
904 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
905 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
906 .mem_quali =
"writeonly",
907 .buf_content =
"uint32_t slice_status",
908 .buf_elems = 2*
f->max_slice_count,
912 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
916 .mem_quali =
"writeonly",
918 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
953 frames_ctx->
width =
s->frames->width;
956 vk_frames = frames_ctx->
hwctx;
957 vk_frames->
tiling = VK_IMAGE_TILING_OPTIMAL;
958 vk_frames->
img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
959 vk_frames->
usage = VK_IMAGE_USAGE_STORAGE_BIT |
960 VK_IMAGE_USAGE_TRANSFER_DST_BIT;
1003 if (
f->version < 3 ||
1004 (
f->version == 4 &&
f->micro_version > 3))
1007 spv = ff_vk_spirv_init();
1044 spv, &fv->
reset,
f->ac));
1071 &fv->
setup, 0, 0, 0,
1074 VK_FORMAT_UNDEFINED));
1076 &fv->
setup, 0, 1, 0,
1079 VK_FORMAT_UNDEFINED));
1086 VK_FORMAT_UNDEFINED));
1091 VK_FORMAT_UNDEFINED));
1111 if (!(slice_status->
flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
1112 VkMappedMemoryRange invalidate_data = {
1113 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
1114 .memory = slice_status->
mem,
1116 .size = 2*fp->
slice_num*
sizeof(uint32_t),
1119 1, &invalidate_data);
1123 uint32_t crc_res = 0;
1138 .
p.
name =
"ffv1_vulkan",