* [FFmpeg-devel] [PATCH 01/13] hwcontext_vulkan: add a new mechanism to expose used queue families
@ 2024-08-07 21:33 Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 02/13] vulkan: use the new queue family mechanism Lynne via ffmpeg-devel
` (9 more replies)
0 siblings, 10 replies; 13+ messages in thread
From: Lynne via ffmpeg-devel @ 2024-08-07 21:33 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
The issue with the old mechanism is that we had to introduce new
API each time we needed a new queue family, and all the queue families
were functionally fixed to a given purpose.
Nvidia's GPUs are able to handle video encoding and compute on the
same queue, which results in a speedup when pre-processing is required.
Also, this enables us to expose optical flow queues for frame interpolation.
---
libavutil/hwcontext_vulkan.c | 85 ++++++++++++++++++++++++++++--------
libavutil/hwcontext_vulkan.h | 25 +++++++++++
2 files changed, 93 insertions(+), 17 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index da377aa1a4..33d856ddd3 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1423,12 +1423,13 @@ static void unlock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t
static int vulkan_device_init(AVHWDeviceContext *ctx)
{
- int err;
+ int err = 0;
uint32_t qf_num;
VulkanDevicePriv *p = ctx->hwctx;
AVVulkanDeviceContext *hwctx = &p->p;
FFVulkanFunctions *vk = &p->vkctx.vkfn;
- VkQueueFamilyProperties *qf;
+ VkQueueFamilyProperties2 *qf;
+ VkQueueFamilyVideoPropertiesKHR *qf_vid;
int graph_index, comp_index, tx_index, enc_index, dec_index;
/* Set device extension flags */
@@ -1474,38 +1475,53 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
return AVERROR_EXTERNAL;
}
- qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
+ qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties2));
if (!qf)
return AVERROR(ENOMEM);
- vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
+ qf_vid = av_malloc_array(qf_num, sizeof(VkQueueFamilyVideoPropertiesKHR));
+ if (!qf_vid) {
+ av_free(qf);
+ return AVERROR(ENOMEM);
+ }
+
+ for (uint32_t i = 0; i < qf_num; i++) {
+ qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
+ };
+ qf[i] = (VkQueueFamilyProperties2) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
+ .pNext = &qf_vid[i],
+ };
+ }
+
+ vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &qf_num, qf);
p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex));
if (!p->qf_mutex) {
- av_free(qf);
- return AVERROR(ENOMEM);
+ err = AVERROR(ENOMEM);
+ goto end;
}
p->nb_tot_qfs = qf_num;
for (uint32_t i = 0; i < qf_num; i++) {
- p->qf_mutex[i] = av_calloc(qf[i].queueCount, sizeof(**p->qf_mutex));
+ p->qf_mutex[i] = av_calloc(qf[i].queueFamilyProperties.queueCount,
+ sizeof(**p->qf_mutex));
if (!p->qf_mutex[i]) {
- av_free(qf);
- return AVERROR(ENOMEM);
+ err = AVERROR(ENOMEM);
+ goto end;
}
- for (uint32_t j = 0; j < qf[i].queueCount; j++) {
+ for (uint32_t j = 0; j < qf[i].queueFamilyProperties.queueCount; j++) {
err = pthread_mutex_init(&p->qf_mutex[i][j], NULL);
if (err != 0) {
av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed : %s\n",
av_err2str(err));
- av_free(qf);
- return AVERROR(err);
+ err = AVERROR(err);
+ goto end;
}
}
}
- av_free(qf);
-
graph_index = hwctx->nb_graphics_queues ? hwctx->queue_family_index : -1;
comp_index = hwctx->nb_comp_queues ? hwctx->queue_family_comp_index : -1;
tx_index = hwctx->nb_tx_queues ? hwctx->queue_family_tx_index : -1;
@@ -1517,13 +1533,15 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
if (ctx_qf < 0 && required) { \
av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing" \
" in the context!\n", type); \
- return AVERROR(EINVAL); \
+ err = AVERROR(EINVAL); \
+ goto end; \
} else if (fidx < 0 || ctx_qf < 0) { \
break; \
} else if (ctx_qf >= qf_num) { \
av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
type, ctx_qf, qf_num); \
- return AVERROR(EINVAL); \
+ err = AVERROR(EINVAL); \
+ goto end; \
} \
\
av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)" \
@@ -1550,6 +1568,36 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
#undef CHECK_QUEUE
+ /* Update the new queue family fields. If nb_qf is already non-zero,
+ * it means the API user has already filled in the list. */
+ if (!hwctx->nb_qf) {
+#define ADD_QUEUE(ctx_qf, qc, flag) \
+ do { \
+ if (ctx_qf != -1) { \
+ hwctx->qf[hwctx->nb_qf++] = (AVVulkanDeviceQueueFamily) { \
+ .idx = ctx_qf, \
+ .num = qc, \
+ .flags = flag, \
+ }; \
+ } \
+ } while (0)
+
+ ADD_QUEUE(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT);
+ ADD_QUEUE(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT);
+ ADD_QUEUE(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT);
+ ADD_QUEUE(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
+ ADD_QUEUE(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
+#undef ADD_QUEUE
+ }
+
+ for (int i = 0; i < hwctx->nb_qf; i++) {
+ if (!hwctx->qf[i].video_caps &&
+ hwctx->qf[i].flags & (VK_QUEUE_VIDEO_DECODE_BIT_KHR |
+ VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) {
+ hwctx->qf[i].video_caps = qf_vid[hwctx->qf[i].idx].videoCodecOperations;
+ }
+ }
+
if (!hwctx->lock_queue)
hwctx->lock_queue = lock_queue;
if (!hwctx->unlock_queue)
@@ -1565,7 +1613,10 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
ff_vk_qf_init(&p->vkctx, &p->compute_qf, VK_QUEUE_COMPUTE_BIT);
ff_vk_qf_init(&p->vkctx, &p->transfer_qf, VK_QUEUE_TRANSFER_BIT);
- return 0;
+end:
+ av_free(qf_vid);
+ av_free(qf);
+ return err;
}
static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index cbbd2390c1..e9e42015f7 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -30,6 +30,20 @@
typedef struct AVVkFrame AVVkFrame;
+typedef struct AVVulkanDeviceQueueFamily {
+ /* Queue family index */
+ int idx;
+ /* Number of queues in the queue family in use */
+ int num;
+ /* Queue family capabilities. Must be non-zero.
+ * Flags may be cleared to indicate that the queue family must not be
+ * used for the corresponding purpose. */
+ VkQueueFlagBits flags;
+ /* Supported video codec operations. Vulkan implementations may list
+ * multiple video queues which differ in what they can encode or decode. */
+ VkVideoCodecOperationFlagBitsKHR video_caps;
+} AVVulkanDeviceQueueFamily;
+
/**
* @file
* API-specific header for AV_HWDEVICE_TYPE_VULKAN.
@@ -151,6 +165,17 @@ typedef struct AVVulkanDeviceContext {
* Similar to lock_queue(), unlocks a queue. Must only be called after locking.
*/
void (*unlock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index);
+
+ /**
+ * Queue families used, ordered by preference: the first entry matching a
+ * requested capability is the one picked. List may contain duplicates.
+ *
+ * For compatibility reasons, all the enabled queue families listed above
+ * (queue_family_index and queue_family_(tx/comp/encode/decode)_index) must
+ * also be included in this list until those fields are removed.
+ */
+ AVVulkanDeviceQueueFamily qf[32];
+ int nb_qf;
} AVVulkanDeviceContext;
/**
--
2.45.2.753.g447d99e1c3b
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 02/13] vulkan: use the new queue family mechanism
2024-08-07 21:33 [FFmpeg-devel] [PATCH 01/13] hwcontext_vulkan: add a new mechanism to expose used queue families Lynne via ffmpeg-devel
@ 2024-08-07 21:33 ` Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 03/13] hwcontext_vulkan: rewrite queue picking system for the new API Lynne via ffmpeg-devel
` (8 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Lynne via ffmpeg-devel @ 2024-08-07 21:33 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavutil/vulkan.c | 68 ++++++++++++++--------------------------------
libavutil/vulkan.h | 2 +-
2 files changed, 21 insertions(+), 49 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 11c17ee6f3..cec8354ba6 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -189,37 +189,14 @@ int ff_vk_load_props(FFVulkanContext *s)
static int vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
{
- int ret, num;
-
- switch (dev_family) {
- case VK_QUEUE_GRAPHICS_BIT:
- ret = s->hwctx->queue_family_index;
- num = s->hwctx->nb_graphics_queues;
- break;
- case VK_QUEUE_COMPUTE_BIT:
- ret = s->hwctx->queue_family_comp_index;
- num = s->hwctx->nb_comp_queues;
- break;
- case VK_QUEUE_TRANSFER_BIT:
- ret = s->hwctx->queue_family_tx_index;
- num = s->hwctx->nb_tx_queues;
- break;
- case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
- ret = s->hwctx->queue_family_encode_index;
- num = s->hwctx->nb_encode_queues;
- break;
- case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
- ret = s->hwctx->queue_family_decode_index;
- num = s->hwctx->nb_decode_queues;
- break;
- default:
- av_assert0(0); /* Should never happen */
+ for (int i = 0; i < s->hwctx->nb_qf; i++) {
+ if (s->hwctx->qf[i].flags & dev_family) {
+ *nb = s->hwctx->qf[i].num;
+ return s->hwctx->qf[i].idx;
+ }
}
- if (nb)
- *nb = num;
-
- return ret;
+ av_assert0(0); /* Should never happen */
}
int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
@@ -229,25 +206,20 @@ int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
if (!s->nb_qfs) {
s->nb_qfs = 0;
- /* Simply fills in all unique queues into s->qfs */
- if (s->hwctx->queue_family_index >= 0)
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index;
- if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index)
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index;
- if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index &&
- s->qfs[1] != s->hwctx->queue_family_comp_index))
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index;
- if (s->hwctx->queue_family_decode_index >= 0 &&
- (s->qfs[0] != s->hwctx->queue_family_decode_index &&
- s->qfs[1] != s->hwctx->queue_family_decode_index &&
- s->qfs[2] != s->hwctx->queue_family_decode_index))
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index;
- if (s->hwctx->queue_family_encode_index >= 0 &&
- (s->qfs[0] != s->hwctx->queue_family_encode_index &&
- s->qfs[1] != s->hwctx->queue_family_encode_index &&
- s->qfs[2] != s->hwctx->queue_family_encode_index &&
- s->qfs[3] != s->hwctx->queue_family_encode_index))
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
+ for (int i = 0; i < s->hwctx->nb_qf; i++) {
+ /* Skip duplicates */
+ int skip = 0;
+ for (int j = 0; j < s->nb_qfs; j++) {
+ if (s->qfs[j] == s->hwctx->qf[i].idx) {
+ skip = 1;
+ break;
+ }
+ }
+ if (skip)
+ continue;
+
+ s->qfs[s->nb_qfs++] = s->hwctx->qf[i].idx;
+ }
}
return (qf->queue_family = vk_qf_get_index(s, dev_family, &qf->nb_queues));
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 81898841ad..eaefc954ed 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -258,7 +258,7 @@ typedef struct FFVulkanContext {
AVHWFramesContext *frames;
AVVulkanFramesContext *hwfc;
- uint32_t qfs[5];
+ uint32_t qfs[32];
int nb_qfs;
/* Properties */
--
2.45.2.753.g447d99e1c3b
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 03/13] hwcontext_vulkan: rewrite queue picking system for the new API
2024-08-07 21:33 [FFmpeg-devel] [PATCH 01/13] hwcontext_vulkan: add a new mechanism to expose used queue families Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 02/13] vulkan: use the new queue family mechanism Lynne via ffmpeg-devel
@ 2024-08-07 21:33 ` Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 04/13] hwcontext_vulkan: initialize optical flow queues if available Lynne via ffmpeg-devel
` (7 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Lynne via ffmpeg-devel @ 2024-08-07 21:33 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
This allows us to support different video ops on different queues,
as well as any other arbitrary queues we need.
---
libavutil/hwcontext_vulkan.c | 262 ++++++++++++++++++++++-------------
1 file changed, 167 insertions(+), 95 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 33d856ddd3..5baf68660a 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1028,16 +1028,51 @@ end:
}
/* Picks the least used qf with the fewest unneeded flags, or -1 if none found */
-static inline int pick_queue_family(VkQueueFamilyProperties *qf, uint32_t num_qf,
+static inline int pick_queue_family(VkQueueFamilyProperties2 *qf, uint32_t num_qf,
VkQueueFlagBits flags)
{
int index = -1;
uint32_t min_score = UINT32_MAX;
for (int i = 0; i < num_qf; i++) {
- const VkQueueFlagBits qflags = qf[i].queueFlags;
+ VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags;
+
+ /* Per the spec, graphics and compute queues need not report transfer caps */
+ if ((flags & VK_QUEUE_TRANSFER_BIT) &&
+ (qflags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)))
+ qflags |= VK_QUEUE_TRANSFER_BIT;
+
if (qflags & flags) {
- uint32_t score = av_popcount(qflags) + qf[i].timestampValidBits;
+ uint32_t score = av_popcount(qflags) + qf[i].queueFamilyProperties.timestampValidBits;
+ if (score < min_score) {
+ index = i;
+ min_score = score;
+ }
+ }
+ }
+
+ if (index > -1)
+ qf[index].queueFamilyProperties.timestampValidBits++;
+
+ return index;
+}
+
+static inline int pick_video_queue_family(VkQueueFamilyProperties2 *qf,
+ VkQueueFamilyVideoPropertiesKHR *qf_vid, uint32_t num_qf,
+ VkVideoCodecOperationFlagBitsKHR flags)
+{
+ int index = -1;
+ uint32_t min_score = UINT32_MAX;
+
+ for (int i = 0; i < num_qf; i++) {
+ const VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags;
+ const VkQueueFlagBits vflags = qf_vid[i].videoCodecOperations;
+
+ if (!(qflags & (VK_QUEUE_VIDEO_ENCODE_BIT_KHR | VK_QUEUE_VIDEO_DECODE_BIT_KHR)))
+ continue;
+
+ if (vflags & flags) {
+ uint32_t score = av_popcount(vflags) + qf[i].queueFamilyProperties.timestampValidBits;
if (score < min_score) {
index = i;
min_score = score;
@@ -1046,7 +1081,7 @@ static inline int pick_queue_family(VkQueueFamilyProperties *qf, uint32_t num_qf
}
if (index > -1)
- qf[index].timestampValidBits++;
+ qf[index].queueFamilyProperties.timestampValidBits++;
return index;
}
@@ -1054,12 +1089,12 @@ static inline int pick_queue_family(VkQueueFamilyProperties *qf, uint32_t num_qf
static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
{
uint32_t num;
- float *weights;
- VkQueueFamilyProperties *qf = NULL;
VulkanDevicePriv *p = ctx->hwctx;
AVVulkanDeviceContext *hwctx = &p->p;
FFVulkanFunctions *vk = &p->vkctx.vkfn;
- int graph_index, comp_index, tx_index, enc_index, dec_index;
+
+ VkQueueFamilyProperties2 *qf = NULL;
+ VkQueueFamilyVideoPropertiesKHR *qf_vid = NULL;
/* First get the number of queue families */
vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
@@ -1069,118 +1104,155 @@ static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
}
/* Then allocate memory */
- qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
+ qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties2));
if (!qf)
return AVERROR(ENOMEM);
+ qf_vid = av_malloc_array(num, sizeof(VkQueueFamilyVideoPropertiesKHR));
+ if (!qf_vid)
+ return AVERROR(ENOMEM);
+
+ for (uint32_t i = 0; i < num; i++) {
+ qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
+ };
+ qf[i] = (VkQueueFamilyProperties2) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
+ .pNext = &qf_vid[i],
+ };
+ }
+
/* Finally retrieve the queue families */
- vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qf);
+ vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &num, qf);
av_log(ctx, AV_LOG_VERBOSE, "Queue families:\n");
for (int i = 0; i < num; i++) {
av_log(ctx, AV_LOG_VERBOSE, " %i:%s%s%s%s%s%s%s (queues: %i)\n", i,
- ((qf[i].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "",
- ((qf[i].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "",
- ((qf[i].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "",
- ((qf[i].queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "",
- ((qf[i].queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "",
- ((qf[i].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "",
- ((qf[i].queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "",
- qf[i].queueCount);
+ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "",
+ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "",
+ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "",
+ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "",
+ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "",
+ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "",
+ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "",
+ qf[i].queueFamilyProperties.queueCount);
/* We use this field to keep a score of how many times we've used that
* queue family in order to make better choices. */
- qf[i].timestampValidBits = 0;
+ qf[i].queueFamilyProperties.timestampValidBits = 0;
}
+ hwctx->nb_qf = 0;
+
/* Pick each queue family to use */
- graph_index = pick_queue_family(qf, num, VK_QUEUE_GRAPHICS_BIT);
- comp_index = pick_queue_family(qf, num, VK_QUEUE_COMPUTE_BIT);
- tx_index = pick_queue_family(qf, num, VK_QUEUE_TRANSFER_BIT);
- enc_index = pick_queue_family(qf, num, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
- dec_index = pick_queue_family(qf, num, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
+#define PICK_QF(type, vid_op) \
+ do { \
+ uint32_t i; \
+ uint32_t idx; \
+ \
+ if (vid_op) \
+ idx = pick_video_queue_family(qf, qf_vid, num, vid_op); \
+ else \
+ idx = pick_queue_family(qf, num, type); \
+ \
+ if (idx == -1) \
+ continue; \
+ \
+ for (i = 0; i < hwctx->nb_qf; i++) { \
+ if (hwctx->qf[i].idx == idx) { \
+ hwctx->qf[i].flags |= type; \
+ hwctx->qf[i].video_caps |= vid_op; \
+ break; \
+ } \
+ } \
+ if (i == hwctx->nb_qf) { \
+ hwctx->qf[i].idx = idx; \
+ hwctx->qf[i].num = qf[idx].queueFamilyProperties.queueCount; \
+ hwctx->qf[i].flags = type; \
+ hwctx->qf[i].video_caps = vid_op; \
+ hwctx->nb_qf++; \
+ } \
+ } while (0)
+
+ PICK_QF(VK_QUEUE_GRAPHICS_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
+ PICK_QF(VK_QUEUE_COMPUTE_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
+ PICK_QF(VK_QUEUE_TRANSFER_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
+
+ PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR);
+ PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR);
+
+ PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR);
+ PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR);
+
+ PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR);
+
+ av_free(qf);
+ av_free(qf_vid);
+
+#undef PICK_QF
+
+ cd->pQueueCreateInfos = av_malloc_array(hwctx->nb_qf,
+ sizeof(VkDeviceQueueCreateInfo));
+ if (!cd->pQueueCreateInfos)
+ return AVERROR(ENOMEM);
+
+ for (uint32_t i = 0; i < hwctx->nb_qf; i++) {
+ int dup = 0;
+ float *weights = NULL;
+ VkDeviceQueueCreateInfo *pc;
+ for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) {
+ if (hwctx->qf[i].idx == cd->pQueueCreateInfos[j].queueFamilyIndex) {
+ dup = 1;
+ break;
+ }
+ }
+ if (dup)
+ continue;
+
+ weights = av_malloc_array(hwctx->qf[i].num, sizeof(float));
+ if (!weights) {
+ for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++)
+ av_free((void *)cd->pQueueCreateInfos[i].pQueuePriorities);
+ av_free((void *)cd->pQueueCreateInfos);
+ return AVERROR(ENOMEM);
+ }
+
+ for (uint32_t j = 0; j < hwctx->qf[i].num; j++)
+ weights[j] = 1.0;
- /* Signalling the transfer capabilities on a queue family is optional */
- if (tx_index < 0) {
- tx_index = pick_queue_family(qf, num, VK_QUEUE_COMPUTE_BIT);
- if (tx_index < 0)
- tx_index = pick_queue_family(qf, num, VK_QUEUE_GRAPHICS_BIT);
+ pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;
+ pc[cd->queueCreateInfoCount++] = (VkDeviceQueueCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .queueFamilyIndex = hwctx->qf[i].idx,
+ .queueCount = hwctx->qf[i].num,
+ .pQueuePriorities = weights,
+ };
}
+ /* Setup deprecated fields */
hwctx->queue_family_index = -1;
hwctx->queue_family_comp_index = -1;
hwctx->queue_family_tx_index = -1;
hwctx->queue_family_encode_index = -1;
hwctx->queue_family_decode_index = -1;
-#define SETUP_QUEUE(qf_idx) \
- if (qf_idx > -1) { \
- int fidx = qf_idx; \
- int qc = qf[fidx].queueCount; \
- VkDeviceQueueCreateInfo *pc; \
- \
- if (fidx == graph_index) { \
- hwctx->queue_family_index = fidx; \
- hwctx->nb_graphics_queues = qc; \
- graph_index = -1; \
- } \
- if (fidx == comp_index) { \
- hwctx->queue_family_comp_index = fidx; \
- hwctx->nb_comp_queues = qc; \
- comp_index = -1; \
- } \
- if (fidx == tx_index) { \
- hwctx->queue_family_tx_index = fidx; \
- hwctx->nb_tx_queues = qc; \
- tx_index = -1; \
- } \
- if (fidx == enc_index) { \
- hwctx->queue_family_encode_index = fidx; \
- hwctx->nb_encode_queues = qc; \
- enc_index = -1; \
- } \
- if (fidx == dec_index) { \
- hwctx->queue_family_decode_index = fidx; \
- hwctx->nb_decode_queues = qc; \
- dec_index = -1; \
- } \
- \
- pc = av_realloc((void *)cd->pQueueCreateInfos, \
- sizeof(*pc) * (cd->queueCreateInfoCount + 1)); \
- if (!pc) { \
- av_free(qf); \
- return AVERROR(ENOMEM); \
- } \
- cd->pQueueCreateInfos = pc; \
- pc = &pc[cd->queueCreateInfoCount]; \
- \
- weights = av_malloc(qc * sizeof(float)); \
- if (!weights) { \
- av_free(qf); \
- return AVERROR(ENOMEM); \
- } \
- \
- memset(pc, 0, sizeof(*pc)); \
- pc->sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; \
- pc->queueFamilyIndex = fidx; \
- pc->queueCount = qc; \
- pc->pQueuePriorities = weights; \
- \
- for (int i = 0; i < qc; i++) \
- weights[i] = 1.0f / qc; \
- \
- cd->queueCreateInfoCount++; \
- }
-
- SETUP_QUEUE(graph_index)
- SETUP_QUEUE(comp_index)
- SETUP_QUEUE(tx_index)
- SETUP_QUEUE(enc_index)
- SETUP_QUEUE(dec_index)
-
-#undef SETUP_QUEUE
+#define SET_OLD_QF(field, nb_field, type) \
+ do { \
+ if (field < 0 && hwctx->qf[i].flags & type) { \
+ field = hwctx->qf[i].idx; \
+ nb_field = hwctx->qf[i].num; \
+ } \
+ } while (0)
- av_free(qf);
+ for (uint32_t i = 0; i < hwctx->nb_qf; i++) {
+ SET_OLD_QF(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT);
+ SET_OLD_QF(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT);
+ SET_OLD_QF(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT);
+ SET_OLD_QF(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
+ SET_OLD_QF(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
+ }
+
+#undef SET_OLD_QF
return 0;
}
--
2.45.2.753.g447d99e1c3b
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 04/13] hwcontext_vulkan: initialize optical flow queues if available
2024-08-07 21:33 [FFmpeg-devel] [PATCH 01/13] hwcontext_vulkan: add a new mechanism to expose used queue families Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 02/13] vulkan: use the new queue family mechanism Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 03/13] hwcontext_vulkan: rewrite queue picking system for the new API Lynne via ffmpeg-devel
@ 2024-08-07 21:33 ` Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 05/13] vulkan_video: remove NIH pooled buffer implementation Lynne via ffmpeg-devel
` (6 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Lynne via ffmpeg-devel @ 2024-08-07 21:33 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
Lets us implement FPS conversion.
---
libavutil/hwcontext_vulkan.c | 18 +++++++++++++++---
libavutil/vulkan.c | 6 +++++-
libavutil/vulkan.h | 1 +
libavutil/vulkan_functions.h | 8 ++++++++
libavutil/vulkan_loader.h | 1 +
5 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 5baf68660a..c81fc95af2 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -105,6 +105,7 @@ typedef struct VulkanDevicePriv {
VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features;
VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_features;
VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_matrix_features;
+ VkPhysicalDeviceOpticalFlowFeaturesNV optical_flow_features;
/* Queues */
pthread_mutex_t **qf_mutex;
@@ -429,6 +430,7 @@ static const VulkanOptExtension optional_device_exts[] = {
{ VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM },
{ VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT },
{ VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX },
+ { VK_NV_OPTICAL_FLOW_EXTENSION_NAME, FF_VK_EXT_OPTICAL_FLOW },
/* Imports/exports */
{ VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY },
@@ -1127,13 +1129,14 @@ static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
av_log(ctx, AV_LOG_VERBOSE, "Queue families:\n");
for (int i = 0; i < num; i++) {
- av_log(ctx, AV_LOG_VERBOSE, " %i:%s%s%s%s%s%s%s (queues: %i)\n", i,
+ av_log(ctx, AV_LOG_VERBOSE, " %i:%s%s%s%s%s%s%s%s (queues: %i)\n", i,
((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "",
((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "",
((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "",
((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "",
((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "",
((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "",
+ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_OPTICAL_FLOW_BIT_NV) ? " optical_flow" : "",
((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "",
qf[i].queueFamilyProperties.queueCount);
@@ -1177,6 +1180,7 @@ static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
PICK_QF(VK_QUEUE_GRAPHICS_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
PICK_QF(VK_QUEUE_COMPUTE_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
PICK_QF(VK_QUEUE_TRANSFER_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
+ PICK_QF(VK_QUEUE_OPTICAL_FLOW_BIT_NV, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR);
PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR);
@@ -1318,9 +1322,13 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
};
+ VkPhysicalDeviceOpticalFlowFeaturesNV optical_flow_features = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_FEATURES_NV,
+ .pNext = &timeline_features,
+ };
VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_matrix_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR,
- .pNext = &timeline_features,
+ .pNext = &optical_flow_features,
};
VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT,
@@ -1364,7 +1372,9 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
p->atomic_float_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT;
p->atomic_float_features.pNext = &p->coop_matrix_features;
p->coop_matrix_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR;
- p->coop_matrix_features.pNext = NULL;
+ p->coop_matrix_features.pNext = &p->optical_flow_features;
+ p->optical_flow_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_FEATURES_NV;
+ p->optical_flow_features.pNext = NULL;
ctx->free = vulkan_device_free;
@@ -1428,6 +1438,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
p->coop_matrix_features.cooperativeMatrix = coop_matrix_features.cooperativeMatrix;
+ p->optical_flow_features.opticalFlow = optical_flow_features.opticalFlow;
+
dev_info.pNext = &hwctx->device_features;
/* Setup queue family */
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index cec8354ba6..ade8d482b9 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -90,9 +90,13 @@ int ff_vk_load_props(FFVulkanContext *s)
s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT,
};
+ s->optical_flow_props = (VkPhysicalDeviceOpticalFlowPropertiesNV) {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_PROPERTIES_NV,
+ .pNext = &s->hprops,
+ };
s->coop_matrix_props = (VkPhysicalDeviceCooperativeMatrixPropertiesKHR) {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
- .pNext = &s->hprops,
+ .pNext = &s->optical_flow_props,
};
s->subgroup_props = (VkPhysicalDeviceSubgroupSizeControlProperties) {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES,
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index eaefc954ed..1764d0eb33 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -238,6 +238,7 @@ typedef struct FFVulkanContext {
VkPhysicalDeviceDescriptorBufferPropertiesEXT desc_buf_props;
VkPhysicalDeviceSubgroupSizeControlProperties subgroup_props;
VkPhysicalDeviceCooperativeMatrixPropertiesKHR coop_matrix_props;
+ VkPhysicalDeviceOpticalFlowPropertiesNV optical_flow_props;
VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
VkQueueFamilyVideoPropertiesKHR *video_props;
VkQueueFamilyProperties2 *qf_props;
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 6b379acf93..3f0b96f77e 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -46,6 +46,7 @@ typedef enum FFVulkanExtensions {
FF_VK_EXT_VIDEO_DECODE_AV1 = 1ULL << 14, /* VK_KHR_video_decode_av1 */
FF_VK_EXT_ATOMIC_FLOAT = 1ULL << 15, /* VK_EXT_shader_atomic_float */
FF_VK_EXT_COOP_MATRIX = 1ULL << 16, /* VK_KHR_cooperative_matrix */
+ FF_VK_EXT_OPTICAL_FLOW = 1ULL << 17, /* VK_NV_optical_flow */
FF_VK_EXT_NO_FLAG = 1ULL << 31,
} FFVulkanExtensions;
@@ -208,6 +209,13 @@ typedef enum FFVulkanExtensions {
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySamplerYcbcrConversion) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSampler) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySampler) \
+ \
+ /* Optical flow */ \
+ MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, BindOpticalFlowSessionImageNV) \
+ MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, CmdOpticalFlowExecuteNV) \
+ MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, CreateOpticalFlowSessionNV) \
+ MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, DestroyOpticalFlowSessionNV) \
+ MACRO(1, 0, FF_VK_EXT_OPTICAL_FLOW, GetPhysicalDeviceOpticalFlowImageFormatsNV)\
\
/* Shaders */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateShaderModule) \
diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h
index 73cf03935d..bb22591957 100644
--- a/libavutil/vulkan_loader.h
+++ b/libavutil/vulkan_loader.h
@@ -49,6 +49,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions,
{ VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM },
{ VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT },
{ VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX },
+ { VK_NV_OPTICAL_FLOW_EXTENSION_NAME, FF_VK_EXT_OPTICAL_FLOW },
#ifdef _WIN32
{ VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
{ VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM },
--
2.45.2.753.g447d99e1c3b
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 05/13] vulkan_video: remove NIH pooled buffer implementation
2024-08-07 21:33 [FFmpeg-devel] [PATCH 01/13] hwcontext_vulkan: add a new mechanism to expose used queue families Lynne via ffmpeg-devel
` (2 preceding siblings ...)
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 04/13] hwcontext_vulkan: initialize optical flow queues if available Lynne via ffmpeg-devel
@ 2024-08-07 21:33 ` Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 06/13] hwcontext_vulkan: remove unused struct Lynne via ffmpeg-devel
` (5 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Lynne via ffmpeg-devel @ 2024-08-07 21:33 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
The code predates ff_vk_get_pooled_buffer().
---
libavcodec/vulkan_decode.c | 41 ++++++++++++-------
libavcodec/vulkan_decode.h | 2 +
libavcodec/vulkan_video.c | 82 --------------------------------------
libavcodec/vulkan_video.h | 15 -------
4 files changed, 28 insertions(+), 112 deletions(-)
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index d8c75cd0e6..67d9b27242 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -260,7 +260,7 @@ int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp,
const int nb = *nb_slices;
uint8_t *slices;
uint32_t *slice_off;
- FFVkVideoBuffer *vkbuf;
+ FFVkBuffer *vkbuf;
size_t new_size = vp->slices_size + startcode_len + size +
ctx->caps.minBitstreamBufferSizeAlignment;
@@ -274,29 +274,38 @@ int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp,
*offsets = dec->slice_off = slice_off;
slice_off[nb] = vp->slices_size;
- vkbuf = vp->slices_buf ? (FFVkVideoBuffer *)vp->slices_buf->data : NULL;
- if (!vkbuf || vkbuf->buf.size < new_size) {
+ vkbuf = vp->slices_buf ? (FFVkBuffer *)vp->slices_buf->data : NULL;
+ if (!vkbuf || vkbuf->size < new_size) {
int err;
AVBufferRef *new_ref;
- FFVkVideoBuffer *new_buf;
- err = ff_vk_video_get_buffer(&ctx->s, &ctx->common, &new_ref,
- VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR,
- ctx->s.hwfc->create_pnext, new_size);
+ FFVkBuffer *new_buf;
+
+ /* No point in requesting anything smaller. */
+ size_t buf_size = FFMAX(new_size, 1024*1024);
+
+ /* Align buffer to nearest power of two. Makes fragmentation management
+ * easier, and gives us ample headroom. */
+ buf_size = 2 << av_log2(buf_size);
+
+ err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &new_ref,
+ VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR,
+ ctx->s.hwfc->create_pnext, buf_size,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
if (err < 0)
return err;
- new_buf = (FFVkVideoBuffer *)new_ref->data;
+ new_buf = (FFVkBuffer *)new_ref->data;
/* Copy data from the old buffer */
if (vkbuf) {
- memcpy(new_buf->mem, vkbuf->mem, vp->slices_size);
+ memcpy(new_buf->mapped_mem, vkbuf->mapped_mem, vp->slices_size);
av_buffer_unref(&vp->slices_buf);
}
vp->slices_buf = new_ref;
vkbuf = new_buf;
}
- slices = vkbuf->mem;
+ slices = vkbuf->mapped_mem;
/* Startcode */
memcpy(slices + vp->slices_size, startcode_prefix, startcode_len);
@@ -347,7 +356,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
int err;
VkResult ret;
VkCommandBuffer cmd_buf;
- FFVkVideoBuffer *sd_buf;
+ FFVkBuffer *sd_buf;
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
FFVulkanDecodeShared *ctx = dec->shared_ctx;
@@ -400,13 +409,13 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
"Result of previous frame decoding: %"PRId64"\n", prev_sub_res);
}
- sd_buf = (FFVkVideoBuffer *)vp->slices_buf->data;
+ sd_buf = (FFVkBuffer *)vp->slices_buf->data;
/* Flush if needed */
- if (!(sd_buf->buf.flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+ if (!(sd_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
VkMappedMemoryRange flush_buf = {
.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
- .memory = sd_buf->buf.mem,
+ .memory = sd_buf->mem,
.offset = 0,
.size = FFALIGN(vp->slices_size,
ctx->s.props.properties.limits.nonCoherentAtomSize),
@@ -420,7 +429,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
}
}
- vp->decode_info.srcBuffer = sd_buf->buf.buf;
+ vp->decode_info.srcBuffer = sd_buf->buf;
vp->decode_info.srcBufferOffset = 0;
vp->decode_info.srcBufferRange = data_size;
@@ -621,6 +630,8 @@ static void free_common(FFRefStructOpaque unused, void *obj)
ctx->empty_session_params,
s->hwctx->alloc);
+ av_buffer_pool_uninit(&ctx->buf_pool);
+
ff_vk_video_common_uninit(s, &ctx->common);
if (ctx->yuv_sampler)
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index 076af93499..c181277cdc 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -48,6 +48,8 @@ typedef struct FFVulkanDecodeShared {
FFVkVideoCommon common;
FFVkQueueFamilyCtx qf;
+ AVBufferPool *buf_pool;
+
VkVideoCapabilitiesKHR caps;
VkVideoDecodeCapabilitiesKHR dec_caps;
diff --git a/libavcodec/vulkan_video.c b/libavcodec/vulkan_video.c
index 412ae9b61e..f2a15d392e 100644
--- a/libavcodec/vulkan_video.c
+++ b/libavcodec/vulkan_video.c
@@ -177,86 +177,6 @@ int ff_vk_h265_level_to_av(StdVideoH265LevelIdc level)
}
}
-static void free_data_buf(void *opaque, uint8_t *data)
-{
- FFVulkanContext *ctx = opaque;
- FFVkVideoBuffer *buf = (FFVkVideoBuffer *)data;
- ff_vk_unmap_buffer(ctx, &buf->buf, 0);
- ff_vk_free_buf(ctx, &buf->buf);
- av_free(data);
-}
-
-static AVBufferRef *alloc_data_buf(void *opaque, size_t size)
-{
- AVBufferRef *ref;
- uint8_t *buf = av_mallocz(size);
- if (!buf)
- return NULL;
-
- ref = av_buffer_create(buf, size, free_data_buf, opaque, 0);
- if (!ref)
- av_free(buf);
- return ref;
-}
-
-int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s,
- AVBufferRef **buf, VkBufferUsageFlags usage,
- void *create_pNext, size_t size)
-{
- int err;
- AVBufferRef *ref;
- FFVkVideoBuffer *data;
-
- if (!s->buf_pool) {
- s->buf_pool = av_buffer_pool_init2(sizeof(FFVkVideoBuffer), ctx,
- alloc_data_buf, NULL);
- if (!s->buf_pool)
- return AVERROR(ENOMEM);
- }
-
- *buf = ref = av_buffer_pool_get(s->buf_pool);
- if (!ref)
- return AVERROR(ENOMEM);
-
- data = (FFVkVideoBuffer *)ref->data;
-
- if (data->buf.size >= size)
- return 0;
-
- /* No point in requesting anything smaller. */
- size = FFMAX(size, 1024*1024);
-
- /* Align buffer to nearest power of two. Makes fragmentation management
- * easier, and gives us ample headroom. */
- size--;
- size |= size >> 1;
- size |= size >> 2;
- size |= size >> 4;
- size |= size >> 8;
- size |= size >> 16;
- size++;
-
- ff_vk_free_buf(ctx, &data->buf);
- memset(data, 0, sizeof(FFVkVideoBuffer));
-
- err = ff_vk_create_buf(ctx, &data->buf, size,
- create_pNext, NULL, usage,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
- if (err < 0) {
- av_buffer_unref(&ref);
- return err;
- }
-
- /* Map the buffer */
- err = ff_vk_map_buffer(ctx, &data->buf, &data->mem, 0);
- if (err < 0) {
- av_buffer_unref(&ref);
- return err;
- }
-
- return 0;
-}
-
av_cold void ff_vk_video_common_uninit(FFVulkanContext *s,
FFVkVideoCommon *common)
{
@@ -273,8 +193,6 @@ av_cold void ff_vk_video_common_uninit(FFVulkanContext *s,
vk->FreeMemory(s->hwctx->act_dev, common->mem[i], s->hwctx->alloc);
av_freep(&common->mem);
-
- av_buffer_pool_uninit(&common->buf_pool);
}
av_cold int ff_vk_video_common_init(void *log, FFVulkanContext *s,
diff --git a/libavcodec/vulkan_video.h b/libavcodec/vulkan_video.h
index 01a1de7d9d..1894f1f1b7 100644
--- a/libavcodec/vulkan_video.h
+++ b/libavcodec/vulkan_video.h
@@ -32,8 +32,6 @@ typedef struct FFVkVideoSession {
VkVideoSessionKHR session;
VkDeviceMemory *mem;
uint32_t nb_mem;
-
- AVBufferPool *buf_pool;
} FFVkVideoCommon;
/**
@@ -63,19 +61,6 @@ VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth);
int ff_vk_h264_level_to_av(StdVideoH264LevelIdc level);
int ff_vk_h265_level_to_av(StdVideoH265LevelIdc level);
-typedef struct FFVkVideoBuffer {
- FFVkBuffer buf;
- uint8_t *mem;
-} FFVkVideoBuffer;
-
-/**
- * Get a mapped FFVkPooledBuffer with a specific guaranteed minimum size
- * from a pool.
- */
-int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s,
- AVBufferRef **buf, VkBufferUsageFlags usage,
- void *create_pNext, size_t size);
-
/**
* Initialize video session, allocating and binding necessary memory.
*/
--
2.45.2.753.g447d99e1c3b
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 06/13] hwcontext_vulkan: remove unused struct
2024-08-07 21:33 [FFmpeg-devel] [PATCH 01/13] hwcontext_vulkan: add a new mechanism to expose used queue families Lynne via ffmpeg-devel
` (3 preceding siblings ...)
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 05/13] vulkan_video: remove NIH pooled buffer implementation Lynne via ffmpeg-devel
@ 2024-08-07 21:33 ` Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 07/13] hwcontext_vulkan: rewrite upload/download Lynne via ffmpeg-devel
` (4 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Lynne via ffmpeg-devel @ 2024-08-07 21:33 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavutil/hwcontext_vulkan.c | 13 -------------
1 file changed, 13 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index c81fc95af2..e2ef599a0d 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -67,19 +67,6 @@
#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
#endif
-typedef struct VulkanQueueCtx {
- VkFence fence;
- VkQueue queue;
- int was_synchronous;
- int qf;
- int qidx;
-
- /* Buffer dependencies */
- AVBufferRef **buf_deps;
- int nb_buf_deps;
- unsigned int buf_deps_alloc_size;
-} VulkanQueueCtx;
-
typedef struct VulkanDevicePriv {
/**
* The public AVVulkanDeviceContext. See hwcontext_vulkan.h for it.
--
2.45.2.753.g447d99e1c3b
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 07/13] hwcontext_vulkan: rewrite upload/download
2024-08-07 21:33 [FFmpeg-devel] [PATCH 01/13] hwcontext_vulkan: add a new mechanism to expose used queue families Lynne via ffmpeg-devel
` (4 preceding siblings ...)
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 06/13] hwcontext_vulkan: remove unused struct Lynne via ffmpeg-devel
@ 2024-08-07 21:33 ` Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 08/13] hwcontext_vulkan: constify validation layer features table Lynne via ffmpeg-devel
` (3 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Lynne via ffmpeg-devel @ 2024-08-07 21:33 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
This commit was long overdue. The old transfer dubiously tried to
merge as much code as possible, and had very little in the way
of optimizations, apart from basic host-mapping.
The new code uses buffer pools for any temporary buffers, and
handles falling back to buffer-based uploads if host-mapping fails.
Roundtrip performance difference:
ffmpeg -init_hw_device "vulkan=vk:0,debug=0,disable_multiplane=1" -f lavfi \
-i color=red:s=3840x2160 -vf hwupload,hwdownload,format=yuv420p -f null -
7900XTX:
Before: 224fps
After: 502fps
Ada, with proprietary drivers:
Before: 29fps
After: 54fps
Alder Lake:
Before: 85fps
After: 108fps
With the host-mapping codepath disabled:
Before: 32fps
After: 51fps
---
libavutil/hwcontext_vulkan.c | 513 +++++++++++++++++++++++------------
libavutil/vulkan.c | 5 +-
2 files changed, 336 insertions(+), 182 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index e2ef599a0d..443862be3b 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -129,6 +129,9 @@ typedef struct VulkanFramesPriv {
FFVkExecPool upload_exec;
FFVkExecPool download_exec;
+ /* Temporary buffer pools */
+ AVBufferPool *tmp;
+
/* Modifier info list to free at uninit */
VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
} VulkanFramesPriv;
@@ -2425,6 +2428,8 @@ static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
ff_vk_exec_pool_free(&p->vkctx, &fp->compute_exec);
ff_vk_exec_pool_free(&p->vkctx, &fp->upload_exec);
ff_vk_exec_pool_free(&p->vkctx, &fp->download_exec);
+
+ av_buffer_pool_uninit(&fp->tmp);
}
static int vulkan_frames_init(AVHWFramesContext *hwfc)
@@ -3451,128 +3456,298 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
return AVERROR(ENOSYS);
}
-static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height)
+static int copy_buffer_data(AVHWFramesContext *hwfc, AVBufferRef *buf,
+ AVFrame *swf, VkBufferImageCopy *region,
+ int planes, int upload)
{
- size_t size;
- *stride = FFALIGN(*stride, p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
- size = height*(*stride);
- size = FFALIGN(size, p->props.properties.limits.minMemoryMapAlignment);
- return size;
+ VkResult ret;
+ VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
+ AVVulkanDeviceContext *hwctx = &p->p;
+
+ FFVkBuffer *vkbuf = (FFVkBuffer *)buf->data;
+
+ const VkMappedMemoryRange flush_info = {
+ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+ .memory = vkbuf->mem,
+ .size = VK_WHOLE_SIZE,
+ };
+
+ if (!(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && !upload) {
+ ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, 1,
+ &flush_info);
+ if (ret != VK_SUCCESS) {
+ av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate buffer data: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ if (upload) {
+ for (int i = 0; i < planes; i++)
+ av_image_copy_plane(vkbuf->mapped_mem + region[i].bufferOffset,
+ region[i].bufferRowLength,
+ swf->data[i],
+ swf->linesize[i],
+ swf->linesize[i],
+ region[i].imageExtent.height);
+ } else {
+ for (int i = 0; i < planes; i++)
+ av_image_copy_plane_uc_from(swf->data[i],
+ swf->linesize[i],
+ vkbuf->mapped_mem + region[i].bufferOffset,
+ region[i].bufferRowLength,
+ swf->linesize[i],
+ region[i].imageExtent.height);
+ }
+
+ if (!(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && upload) {
+ ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, 1,
+ &flush_info);
+ if (ret != VK_SUCCESS) {
+ av_log(hwfc, AV_LOG_ERROR, "Failed to flush buffer data: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ return 0;
}
-static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
- AVBufferRef **bufs, size_t *buf_offsets,
- const int *buf_stride, int w,
- int h, enum AVPixelFormat pix_fmt, int to_buf)
+static int get_plane_buf(AVHWFramesContext *hwfc, AVBufferRef **dst,
+ AVFrame *swf, VkBufferImageCopy *region, int upload)
{
int err;
- AVVkFrame *frame = (AVVkFrame *)f->data[0];
VulkanFramesPriv *fp = hwfc->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
- FFVulkanFunctions *vk = &p->vkctx.vkfn;
- VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
- int nb_img_bar = 0;
+ const int planes = av_pix_fmt_count_planes(swf->format);
+
+ size_t buf_offset = 0;
+ for (int i = 0; i < planes; i++) {
+ size_t size;
+ ptrdiff_t linesize = swf->linesize[i];
- const int nb_images = ff_vk_count_images(frame);
- int pixfmt_planes = av_pix_fmt_count_planes(pix_fmt);
- const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+ uint32_t p_w, p_h;
+ get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
- VkCommandBuffer cmd_buf;
- FFVkExecContext *exec = ff_vk_exec_get(to_buf ? &fp->download_exec :
- &fp->upload_exec);
- cmd_buf = exec->buf;
- ff_vk_exec_start(&p->vkctx, exec);
+ linesize = FFALIGN(linesize,
+ p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
+ size = p_h*linesize;
- err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, pixfmt_planes, 1);
+ region[i] = (VkBufferImageCopy) {
+ .bufferOffset = buf_offset,
+ .bufferRowLength = linesize,
+ .bufferImageHeight = p_h,
+ .imageSubresource.layerCount = 1,
+ .imageExtent = (VkExtent3D){ p_w, p_h, 1 },
+ /* Rest of the fields adjusted/filled in later */
+ };
+
+ buf_offset = FFALIGN(buf_offset + size,
+ p->props.properties.limits.optimalBufferCopyOffsetAlignment);
+ }
+
+ err = ff_vk_get_pooled_buffer(&p->vkctx, &fp->tmp, dst,
+ VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+ VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ NULL, buf_offset,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
if (err < 0)
return err;
- err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, f,
- VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
- VK_PIPELINE_STAGE_2_TRANSFER_BIT);
+ return 0;
+}
+
+static int create_mapped_buffer(AVHWFramesContext *hwfc,
+ FFVkBuffer *vkb, VkBufferUsageFlags usage,
+ size_t size,
+ VkExternalMemoryBufferCreateInfo *create_desc,
+ VkImportMemoryHostPointerInfoEXT *import_desc,
+ VkMemoryHostPointerPropertiesEXT props)
+{
+ int err;
+ VkResult ret;
+ VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
+ AVVulkanDeviceContext *hwctx = &p->p;
+
+ VkBufferCreateInfo buf_spawn = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = create_desc,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .size = size,
+ };
+ VkMemoryRequirements req = {
+ .size = size,
+ .alignment = p->hprops.minImportedHostPointerAlignment,
+ .memoryTypeBits = props.memoryTypeBits,
+ };
+
+ err = ff_vk_alloc_mem(&p->vkctx, &req,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
+ import_desc, &vkb->flags, &vkb->mem);
if (err < 0)
return err;
- ff_vk_frame_barrier(&p->vkctx, exec, f, img_bar, &nb_img_bar,
- VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
- VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
- to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
- VK_ACCESS_TRANSFER_WRITE_BIT,
- to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
- VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- VK_QUEUE_FAMILY_IGNORED);
+ ret = vk->CreateBuffer(hwctx->act_dev, &buf_spawn, hwctx->alloc, &vkb->buf);
+ if (ret != VK_SUCCESS) {
+ vk->FreeMemory(hwctx->act_dev, vkb->mem, hwctx->alloc);
+ return AVERROR_EXTERNAL;
+ }
- vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
- .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
- .pImageMemoryBarriers = img_bar,
- .imageMemoryBarrierCount = nb_img_bar,
- });
+ ret = vk->BindBufferMemory(hwctx->act_dev, vkb->buf, vkb->mem, 0);
+ if (ret != VK_SUCCESS) {
+ vk->FreeMemory(hwctx->act_dev, vkb->mem, hwctx->alloc);
+ vk->DestroyBuffer(hwctx->act_dev, vkb->buf, hwctx->alloc);
+ return AVERROR_EXTERNAL;
+ }
+
+ return 0;
+}
+
+static void destroy_avvkbuf(void *opaque, uint8_t *data)
+{
+ FFVulkanContext *s = opaque;
+ FFVkBuffer *buf = (FFVkBuffer *)data;
+ ff_vk_free_buf(s, buf);
+ av_free(buf);
+}
+
+static int host_map_frame(AVHWFramesContext *hwfc, AVBufferRef **dst, int *nb_bufs,
+ AVFrame *swf, VkBufferImageCopy *region, int upload)
+{
+ int err;
+ VkResult ret;
+ VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
+ AVVulkanDeviceContext *hwctx = &p->p;
+
+ const int planes = av_pix_fmt_count_planes(swf->format);
+
+ VkExternalMemoryBufferCreateInfo create_desc = {
+ .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+ };
+ VkImportMemoryHostPointerInfoEXT import_desc = {
+ .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
+ .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+ };
+ VkMemoryHostPointerPropertiesEXT props;
+
+ for (int i = 0; i < planes; i++) {
+ FFVkBuffer *vkb;
+ uint32_t p_w, p_h;
+ size_t offs;
+ size_t buffer_size;
+
+ /* We can't host map images with negative strides */
+ if (swf->linesize[i] < 0) {
+ err = AVERROR(EINVAL);
+ goto fail;
+ }
+
+ get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
+
+ /* Get the previous point at which mapping was possible and use it */
+ offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment;
+ import_desc.pHostPointer = swf->data[i] - offs;
- /* Schedule a copy for each plane */
- for (int i = 0; i < pixfmt_planes; i++) {
- int idx = FFMIN(i, nb_images - 1);
- VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT,
- VK_IMAGE_ASPECT_PLANE_0_BIT,
- VK_IMAGE_ASPECT_PLANE_1_BIT,
- VK_IMAGE_ASPECT_PLANE_2_BIT, };
-
- FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[i]->data;
- VkBufferImageCopy buf_reg = {
- .bufferOffset = buf_offsets[i],
- .bufferRowLength = buf_stride[i] / desc->comp[i].step,
+ props = (VkMemoryHostPointerPropertiesEXT) {
+ VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
+ };
+ ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev,
+ import_desc.handleType,
+ import_desc.pHostPointer,
+ &props);
+ if (!(ret == VK_SUCCESS && props.memoryTypeBits)) {
+ err = AVERROR(EINVAL);
+ goto fail;
+ }
+
+ /* Buffer region for this plane */
+ region[i] = (VkBufferImageCopy) {
+ .bufferOffset = offs,
+ .bufferRowLength = swf->linesize[i],
+ .bufferImageHeight = p_h,
.imageSubresource.layerCount = 1,
- .imageSubresource.aspectMask = plane_aspect[(pixfmt_planes != nb_images) +
- i*(pixfmt_planes != nb_images)],
- .imageOffset = { 0, 0, 0, },
+ .imageExtent = (VkExtent3D){ p_w, p_h, 1 },
+ /* Rest of the fields adjusted/filled in later */
};
- uint32_t p_w, p_h;
- get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i);
+ /* Add the offset at the start, which gets ignored */
+ buffer_size = offs + swf->linesize[i]*p_h;
+ buffer_size = FFALIGN(buffer_size, p->props.properties.limits.minMemoryMapAlignment);
- buf_reg.bufferImageHeight = p_h;
- buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };
+ /* Create a buffer */
+ vkb = av_mallocz(sizeof(*vkb));
+ if (!vkb) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
- if (to_buf)
- vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx],
- img_bar[0].newLayout,
- vkbuf->buf,
- 1, &buf_reg);
- else
- vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[idx],
- img_bar[0].newLayout,
- 1, &buf_reg);
- }
+ err = create_mapped_buffer(hwfc, vkb,
+ upload ? VK_BUFFER_USAGE_TRANSFER_SRC_BIT :
+ VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ buffer_size, &create_desc, &import_desc,
+ props);
+ if (err < 0) {
+ av_free(vkb);
+ goto fail;
+ }
- err = ff_vk_exec_submit(&p->vkctx, exec);
- if (err < 0)
- return err;
+ /* Create a ref */
+ dst[*nb_bufs] = av_buffer_create((uint8_t *)vkb, sizeof(*vkb),
+ destroy_avvkbuf, &p->vkctx, 0);
+ if (!dst[*nb_bufs]) {
+ destroy_avvkbuf(&p->vkctx, (uint8_t *)vkb);
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
- ff_vk_exec_wait(&p->vkctx, exec);
+ (*nb_bufs)++;
+ }
return 0;
+
+fail:
+ for (int i = 0; i < (*nb_bufs); i++)
+ av_buffer_unref(&dst[i]);
+ return err;
}
-static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
- const AVFrame *swf, int from)
+static int vulkan_transfer_frame(AVHWFramesContext *hwfc,
+ AVFrame *swf, AVFrame *hwf,
+ int upload)
{
- int err = 0;
- VkResult ret;
- AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
- VulkanDevicePriv *p = dev_ctx->hwctx;
- AVVulkanDeviceContext *hwctx = &p->p;
+ int err;
+ VulkanFramesPriv *fp = hwfc->hwctx;
+ VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
FFVulkanFunctions *vk = &p->vkctx.vkfn;
- AVFrame tmp;
- FFVkBuffer *vkbufs[AV_NUM_DATA_POINTERS];
- AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
- size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 };
+ int host_mapped = 0;
+
+ AVVkFrame *hwf_vk = (AVVkFrame *)hwf->data[0];
+ VkBufferImageCopy region[AV_NUM_DATA_POINTERS]; // always one per plane
- uint32_t p_w, p_h;
const int planes = av_pix_fmt_count_planes(swf->format);
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(swf->format);
+ const int nb_images = ff_vk_count_images(hwf_vk);
+ static const VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT,
+ VK_IMAGE_ASPECT_PLANE_0_BIT,
+ VK_IMAGE_ASPECT_PLANE_1_BIT,
+ VK_IMAGE_ASPECT_PLANE_2_BIT, };
+
+ VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
+ int nb_img_bar = 0;
- int host_mapped[AV_NUM_DATA_POINTERS] = { 0 };
- const int map_host = !!(p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY);
+ AVBufferRef *bufs[AV_NUM_DATA_POINTERS];
+ int nb_bufs = 0;
+ VkCommandBuffer cmd_buf;
+ FFVkExecContext *exec;
+
+ /* Sanity checking */
if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) {
av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n");
return AVERROR(EINVAL);
@@ -3581,115 +3756,97 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
if (swf->width > hwfc->width || swf->height > hwfc->height)
return AVERROR(EINVAL);
- /* Create buffers */
- for (int i = 0; i < planes; i++) {
- size_t req_size;
-
- VkExternalMemoryBufferCreateInfo create_desc = {
- .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
- .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
- };
-
- VkImportMemoryHostPointerInfoEXT import_desc = {
- .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
- .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
- };
-
- VkMemoryHostPointerPropertiesEXT p_props = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
- };
-
- get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
-
- tmp.linesize[i] = FFABS(swf->linesize[i]);
-
- /* Do not map images with a negative stride */
- if (map_host && swf->linesize[i] > 0) {
- size_t offs;
- offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment;
- import_desc.pHostPointer = swf->data[i] - offs;
-
- /* We have to compensate for the few extra bytes of padding we
- * completely ignore at the start */
- req_size = FFALIGN(offs + tmp.linesize[i] * p_h,
- p->hprops.minImportedHostPointerAlignment);
-
- ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev,
- import_desc.handleType,
- import_desc.pHostPointer,
- &p_props);
- if (ret == VK_SUCCESS && p_props.memoryTypeBits) {
- host_mapped[i] = 1;
- buf_offsets[i] = offs;
- }
- }
+ /* Setup buffers first */
+ if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) {
+ err = host_map_frame(hwfc, bufs, &nb_bufs, swf, region, upload);
+ if (err >= 0)
+ host_mapped = 1;
+ }
- if (!host_mapped[i])
- req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h);
-
- err = ff_vk_create_avbuf(&p->vkctx, &bufs[i], req_size,
- host_mapped[i] ? &create_desc : NULL,
- host_mapped[i] ? &import_desc : NULL,
- from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT :
- VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- (host_mapped[i] ?
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT : 0x0));
+ if (!host_mapped) {
+ err = get_plane_buf(hwfc, &bufs[0], swf, region, upload);
if (err < 0)
goto end;
+ nb_bufs = 1;
- vkbufs[i] = (FFVkBuffer *)bufs[i]->data;
+ if (upload) {
+ err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 1);
+ if (err < 0)
+ goto end;
+ }
}
- if (!from) {
- /* Map, copy image TO buffer (which then goes to the VkImage), unmap */
- if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0)))
- goto end;
-
- for (int i = 0; i < planes; i++) {
- if (host_mapped[i])
- continue;
+ exec = ff_vk_exec_get(&fp->upload_exec);
+ cmd_buf = exec->buf;
- get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
+ ff_vk_exec_start(&p->vkctx, exec);
- av_image_copy_plane(tmp.data[i], tmp.linesize[i],
- (const uint8_t *)swf->data[i], swf->linesize[i],
- FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
- p_h);
- }
+ /* Prep destination Vulkan frame */
+ err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, hwf,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_TRANSFER_BIT);
+ if (err < 0)
+ goto end;
- if ((err = ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1)))
+ /* No need to declare buf deps for synchronous transfers */
+ if (upload) {
+ err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, nb_bufs, 1);
+ if (err < 0) {
+ ff_vk_exec_discard_deps(&p->vkctx, exec);
goto end;
+ }
}
- /* Copy buffers into/from image */
- err = transfer_image_buf(hwfc, (AVFrame *)vkf, bufs, buf_offsets,
- tmp.linesize, swf->width, swf->height, swf->format,
- from);
-
- if (from) {
- /* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */
- if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0)))
- goto end;
+ ff_vk_frame_barrier(&p->vkctx, exec, hwf, img_bar, &nb_img_bar,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
+ upload ? VK_ACCESS_TRANSFER_WRITE_BIT :
+ VK_ACCESS_TRANSFER_READ_BIT,
+ upload ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL :
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ VK_QUEUE_FAMILY_IGNORED);
- for (int i = 0; i < planes; i++) {
- if (host_mapped[i])
- continue;
+ vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .pImageMemoryBarriers = img_bar,
+ .imageMemoryBarrierCount = nb_img_bar,
+ });
- get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
+ for (int i = 0; i < planes; i++) {
+ int buf_idx = FFMIN(i, (nb_bufs - 1));
+ int img_idx = FFMIN(i, (nb_images - 1));
+ FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[buf_idx]->data;
+
+ uint32_t orig_stride = region[i].bufferRowLength;
+ region[i].bufferRowLength /= desc->comp[i].step;
+ region[i].imageSubresource.aspectMask = plane_aspect[(planes != nb_images) +
+ i*(planes != nb_images)];
+
+ if (upload)
+ vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf,
+ hwf_vk->img[img_idx],
+ img_bar[img_idx].newLayout,
+ 1, &region[i]);
+ else
+ vk->CmdCopyImageToBuffer(cmd_buf, hwf_vk->img[img_idx],
+ img_bar[img_idx].newLayout,
+ vkbuf->buf,
+ 1, &region[i]);
- av_image_copy_plane_uc_from(swf->data[i], swf->linesize[i],
- (const uint8_t *)tmp.data[i], tmp.linesize[i],
- FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
- p_h);
- }
+ region[i].bufferRowLength = orig_stride;
+ }
- if ((err = ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1)))
- goto end;
+ err = ff_vk_exec_submit(&p->vkctx, exec);
+ if (err < 0) {
+ ff_vk_exec_discard_deps(&p->vkctx, exec);
+ } else if (!upload) {
+ ff_vk_exec_wait(&p->vkctx, exec);
+ if (!host_mapped)
+ err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 0);
}
end:
- for (int i = 0; i < planes; i++)
+ for (int i = 0; i < nb_bufs; i++)
av_buffer_unref(&bufs[i]);
return err;
@@ -3716,7 +3873,7 @@ static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
if (src->hw_frames_ctx)
return AVERROR(ENOSYS);
else
- return vulkan_transfer_data(hwfc, dst, src, 0);
+ return vulkan_transfer_frame(hwfc, (AVFrame *)src, dst, 1);
}
}
@@ -3833,7 +3990,7 @@ static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
if (dst->hw_frames_ctx)
return AVERROR(ENOSYS);
else
- return vulkan_transfer_data(hwfc, src, dst, 1);
+ return vulkan_transfer_frame(hwfc, dst, (AVFrame *)src, 0);
}
}
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index ade8d482b9..df7758cc1e 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -809,11 +809,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
s->hwctx->alloc, mem);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
- ff_vk_ret2str(ret));
+ if (ret != VK_SUCCESS)
return AVERROR(ENOMEM);
- }
if (mem_flags)
*mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
--
2.45.2.753.g447d99e1c3b
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 08/13] hwcontext_vulkan: constify validation layer features table
2024-08-07 21:33 [FFmpeg-devel] [PATCH 01/13] hwcontext_vulkan: add a new mechanism to expose used queue families Lynne via ffmpeg-devel
` (5 preceding siblings ...)
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 07/13] hwcontext_vulkan: rewrite upload/download Lynne via ffmpeg-devel
@ 2024-08-07 21:33 ` Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 09/13] vulkan_shaderc: fix error reporting for certain errors Lynne via ffmpeg-devel
` (2 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Lynne via ffmpeg-devel @ 2024-08-07 21:33 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
The struct data seem to get corrupted otherwise.
Possibly a validation layer or libvulkan issue.
---
libavutil/hwcontext_vulkan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 443862be3b..07c2a970a8 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -777,7 +777,7 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
goto fail;
if (debug_mode) {
- VkValidationFeatureEnableEXT feat_list[] = {
+ static const VkValidationFeatureEnableEXT feat_list[] = {
VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT,
VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT,
VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
--
2.45.2.753.g447d99e1c3b
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 09/13] vulkan_shaderc: fix error reporting for certain errors
2024-08-07 21:33 [FFmpeg-devel] [PATCH 01/13] hwcontext_vulkan: add a new mechanism to expose used queue families Lynne via ffmpeg-devel
` (6 preceding siblings ...)
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 08/13] hwcontext_vulkan: constify validation layer features table Lynne via ffmpeg-devel
@ 2024-08-07 21:33 ` Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 10/13] hwcontext_vulkan: enable storageBuffer16BitAccess if available Lynne via ffmpeg-devel
2024-08-07 21:35 ` [FFmpeg-devel] [PATCH 11/13] vulkan_shaderc: add debug information to shaders Lynne via ffmpeg-devel
9 siblings, 0 replies; 13+ messages in thread
From: Lynne via ffmpeg-devel @ 2024-08-07 21:33 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
The issue is that shaderc_result_get_num_errors may sometimes
return 0 even when shaderc_result_get_compilation_status returns
a non-zero error code.
Since we use the result from the former, override the status
if it returned 0.
---
libavfilter/vulkan_shaderc.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/libavfilter/vulkan_shaderc.c b/libavfilter/vulkan_shaderc.c
index 38be1030ad..455e81767e 100644
--- a/libavfilter/vulkan_shaderc.c
+++ b/libavfilter/vulkan_shaderc.c
@@ -65,6 +65,9 @@ static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
warn = shaderc_result_get_num_warnings(res);
message = shaderc_result_get_error_message(res);
+ if (ret != shaderc_compilation_status_success && !err)
+ err = 1;
+
loglevel = err ? AV_LOG_ERROR : warn ? AV_LOG_WARNING : AV_LOG_VERBOSE;
ff_vk_shader_print(avctx, shd, loglevel);
--
2.45.2.753.g447d99e1c3b
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 10/13] hwcontext_vulkan: enable storageBuffer16BitAccess if available
2024-08-07 21:33 [FFmpeg-devel] [PATCH 01/13] hwcontext_vulkan: add a new mechanism to expose used queue families Lynne via ffmpeg-devel
` (7 preceding siblings ...)
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 09/13] vulkan_shaderc: fix error reporting for certain errors Lynne via ffmpeg-devel
@ 2024-08-07 21:33 ` Lynne via ffmpeg-devel
2024-08-07 21:35 ` [FFmpeg-devel] [PATCH 11/13] vulkan_shaderc: add debug information to shaders Lynne via ffmpeg-devel
9 siblings, 0 replies; 13+ messages in thread
From: Lynne via ffmpeg-devel @ 2024-08-07 21:33 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavutil/hwcontext_vulkan.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 07c2a970a8..1e46ee2cee 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1399,6 +1399,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
p->device_features_1_1.storagePushConstant16 = dev_features_1_1.storagePushConstant16;
+ p->device_features_1_1.storageBuffer16BitAccess = dev_features_1_1.storageBuffer16BitAccess;
+ p->device_features_1_1.uniformAndStorageBuffer16BitAccess = dev_features_1_1.uniformAndStorageBuffer16BitAccess;
p->device_features_1_2.timelineSemaphore = 1;
p->device_features_1_2.bufferDeviceAddress = dev_features_1_2.bufferDeviceAddress;
--
2.45.2.753.g447d99e1c3b
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 11/13] vulkan_shaderc: add debug information to shaders
2024-08-07 21:33 [FFmpeg-devel] [PATCH 01/13] hwcontext_vulkan: add a new mechanism to expose used queue families Lynne via ffmpeg-devel
` (8 preceding siblings ...)
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 10/13] hwcontext_vulkan: enable storageBuffer16BitAccess if available Lynne via ffmpeg-devel
@ 2024-08-07 21:35 ` Lynne via ffmpeg-devel
2024-08-07 21:35 ` [FFmpeg-devel] [PATCH 12/13] hwcontext_vulkan: add support for VK_EXT_shader_object Lynne via ffmpeg-devel
2024-08-07 21:35 ` [FFmpeg-devel] [PATCH 13/13] vulkan: use allocator callback for buffer creation Lynne via ffmpeg-devel
9 siblings, 2 replies; 13+ messages in thread
From: Lynne via ffmpeg-devel @ 2024-08-07 21:35 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavfilter/vulkan_shaderc.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/libavfilter/vulkan_shaderc.c b/libavfilter/vulkan_shaderc.c
index 455e81767e..9e8a3d17ac 100644
--- a/libavfilter/vulkan_shaderc.c
+++ b/libavfilter/vulkan_shaderc.c
@@ -51,6 +51,7 @@ static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
shaderc_compile_options_set_target_env(opts, shaderc_target_env_vulkan,
shaderc_env_version_vulkan_1_2);
shaderc_compile_options_set_target_spirv(opts, shaderc_spirv_version_1_5);
+ shaderc_compile_options_set_generate_debug_info(opts);
shaderc_compile_options_set_optimization_level(opts,
shaderc_optimization_level_performance);
--
2.45.2.753.g447d99e1c3b
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 12/13] hwcontext_vulkan: add support for VK_EXT_shader_object
2024-08-07 21:35 ` [FFmpeg-devel] [PATCH 11/13] vulkan_shaderc: add debug information to shaders Lynne via ffmpeg-devel
@ 2024-08-07 21:35 ` Lynne via ffmpeg-devel
2024-08-07 21:35 ` [FFmpeg-devel] [PATCH 13/13] vulkan: use allocator callback for buffer creation Lynne via ffmpeg-devel
1 sibling, 0 replies; 13+ messages in thread
From: Lynne via ffmpeg-devel @ 2024-08-07 21:35 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
We'd like to use it eventually, and it's already covered by
the minimum version of the headers we require.
---
libavutil/hwcontext_vulkan.c | 14 ++++++++++++--
libavutil/vulkan_functions.h | 7 ++++++-
libavutil/vulkan_loader.h | 1 +
3 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 1e46ee2cee..df7f42d995 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -93,6 +93,7 @@ typedef struct VulkanDevicePriv {
VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_features;
VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_matrix_features;
VkPhysicalDeviceOpticalFlowFeaturesNV optical_flow_features;
+ VkPhysicalDeviceShaderObjectFeaturesEXT shader_object_features;
/* Queues */
pthread_mutex_t **qf_mutex;
@@ -421,6 +422,7 @@ static const VulkanOptExtension optional_device_exts[] = {
{ VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT },
{ VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX },
{ VK_NV_OPTICAL_FLOW_EXTENSION_NAME, FF_VK_EXT_OPTICAL_FLOW },
+ { VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT },
/* Imports/exports */
{ VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY },
@@ -1312,9 +1314,13 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
};
+ VkPhysicalDeviceShaderObjectFeaturesEXT shader_object_features = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT,
+ .pNext = &timeline_features,
+ };
VkPhysicalDeviceOpticalFlowFeaturesNV optical_flow_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_FEATURES_NV,
- .pNext = &timeline_features,
+ .pNext = &shader_object_features,
};
VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_matrix_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR,
@@ -1364,7 +1370,9 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
p->coop_matrix_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR;
p->coop_matrix_features.pNext = &p->optical_flow_features;
p->optical_flow_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_FEATURES_NV;
- p->optical_flow_features.pNext = NULL;
+ p->optical_flow_features.pNext = &p->shader_object_features;
+ p->shader_object_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT;
+ p->shader_object_features.pNext = NULL;
ctx->free = vulkan_device_free;
@@ -1432,6 +1440,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
p->optical_flow_features.opticalFlow = optical_flow_features.opticalFlow;
+ p->shader_object_features.shaderObject = shader_object_features.shaderObject;
+
dev_info.pNext = &hwctx->device_features;
/* Setup queue family */
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 3f0b96f77e..20711b130d 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -47,6 +47,7 @@ typedef enum FFVulkanExtensions {
FF_VK_EXT_ATOMIC_FLOAT = 1ULL << 15, /* VK_EXT_shader_atomic_float */
FF_VK_EXT_COOP_MATRIX = 1ULL << 16, /* VK_KHR_cooperative_matrix */
FF_VK_EXT_OPTICAL_FLOW = 1ULL << 17, /* VK_NV_optical_flow */
+ FF_VK_EXT_SHADER_OBJECT = 1ULL << 18, /* VK_EXT_shader_object */
FF_VK_EXT_NO_FLAG = 1ULL << 31,
} FFVulkanExtensions;
@@ -219,7 +220,11 @@ typedef enum FFVulkanExtensions {
\
/* Shaders */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateShaderModule) \
- MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyShaderModule)
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyShaderModule) \
+ MACRO(1, 1, FF_VK_EXT_SHADER_OBJECT, CmdBindShadersEXT) \
+ MACRO(1, 1, FF_VK_EXT_SHADER_OBJECT, CreateShadersEXT) \
+ MACRO(1, 1, FF_VK_EXT_SHADER_OBJECT, DestroyShaderEXT) \
+ MACRO(1, 1, FF_VK_EXT_SHADER_OBJECT, GetShaderBinaryDataEXT)
/* Macro containing every win32 specific function that we utilize in our codebase */
#define FN_LIST_WIN32(MACRO) \
diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h
index bb22591957..befed51860 100644
--- a/libavutil/vulkan_loader.h
+++ b/libavutil/vulkan_loader.h
@@ -50,6 +50,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions,
{ VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT },
{ VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX },
{ VK_NV_OPTICAL_FLOW_EXTENSION_NAME, FF_VK_EXT_OPTICAL_FLOW },
+ { VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT },
#ifdef _WIN32
{ VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
{ VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM },
--
2.45.2.753.g447d99e1c3b
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 13/13] vulkan: use allocator callback for buffer creation
2024-08-07 21:35 ` [FFmpeg-devel] [PATCH 11/13] vulkan_shaderc: add debug information to shaders Lynne via ffmpeg-devel
2024-08-07 21:35 ` [FFmpeg-devel] [PATCH 12/13] hwcontext_vulkan: add support for VK_EXT_shader_object Lynne via ffmpeg-devel
@ 2024-08-07 21:35 ` Lynne via ffmpeg-devel
1 sibling, 0 replies; 13+ messages in thread
From: Lynne via ffmpeg-devel @ 2024-08-07 21:35 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
This would've led to a segfault if custom allocators were used.
---
libavutil/vulkan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index df7758cc1e..7b45e43a89 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -855,7 +855,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
.pNext = &ded_req,
};
- ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
+ ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &buf->buf);
if (ret != VK_SUCCESS) {
av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
ff_vk_ret2str(ret));
--
2.45.2.753.g447d99e1c3b
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2024-08-07 21:35 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-08-07 21:33 [FFmpeg-devel] [PATCH 01/13] hwcontext_vulkan: add a new mechanism to expose used queue families Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 02/13] vulkan: use the new queue family mechanism Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 03/13] hwcontext_vulkan: rewrite queue picking system for the new API Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 04/13] hwcontext_vulkan: initialize optical flow queues if available Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 05/13] vulkan_video: remove NIH pooled buffer implementation Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 06/13] hwcontext_vulkan: remove unused struct Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 07/13] hwcontext_vulkan: rewrite upload/download Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 08/13] hwcontext_vulkan: constify validation layer features table Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 09/13] vulkan_shaderc: fix error reporting for certain errors Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 10/13] hwcontext_vulkan: enable storageBuffer16BitAccess if available Lynne via ffmpeg-devel
2024-08-07 21:35 ` [FFmpeg-devel] [PATCH 11/13] vulkan_shaderc: add debug information to shaders Lynne via ffmpeg-devel
2024-08-07 21:35 ` [FFmpeg-devel] [PATCH 12/13] hwcontext_vulkan: add support for VK_EXT_shader_object Lynne via ffmpeg-devel
2024-08-07 21:35 ` [FFmpeg-devel] [PATCH 13/13] vulkan: use allocator callback for buffer creation Lynne via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git