Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH v1 5/8] avformat/mov_muxer: Extended MOV muxer to handle APV video content
       [not found] <CGME20250423141306eucas1p229fa078339a2a993c609464e101c9c6d@eucas1p2.samsung.com>
@ 2025-04-23 14:13 ` Dawid Kozinski
  2025-04-23 14:43   ` James Almer
  2025-04-23 21:08   ` Mark Thompson
  0 siblings, 2 replies; 7+ messages in thread
From: Dawid Kozinski @ 2025-04-23 14:13 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Dawid Kozinski

- Changes in mov_write_video_tag function to handle APV elementary stream
- Provided structure APVDecoderConfigurationRecord that specifies the decoder configuration information for APV video content

Signed-off-by: Dawid Kozinski <d.kozinski@samsung.com>
---
 libavformat/Makefile    |   2 +-
 libavformat/apv.c       | 827 ++++++++++++++++++++++++++++++++++++++++
 libavformat/apv.h       |  94 +++++
 libavformat/isom_tags.c |   2 +
 libavformat/movenc.c    |  47 +++
 5 files changed, 971 insertions(+), 1 deletion(-)
 create mode 100644 libavformat/apv.c
 create mode 100644 libavformat/apv.h

diff --git a/libavformat/Makefile b/libavformat/Makefile
index 5a4ffc1568..004ab9fb6a 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -379,7 +379,7 @@ OBJS-$(CONFIG_MOV_DEMUXER)               += mov.o mov_chan.o mov_esds.o \
 OBJS-$(CONFIG_MOV_MUXER)                 += movenc.o \
                                             movenchint.o mov_chan.o rtp.o \
                                             movenccenc.o movenc_ttml.o rawutils.o \
-                                            dovi_isom.o evc.o cbs.o cbs_av1.o
+                                            dovi_isom.o evc.o cbs.o cbs_av1.o apv.o
 OBJS-$(CONFIG_MP2_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_MP3_DEMUXER)               += mp3dec.o replaygain.o
 OBJS-$(CONFIG_MP3_MUXER)                 += mp3enc.o rawenc.o id3v2enc.o
diff --git a/libavformat/apv.c b/libavformat/apv.c
new file mode 100644
index 0000000000..9c71b39022
--- /dev/null
+++ b/libavformat/apv.c
@@ -0,0 +1,827 @@
+/*
+ * APV helper functions for muxers
+ * Copyright (c) 2025 Dawid Kozinski <d.kozinski@samsung.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ #include <stdbool.h>
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "libavcodec/get_bits.h"
+#include "libavcodec/golomb.h"
+#include "libavcodec/apv.h"
+#include "avformat.h"
+#include "avio.h"
+#include "apv.h"
+#include "avio_internal.h"
+
+
+#define TILE_COLS_MAX 20
+#define TILE_ROWS_MAX 20
+#define NUM_TILES_MAX TILE_COLS_MAX*TILE_ROWS_MAX 
+
+#define NUM_COMP_MAX 4
+
+#define MB_WIDTH    16
+#define MB_HEIGHT   16
+
+/*****************************************************************************
+ * PBU types
+ *****************************************************************************/
+#define APV_PBU_TYPE_RESERVED          (0)
+#define APV_PBU_TYPE_PRIMARY_FRAME     (1)
+#define APV_PBU_TYPE_NON_PRIMARY_FRAME (2)
+#define APV_PBU_TYPE_PREVIEW_FRAME     (25)
+#define APV_PBU_TYPE_DEPTH_FRAME       (26)
+#define APV_PBU_TYPE_ALPHA_FRAME       (27)
+#define APV_PBU_TYPE_AU_INFO           (65)
+#define APV_PBU_TYPE_METADATA          (66)
+#define APV_PBU_TYPE_FILLER            (67)
+#define APV_PBU_TYPE_UNKNOWN           (-1)
+#define APV_PBU_NUMS                   (10)
+
+#define APV_FRAME_TYPE_PRIMARY_FRAME     (0)
+#define APV_FRAME_TYPE_NON_PRIMARY_FRAME (1)
+#define APV_FRAME_TYPE_PREVIEW_FRAME     (2)
+#define APV_FRAME_TYPE_DEPTH_FRAME       (3)
+#define APV_FRAME_TYPE_ALPHA_FRAME       (4)
+#define APV_FRAME_TYPE_NON_FRAME         (-1)
+#define APV_PBU_FRAME_TYPE_NUM           (5)
+#define CONFIGURATIONS_MAX                         (APV_PBU_FRAME_TYPE_NUM)
+
+typedef struct APVDecoderFrameInfo {
+    uint8_t reserved_zero_6bits;                    // 6 bits
+    uint8_t color_description_present_flag;         // 1 bit
+
+    // The variable indicates whether the capture_time_distance value in the APV bitstream's frame header should be ignored during playback.
+    // If capture_time_distance_ignored is set to true, the capture_time_distance information will not be utilized,
+    // and timing information for playback should be calculated using an alternative method.
+    // If set to false, the capture_time_distance value will be used as is from the frame header.
+    // It is recommended to set this variable to true, allowing the use of MP4 timestamps for playback and recording,
+    // which enables the conventional compression and playback methods based on the timestamp table defined by the ISO-based file format.
+    uint8_t capture_time_distance_ignored;          // 1-bit
+
+    uint8_t profile_idc;                            // 8 bits 
+    uint8_t level_idc;                              // 8 bits
+    uint8_t band_idc;                               // 8 bits
+    uint32_t frame_width;                           // 32 bits
+    uint32_t frame_height;                          // 32 bits
+    uint8_t chroma_format_idc;                      // 4 bits
+    uint8_t bit_depth_minus8;                       // 4 bits
+    uint8_t capture_time_distance;                  // 8 bits
+
+    // if (color_description_present_flag)
+    uint8_t color_primaries;                        // 8 bits
+    uint8_t transfer_characteristics;               // 8 bits
+    uint8_t matrix_coefficients;                    // 8 bits
+    uint8_t full_range_flag;                        // 1 bit
+    uint8_t reserved_zero_7bits;                    // 7 bits
+
+} APVDecoderFrameInfo;
+
+typedef struct APVDecoderConfigurationEntry {
+    uint8_t pbu_type;                   // 8 bits
+    uint8_t number_of_frame_info;       // 8 bits
+
+    APVDecoderFrameInfo** frame_info;   // An array of size number_of_frame_info storing elements of type APVDecoderFrameInfo*
+
+} APVDecoderConfigurationEntry;
+
+// ISOBMFF binding for APV
+// @see https://github.com/openapv/openapv/blob/main/readme/apv_isobmff.md
+typedef struct APVDecoderConfigurationRecord  {
+    uint8_t configurationVersion;           // 8 bits
+    uint8_t number_of_configuration_entry;  // 8 bits
+
+    APVDecoderConfigurationEntry configuration_entry[CONFIGURATIONS_MAX]; // table of size number_of_configuration_entry
+
+} APVDecoderConfigurationRecord ;
+
+// 5.3.6. Frame information
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-frame-information
+typedef struct frame_info_t {
+    uint8_t profile_idc;            // 8 bits
+    uint8_t level_idc;              // 8 bits
+    uint8_t band_idc;               // 3 bits
+    uint8_t reserved_zero_5bits;    // 5 bits
+
+    uint32_t frame_width;           // 24 bits
+    uint32_t frame_height;          // 24 bits
+
+    uint8_t chroma_format_idc;      // 4 bits
+    uint8_t bit_depth_minus8;       // 4 bits
+
+    uint8_t capture_time_distance;  // 8 bits
+    uint8_t reserved_zero_8bit;     // 8 bits
+} frame_info_t;
+
+// 5.3.7. Quantization matrix 
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-quantization-matrix
+typedef struct quantization_matrix_t {
+    uint8_t q_matrix[3][8][8]; // @todo use NumCmp instead of 3
+} quantization_matrix_t;
+
+// 5.3.8. Tile info
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-tile-info
+typedef struct tile_info_t {
+    uint32_t tile_width_in_mbs;                             // u(20)
+    uint32_t tile_height_in_mbs;                            // u(20)
+    uint8_t tile_size_present_in_fh_flag;                   // u(1)
+    uint32_t tile_size_in_fh[TILE_ROWS_MAX*TILE_COLS_MAX];  // u(32) -> size of table: NumTiles
+
+    // private
+    uint32_t ColStarts[TILE_COLS_MAX];
+    uint32_t RowStarts[TILE_ROWS_MAX];
+
+    uint32_t TileCols; // The value of TileCols MUST be less than or equal to TILE_COLS_MAX.
+    uint32_t TileRows; // The value of TileRows MUST be less than or equal to TILE_ROWS_MAX.
+    uint32_t NumTiles;
+
+} tile_info_t;
+
+
+// 5.3.17. Byte alignment
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-byte-alignment
+typedef struct byte_alignment_t {
+    uint8_t alignment_bit_equal_to_zero; /* equal to 0*/ // f(1)
+} byte_alignment_t;
+
+// 5.3.5. Frame header
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-frame-header
+typedef struct frame_header_t {
+    frame_info_t frame_info;
+    uint8_t reserved_zero_8bits;                // 8 bits
+    uint8_t color_description_present_flag;     // 1 bits
+
+    uint8_t color_primaries;                    // 8 bits
+    uint8_t transfer_characteristics;           // 8 bits
+    uint8_t matrix_coefficients;                // 8 bits
+    uint8_t full_range_flag;                    // 1 bit
+
+    uint8_t use_q_matrix;                       // 1 bit
+
+    quantization_matrix_t quantization_matrix;
+    tile_info_t tile_info;
+
+    uint8_t reserved_zero_8bits_2;              // 8 bits
+    byte_alignment_t byte_alignment_t;
+} frame_header_t;
+
+// 5.3.11. Filler
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-filler
+typedef struct filler_t {
+    uint8_t *ff_byte; /* is a byte equal to 0xFF */ // f(8)
+} filler_t;
+
+// 5.3.13. Tile header
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-tile-header
+typedef struct tile_header_t {
+    uint16_t tile_header_size;                  // u(16)
+    uint16_t tile_index;                        // u(16)
+    uint32_t tile_data_size[NUM_COMP_MAX];      // u(32) table of size NumComps MAX = 4
+    uint8_t tile_qp[NUM_COMP_MAX];              // u(8) table of size NumComps
+    uint8_t reserved_zero_8bits;                // u(8)
+    byte_alignment_t byte_alignment;
+
+} tile_header_t;
+
+// 5.3.16. AC coefficient coding
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-ac-coefficient-coding
+typedef struct   {
+    uint8_t coeff_zero_run;
+    uint8_t abs_ac_coeff_minus1;
+    uint8_t sign_ac_coeff;
+} ac_coeff_coding_t;
+
+// 5.3.15. Macroblock layer
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-macroblock-layer
+typedef struct macroblock_layer_t {
+    uint8_t abs_dc_coeff_diff;  // h(v)
+    uint8_t sign_dc_coeff_diff; // u(1)
+    ac_coeff_coding_t ac_coeff_coding;
+} macroblock_layer_t;
+
+// 5.3.14. Tile data
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-tile-data
+typedef struct tile_data_t {
+    macroblock_layer_t macroblock_layer;
+    byte_alignment_t byte_alignment;
+} tile_data_t;
+
+
+// 5.3.11. Filler
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-filler
+typedef struct tile_t {
+    tile_header_t tile_header;
+    uint8_t tile_data;
+    uint8_t ff_byte; /* is a byte equal to 0xFF */ // f(8)
+} tile_t;
+
+// 5.3.4 Frame
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-frame
+typedef struct frame_t {
+    frame_header_t frame_header;
+    uint32_t tile_size[NUM_TILES_MAX];           // table of NumTiles size
+    tile_t tile[NUM_TILES_MAX];                   // table of NumTiles size
+    filler_t filler; 
+} frame_t;
+
+// 5.3.3. Primitive bitstream unit header
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-primitive-bitstream-unit-he
+typedef struct pbu_header_t {
+    uint8_t pbu_type;               // 8 bits
+    uint16_t group_id;              // 16 bits
+    uint8_t reserved_zero_8bits;    // 8 bits
+} pbu_header_t;
+
+// 5.3.9. Access unit information 
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-access-unit-information
+typedef struct au_info_t {
+    uint16_t num_frames;            // 16 bits
+
+    uint8_t pbu_type;               // 8 bits
+    uint16_t group_id;              // 16 bits
+    uint8_t reserved_zero_8bits;    // 8 bits
+    frame_info_t frame_info;
+    
+    uint8_t reserved_zero_8bits_2;  // 8 bits
+    byte_alignment_t byte_alignment;
+
+} au_info_t;
+
+typedef struct {
+    uint64_t high;
+    uint64_t low;
+} uint128_t;
+
+// 5.3.10. Metadata 
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-metadata
+typedef struct metadata_t {
+    uint16_t metadata_size;             // 32 bits
+
+    /* is a byte equal to 0xFF */   
+    uint8_t *ff_byte;                   // f(8) table of size payloadSize
+    uint8_t metadata_payload_type;      // u(8)
+
+    /* is a byte equal to 0xFF */
+    uint8_t ff_byte_2;                  // f(8)
+    uint8_t metadata_payload_size;      // u(8)
+
+    // metadata ITU-T T.35
+    uint8_t itu_t_t35_country_code;             //  u(8)
+    uint8_t itu_t_t35_country_code_extension;   //  u(8)
+    uint8_t itu_t_t35_payload;                  //  u(8)
+
+    // metadata mdcv (Mastering display color volume metadata)
+    uint16_t primary_chromaticity_x[3]; // u(16)
+    uint16_t primary_chromaticity_y[3]; // u(16)
+    uint16_t white_point_chromaticity_x; // u(16)
+    uint16_t white_point_chromaticity_y; // u(16)
+    uint32_t max_mastering_luminance;    // u(32)
+    uint32_t min_mastering_luminance;    // u(32)
+
+    // metadata ccl (Content light level information metadata)
+    uint16_t max_cll;  // u(16)
+    uint16_t max_fall; // u(16)
+
+    // User defined metadata
+    uint128_t uuid;
+    uint8_t *user_defined_data_payload; // table of size payloadSize - 16
+
+    // Undefined metadata
+    uint8_t *undefined_metadata_payload_byte; // table of size payloadSize
+
+    filler_t filler;
+
+    uint8_t alignment_bit_equal_to_zero;
+} metadata_t;
+
+// 5.3.2. Primitive bitstream unit
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-primitive-bitstream-unit
+typedef struct pbu_t {
+    pbu_header_t pbu_header;
+    frame_t frame;
+    au_info_t au_info;
+    metadata_t metadata;
+    filler_t filler;
+} pbu_t;
+
+static int apv_read_pbu_header(GetBitContext *gb, pbu_header_t *pbu_header)
+{
+    pbu_header->pbu_type = get_bits(gb, 8);
+    pbu_header->group_id = get_bits(gb, 16);
+    pbu_header->reserved_zero_8bits = get_bits(gb, 8);
+
+    return 0;
+}
+
+// @see https://www.ietf.org/archive/id/draft-lim-apv-04.html#name-frame-information
+static int apv_read_frame_info(GetBitContext *gb, frame_info_t *frame_info)
+{
+    uint8_t reserved_zero_5bits;
+    uint8_t reserved_zero_8bits;
+
+    frame_info->profile_idc = get_bits(gb, 8);
+    frame_info->level_idc = get_bits(gb, 8);
+    frame_info->band_idc = get_bits(gb, 3);
+    
+    reserved_zero_5bits =  get_bits(gb, 5);
+
+    frame_info->frame_width = get_bits(gb,24);
+    frame_info->frame_height = get_bits(gb,24);
+
+    frame_info->chroma_format_idc = get_bits(gb, 4);
+    frame_info->bit_depth_minus8 = get_bits(gb, 4);
+
+    frame_info->capture_time_distance = get_bits(gb, 8);
+    reserved_zero_8bits = get_bits(gb, 8);
+    
+    if (reserved_zero_5bits != 0x00)
+        return -1;  
+
+    if (reserved_zero_8bits != 0x00)
+        return -1;  
+
+    return 0;
+}
+
+static int apv_read_frame_header(GetBitContext *gb, frame_header_t *frame_header)
+{
+    uint8_t reserved_zero_8bits[2];
+
+    apv_read_frame_info(gb, &frame_header->frame_info);
+    
+    reserved_zero_8bits[0] = get_bits(gb, 8);
+    if(reserved_zero_8bits[0]!=0) {
+        return -1;
+    }
+
+    frame_header->color_description_present_flag = get_bits(gb, 1);
+    if(frame_header->color_description_present_flag) {
+        frame_header->color_primaries = get_bits(gb, 8);
+        frame_header->transfer_characteristics = get_bits(gb, 8);
+        frame_header->matrix_coefficients = get_bits(gb, 8);
+        frame_header->full_range_flag = get_bits(gb, 1);
+    }
+
+    // The further parsing of AU is not needed
+
+    return 0;
+}
+
+static inline uint32_t apv_read_pbu_size(const uint8_t *bits, int bits_size)
+{
+    uint32_t pbu_size = 0;
+
+    if (bits_size < APV_PBU_SIZE_PREFIX_LENGTH) {
+        av_log(NULL, AV_LOG_ERROR, "Can't read PBU (primitive bitstream unit) size\n");
+        return 0;
+    }
+
+    pbu_size = AV_RB32(bits);
+
+    return pbu_size;
+}
+
+static int apv_read_au_info(GetBitContext *gb, au_info_t *au_info)
+{
+    int ret = 0;
+    au_info->num_frames = get_bits(gb, 16);
+
+    for(int idx=0; idx<au_info->num_frames; idx++) {
+        au_info->pbu_type = get_bits(gb, 8);
+        au_info->group_id = get_bits(gb, 16);
+        au_info->reserved_zero_8bits = get_bits(gb, 8);
+        
+        ret = apv_read_frame_info(gb, &au_info->frame_info);
+        if(ret!=0) return ret;
+    }
+    au_info->reserved_zero_8bits_2 = get_bits(gb, 8);
+    if(au_info->reserved_zero_8bits_2!=0) return -1;
+
+    // The further parsing of AU Info is not needed
+
+    return ret;
+}
+
+static int apv_read_frame(GetBitContext *gb, frame_t *frame)
+{
+    apv_read_frame_header(gb, &frame->frame_header);
+
+    // The further parsing of frame is not needed
+
+    return 0;
+}
+
+// primitive bytestream unit
+static int apv_read_pbu(const uint8_t *bs, int bs_size, pbu_t *pbu)
+{
+    GetBitContext gb;
+    
+    int ret = init_get_bits8(&gb, bs, bs_size);
+    if (ret < 0)
+        return ret;
+
+    ret = apv_read_pbu_header(&gb, &pbu->pbu_header);
+    if (ret < 0)
+        return ret;
+    if(( 1 <= pbu->pbu_header.pbu_type && pbu->pbu_header.pbu_type <= 2) ||
+       ( 25 <= pbu->pbu_header.pbu_type  && pbu->pbu_header.pbu_type  <= 27) ) {
+        ret = apv_read_frame(&gb, &pbu->frame);
+    } else if(pbu->pbu_header.pbu_type ==65) {
+        ret = apv_read_au_info(&gb, &pbu->au_info);
+    } else  {
+        ret = -1;
+    }
+
+    return ret;
+}
+
+static int apv_number_of_pbu_entry(const uint8_t *data, uint32_t au_size) {
+    uint32_t currReadSize = 0;
+    int ret = 0;
+    
+    do {
+        uint32_t pbu_size = apv_read_pbu_size(data + currReadSize, APV_PBU_SIZE_PREFIX_LENGTH);
+        if (pbu_size == 0) return -1;
+
+        currReadSize += APV_PBU_SIZE_PREFIX_LENGTH;
+        currReadSize += pbu_size;
+        ret++;
+    } while(au_size > currReadSize);
+
+    return ret;
+}
+
+static void apvc_init(APVDecoderConfigurationRecord * apvc)
+{
+    memset(apvc, 0, sizeof(APVDecoderConfigurationRecord ));
+    apvc->configurationVersion = 1;
+}
+
+static void apvc_close(APVDecoderConfigurationRecord *apvc)
+{
+    for(int i=0;i<apvc->number_of_configuration_entry;i++) {
+        for(int j=0;j<apvc->configuration_entry[i].number_of_frame_info;j++) {
+            free(apvc->configuration_entry[i].frame_info[j]);
+        } 
+        free(apvc->configuration_entry[i].frame_info);
+        apvc->configuration_entry[i].number_of_frame_info = 0;
+    }
+    apvc->number_of_configuration_entry = 0;
+}
+
+static int apvc_write(AVIOContext *pb, APVDecoderConfigurationRecord * apvc)
+{
+    av_log(NULL, AV_LOG_TRACE, "configurationVersion:                           %"PRIu8"\n", 
+    apvc->configurationVersion);
+    
+    av_log(NULL, AV_LOG_TRACE, "number_of_configuration_entry:                  %"PRIu8"\n", 
+    apvc->number_of_configuration_entry);
+
+    for(int i=0; i<apvc->number_of_configuration_entry;i++) {
+        av_log(NULL, AV_LOG_TRACE, "pbu_type:                                   %"PRIu8"\n", 
+        apvc->configuration_entry[i].pbu_type);
+
+        av_log(NULL, AV_LOG_TRACE, "number_of_frame_info:                       %"PRIu8"\n", 
+        apvc->configuration_entry[i].number_of_frame_info);
+
+        for(int j=0; j < apvc->configuration_entry[i].number_of_frame_info; j++) {
+            av_log(NULL, AV_LOG_TRACE, "color_description_present_flag:         %"PRIu8"\n", 
+            apvc->configuration_entry[i].frame_info[j]->color_description_present_flag);
+
+            av_log(NULL, AV_LOG_TRACE, "capture_time_distance_ignored:          %"PRIu8"\n", 
+            apvc->configuration_entry[i].frame_info[j]->capture_time_distance_ignored);
+
+            av_log(NULL, AV_LOG_TRACE, "profile_idc:                            %"PRIu8"\n", 
+            apvc->configuration_entry[i].frame_info[j]->profile_idc);
+
+            av_log(NULL, AV_LOG_TRACE, "level_idc:                              %"PRIu8"\n", 
+            apvc->configuration_entry[i].frame_info[j]->level_idc);
+
+            av_log(NULL, AV_LOG_TRACE, "band_idc:                               %"PRIu8"\n", 
+            apvc->configuration_entry[i].frame_info[j]->band_idc);
+
+            av_log(NULL, AV_LOG_TRACE, "frame_width:                            %"PRIu32"\n", 
+            apvc->configuration_entry[i].frame_info[j]->frame_width);
+
+            av_log(NULL, AV_LOG_TRACE, "frame_height:                           %"PRIu32"\n", 
+            apvc->configuration_entry[i].frame_info[j]->frame_height);
+
+            av_log(NULL, AV_LOG_TRACE, "chroma_format_idc:                      %"PRIu8"\n", 
+            apvc->configuration_entry[i].frame_info[j]->chroma_format_idc);
+
+            av_log(NULL, AV_LOG_TRACE, "bit_depth_minus8:                       %"PRIu8"\n", 
+            apvc->configuration_entry[i].frame_info[j]->bit_depth_minus8);
+
+            av_log(NULL, AV_LOG_TRACE, "capture_time_distance:                  %"PRIu8"\n", 
+            apvc->configuration_entry[i].frame_info[j]->capture_time_distance);
+
+            if(apvc->configuration_entry[i].frame_info[j]->color_description_present_flag) {
+          
+                av_log(NULL, AV_LOG_TRACE, "color_primaries:                    %"PRIu8"\n", 
+                apvc->configuration_entry[i].frame_info[j]->color_primaries);
+                
+                av_log(NULL, AV_LOG_TRACE, "transfer_characteristics:           %"PRIu8"\n", 
+                apvc->configuration_entry[i].frame_info[j]->transfer_characteristics);
+                
+                av_log(NULL, AV_LOG_TRACE, "matrix_coefficients:                %"PRIu8"\n", 
+                apvc->configuration_entry[i].frame_info[j]->matrix_coefficients);
+
+                av_log(NULL, AV_LOG_TRACE, "full_range_flag:                    %"PRIu8"\n", 
+                apvc->configuration_entry[i].frame_info[j]->full_range_flag);
+            }
+
+        }
+    }
+    
+    /* unsigned int(8) configurationVersion = 1; */
+    avio_w8(pb, apvc->configurationVersion);
+
+    avio_w8(pb, apvc->number_of_configuration_entry);
+    
+    for(int i=0; i<apvc->number_of_configuration_entry;i++) {
+        avio_w8(pb, apvc->configuration_entry[i].pbu_type);
+        avio_w8(pb, apvc->configuration_entry[i].number_of_frame_info);
+
+        for(int j=0; j < apvc->configuration_entry[i].number_of_frame_info; j++) {
+
+            /* unsigned int(6) reserved_zero_6bits
+            * unsigned int(1) color_description_present_flag
+            * unsigned int(1) capture_time_distance_ignored
+            */
+            avio_w8(pb, apvc->configuration_entry[i].frame_info[j]->reserved_zero_6bits << 2 |
+                        apvc->configuration_entry[i].frame_info[j]->color_description_present_flag << 1 | 
+                        apvc->configuration_entry[i].frame_info[j]->capture_time_distance_ignored);
+            
+            /* unsigned int(8) profile_idc */
+            avio_w8(pb, apvc->configuration_entry[i].frame_info[j]->profile_idc);
+
+            /* unsigned int(8) level_idc */
+            avio_w8(pb, apvc->configuration_entry[i].frame_info[j]->level_idc);
+
+            /* unsigned int(8) band_idc */
+            avio_w8(pb, apvc->configuration_entry[i].frame_info[j]->band_idc);
+            
+            /* unsigned int(32) frame_width_minus1 */
+            avio_wb32(pb, apvc->configuration_entry[i].frame_info[j]->frame_width);
+
+            /* unsigned int(32) frame_height_minus1 */
+            avio_wb32(pb, apvc->configuration_entry[i].frame_info[j]->frame_height);
+
+            /* unsigned int(4) chroma_format_idc */
+            /* unsigned int(4) bit_depth_minus8 */
+            avio_w8(pb, apvc->configuration_entry[i].frame_info[j]->chroma_format_idc << 4 | apvc->configuration_entry[i].frame_info[j]->bit_depth_minus8);
+
+            /* unsigned int(8) capture_time_distance */
+            avio_w8(pb, apvc->configuration_entry[i].frame_info[j]->capture_time_distance);
+
+            if(apvc->configuration_entry[i].frame_info[j]->color_description_present_flag) {
+                /* unsigned int(8) color_primaries */
+                avio_w8(pb, apvc->configuration_entry[i].frame_info[j]->color_primaries);
+                
+                /* unsigned int(8) transfer_characteristics */
+                avio_w8(pb, apvc->configuration_entry[i].frame_info[j]->transfer_characteristics);
+
+                /* unsigned int(8) matrix_coefficients */
+                avio_w8(pb, apvc->configuration_entry[i].frame_info[j]->matrix_coefficients);
+
+                /* unsigned int(1) full_range_flag */
+                avio_w8(pb, apvc->configuration_entry[i].frame_info[j]->full_range_flag << 7 |
+                            apvc->configuration_entry[i].frame_info[j]->reserved_zero_7bits);
+            }
+        }
+    }
+
+    return 0;
+}
+
+int ff_isom_write_apvc(AVIOContext *pb, const uint8_t *data,
+                       int size, int ps_array_completeness)
+{
+    APVDecoderConfigurationRecord *apvc = (APVDecoderConfigurationRecord *)data;
+    int ret = 0;
+    
+    if (size < 8) {
+        /* We can't write a valid apvC from the provided data */
+        return AVERROR_INVALIDDATA;
+    }
+
+    if(size!=sizeof(APVDecoderConfigurationRecord)) return -1;
+    ret = apvc_write(pb, apvc);
+
+    apvc_close(apvc);
+    return ret;
+}
+
+static int apv_add_frameinfo(APVDecoderConfigurationEntry *configuration_entry, APVDecoderFrameInfo *frame_info) {
+    APVDecoderFrameInfo **temp = NULL;
+    if(configuration_entry->number_of_frame_info == 0) {
+        temp = (APVDecoderFrameInfo **)malloc(sizeof(APVDecoderFrameInfo*));
+        if (temp == NULL) {
+            return AVERROR_INVALIDDATA;
+        }
+    } else {
+        temp = (APVDecoderFrameInfo **)realloc(configuration_entry->frame_info, (configuration_entry->number_of_frame_info + 1) * sizeof(APVDecoderFrameInfo*));
+        if (temp == NULL) {
+            return AVERROR_INVALIDDATA;
+        }
+    }
+    
+    temp[configuration_entry->number_of_frame_info] = (APVDecoderFrameInfo*)malloc(sizeof(APVDecoderFrameInfo));
+    memcpy(temp[configuration_entry->number_of_frame_info], frame_info, sizeof(APVDecoderFrameInfo));
+
+    configuration_entry->frame_info = temp;
+
+    configuration_entry->number_of_frame_info++;
+
+    return 0;
+}
+
+static bool apv_cmp_frameinfo(const APVDecoderFrameInfo *a, const APVDecoderFrameInfo *b) {
+    if (a->reserved_zero_6bits != b->reserved_zero_6bits) return false;
+    if (a->color_description_present_flag != b->color_description_present_flag) return false;
+    if (a->capture_time_distance_ignored != b->capture_time_distance_ignored) return false;
+    if (a->profile_idc != b->profile_idc) return false;
+    if (a->level_idc != b->level_idc) return false;
+    if (a->band_idc != b->band_idc) return false;
+    if (a->frame_width != b->frame_width) return false;
+    if (a->frame_height != b->frame_height) return false;
+    if (a->chroma_format_idc != b->chroma_format_idc) return false;
+    if (a->bit_depth_minus8 != b->bit_depth_minus8) return false;
+    if (a->capture_time_distance != b->capture_time_distance) return false;
+    if (a->color_primaries != b->color_primaries) return false;
+    if (a->transfer_characteristics != b->transfer_characteristics) return false;
+    if (a->matrix_coefficients != b->matrix_coefficients) return false;
+    if (a->full_range_flag != b->full_range_flag) return false;
+    if (a->reserved_zero_7bits != b->reserved_zero_7bits) return false;
+
+    return true;
+}
+
+static int apv_set_frameinfo(APVDecoderFrameInfo* frame_info, const pbu_t *pbu) {
+    frame_info->reserved_zero_6bits = 0;
+
+    frame_info->color_description_present_flag = pbu->frame.frame_header.color_description_present_flag;
+    frame_info->capture_time_distance_ignored = 1;
+
+    frame_info->profile_idc = pbu->frame.frame_header.frame_info.profile_idc;
+    frame_info->level_idc = pbu->frame.frame_header.frame_info.level_idc;
+    frame_info->band_idc = pbu->frame.frame_header.frame_info.band_idc;
+
+    frame_info->frame_width = pbu->frame.frame_header.frame_info.frame_width;
+    frame_info->frame_height = pbu->frame.frame_header.frame_info.frame_height;
+
+    frame_info->chroma_format_idc = pbu->frame.frame_header.frame_info.chroma_format_idc;
+    frame_info->bit_depth_minus8 = pbu->frame.frame_header.frame_info.bit_depth_minus8;
+
+    frame_info->capture_time_distance = pbu->frame.frame_header.frame_info.capture_time_distance;
+
+
+    if(frame_info->color_description_present_flag) {
+        frame_info->color_primaries = pbu->frame.frame_header.color_primaries;
+        frame_info->transfer_characteristics = pbu->frame.frame_header.transfer_characteristics;
+        frame_info->matrix_coefficients = pbu->frame.frame_header.matrix_coefficients;
+
+        frame_info->full_range_flag = pbu->frame.frame_header.full_range_flag;
+        frame_info->reserved_zero_7bits = 0;
+    }
+
+    return 0;
+}
+
+int ff_isom_create_apv_dconf_record(uint8_t **data, int *size) {
+    *size = sizeof(APVDecoderConfigurationRecord);
+    *data = (uint8_t*)av_malloc(sizeof(APVDecoderConfigurationRecord));
+    if(*data==NULL) {
+        *size = 0;
+         return AVERROR_INVALIDDATA;
+    }
+    apvc_init((APVDecoderConfigurationRecord*)*data);
+    return 0;
+}
+
+void ff_isom_free_apv_dconf_record(uint8_t **data) {
+    if (data != NULL && *data != NULL) {
+        APVDecoderConfigurationRecord* apvc = (APVDecoderConfigurationRecord*)*data;
+        apvc_close(apvc);
+
+        free(*data);
+        *data = NULL;
+    }
+}
+
+int ff_isom_fill_apv_dconf_record(const uint8_t *apvdcr, const uint8_t *data, int size) {
+
+    uint32_t au_size = 0;
+    uint32_t number_of_pbu_entry = 0;
+
+    uint32_t frame_type = -1;
+    APVDecoderFrameInfo frame_info;
+
+    int bytes_to_read = size;
+    int ret = 0;
+
+    APVDecoderConfigurationRecord* apvc = (APVDecoderConfigurationRecord*)apvdcr;
+    if (size < 8) {
+        /* We can't write a valid apvC from the provided data */
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (bytes_to_read > APV_AU_SIZE_PREFIX_LENGTH) {
+        au_size = apv_read_au_size(data, APV_AU_SIZE_PREFIX_LENGTH);
+        if (au_size == 0) {
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+
+        data += APV_AU_SIZE_PREFIX_LENGTH;
+        bytes_to_read -= APV_AU_SIZE_PREFIX_LENGTH;
+
+        if (bytes_to_read < au_size) {
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+
+        data += APV_SIGNATURE_LENGTH;
+        bytes_to_read -= APV_SIGNATURE_LENGTH;
+
+        // pbu (primitive bitstream units number)
+        //
+        number_of_pbu_entry = apv_number_of_pbu_entry(data, bytes_to_read);
+        if (number_of_pbu_entry <= 0) {
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+
+        for(int i=0;i<number_of_pbu_entry;i++) {
+
+            pbu_t pbu;
+            uint32_t pbu_size = apv_read_pbu_size(data, APV_PBU_SIZE_PREFIX_LENGTH);
+
+            data += APV_AU_SIZE_PREFIX_LENGTH;
+
+            if(!apv_read_pbu(data, pbu_size, &pbu)) {
+
+                switch (pbu.pbu_header.pbu_type)
+                {
+                case APV_PBU_TYPE_PRIMARY_FRAME:
+                    frame_type = APV_FRAME_TYPE_PRIMARY_FRAME;
+                    break;
+                case APV_PBU_TYPE_NON_PRIMARY_FRAME:
+                    frame_type = APV_FRAME_TYPE_NON_PRIMARY_FRAME;
+                    break;
+                case APV_PBU_TYPE_PREVIEW_FRAME:
+                    frame_type = APV_FRAME_TYPE_PREVIEW_FRAME;
+                    break;
+                case APV_PBU_TYPE_DEPTH_FRAME:
+                    frame_type = APV_FRAME_TYPE_DEPTH_FRAME;
+                    break;
+                case APV_PBU_TYPE_ALPHA_FRAME:
+                    frame_type = APV_FRAME_TYPE_ALPHA_FRAME;
+                    break;
+                default:
+                    frame_type = APV_FRAME_TYPE_NON_FRAME;
+                    break;
+                };
+
+                if(frame_type == APV_FRAME_TYPE_NON_FRAME) continue;
+
+                apv_set_frameinfo(&frame_info, &pbu);
+
+                if(apvc->configuration_entry[frame_type].number_of_frame_info == 0) {
+                    apv_add_frameinfo(&apvc->configuration_entry[frame_type], &frame_info);
+                    apvc->number_of_configuration_entry++;
+                } else {
+                    for(i=0; i<apvc->configuration_entry[frame_type].number_of_frame_info;i++) {
+                        if(!apv_cmp_frameinfo(apvc->configuration_entry[frame_type].frame_info[i], &frame_info)) {
+                            apv_add_frameinfo(&apvc->configuration_entry[i], &frame_info);
+                            break;
+                        }
+                    }
+                }   
+            }
+            data += pbu_size;
+        }
+    }
+
+end:    
+    return ret;
+}
diff --git a/libavformat/apv.h b/libavformat/apv.h
new file mode 100644
index 0000000000..16252a66b5
--- /dev/null
+++ b/libavformat/apv.h
@@ -0,0 +1,94 @@
+/*
+ * APV helper functions for muxers
+ * Copyright (c) 2025 Dawid Kozinski <d.kozinski@samsung.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFORMAT_APV_H
+#define AVFORMAT_APV_H
+
+#include <stdint.h>
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/rational.h"
+#include "libavcodec/apv.h"
+#include "avio.h"
+
+static inline uint32_t apv_read_frame_data_size(const uint8_t *bits, int bits_size)
+{
+    if (bits_size >= APV_FRAME_DATA_SIZE_PREFIX_LENGTH)
+        return AV_RB32(bits);
+
+    return 0;
+}
+
+static inline uint32_t apv_read_au_size(const uint8_t *bits, int bits_size)
+{
+    if (bits_size >= APV_AU_SIZE_PREFIX_LENGTH)
+        return AV_RB32(bits);
+
+    return 0;
+}
+
+/**
+ * Writes APV sample metadata to the provided AVIOContext.
+ *
+ * @param pb pointer to the AVIOContext where the apv sample metadata shall be written
+ * @param buf input data buffer
+ * @param size size in bytes of the input data buffer
+ * @param ps_array_completeness
+ *
+ * @return 0 in case of success, a negative error code in case of failure
+ */
+int ff_isom_write_apvc(AVIOContext *pb, const uint8_t *data,
+                       int size, int ps_array_completeness);
+
+/**
+ * @brief Creates and allocates memory for an APV decoder configuration record.
+ *
+ * This function allocates memory for an APVDecoderConfigurationRecord and
+ * initializes it. The size of the record is returned through the `size` parameter.
+ *
+ * @param data Pointer to a pointer where the allocated data will be stored.
+ * @param size Pointer to an integer where the size of the allocated record will be stored.
+ * @return 0 on success, or AVERROR_INVALIDDATA if memory allocation fails.
+ */
+int ff_isom_create_apv_dconf_record(uint8_t **data, int *size);
+
+/**
+ * @brief Frees the memory allocated for the APV decoder configuration record.
+ * 
+ * @param data data to be freed
+ */
+void ff_isom_free_apv_dconf_record(uint8_t **data);
+
+/**
+ * @brief Fills an APV decoder configuration record with data.
+ *
+ * This function populates the APVDecoderConfigurationRecord pointed to by
+ * `apvdcr` with the data from `data`, which has a specified size. The data
+ * represents an access unit.
+ *
+ * @param apvdcr Pointer to the APVDecoderConfigurationRecord to be filled.
+ * @param data Pointer to the data to fill the record with.
+ * @param size Size of the data to be copied into the record.
+ * @return 0 on success, or a negative value on error.
+ */
+int ff_isom_fill_apv_dconf_record(const uint8_t *apvc, const uint8_t *data, int size);
+
+#endif // AVFORMAT_APV_H
diff --git a/libavformat/isom_tags.c b/libavformat/isom_tags.c
index f05762beec..6a806e09da 100644
--- a/libavformat/isom_tags.c
+++ b/libavformat/isom_tags.c
@@ -156,6 +156,8 @@ const AVCodecTag ff_codec_movvideo_tags[] = {
     { AV_CODEC_ID_H264, MKTAG('d', 'v', 'a', 'v') }, /* AVC-based Dolby Vision derived from avc3 */
 
     { AV_CODEC_ID_EVC,  MKTAG('e', 'v', 'c', '1') }, /* EVC/MPEG-5 */
+    { AV_CODEC_ID_APV,  MKTAG('a', 'p', 'v', '1') }, /* APV */
+
 
     { AV_CODEC_ID_VP8,  MKTAG('v', 'p', '0', '8') }, /* VP8 */
     { AV_CODEC_ID_VP9,  MKTAG('v', 'p', '0', '9') }, /* VP9 */
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 4bc8bd1b2a..91d7dc23a8 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -37,6 +37,7 @@
 #include "av1.h"
 #include "avc.h"
 #include "evc.h"
+#include "apv.h"
 #include "libavcodec/ac3_parser_internal.h"
 #include "libavcodec/dnxhddata.h"
 #include "libavcodec/flac.h"
@@ -1647,6 +1648,24 @@ static int mov_write_vvcc_tag(AVIOContext *pb, MOVTrack *track)
     return update_size(pb, pos);
 }
 
+static int mov_write_apvc_tag(AVIOContext *pb, MOVTrack *track)
+{
+    int64_t pos = avio_tell(pb);
+
+    avio_wb32(pb, 0);
+    ffio_wfourcc(pb, "apvC");
+
+    avio_w8  (pb, 0); /* version */
+    avio_wb24(pb, 0); /* flags */
+    
+    if (track->tag == MKTAG('a','p','v','1'))
+        ff_isom_write_apvc(pb, track->vos_data, track->vos_len, 1);
+    else
+        ff_isom_write_apvc(pb, track->vos_data, track->vos_len, 0);
+
+    return update_size(pb, pos);
+}
+
 /* also used by all avid codecs (dv, imx, meridien) and their variants */
 static int mov_write_avid_tag(AVIOContext *pb, MOVTrack *track)
 {
@@ -1906,6 +1925,17 @@ static int mov_get_evc_codec_tag(AVFormatContext *s, MOVTrack *track)
     return tag;
 }
 
+static int mov_get_apv_codec_tag(AVFormatContext *s, MOVTrack *track)
+{
+    int tag = track->par->codec_tag;
+
+    if (!tag)
+        tag = MKTAG('a', 'p', 'v', '1');
+
+    return tag;
+}
+
+
 static const struct {
     enum AVPixelFormat pix_fmt;
     uint32_t tag;
@@ -1992,6 +2022,8 @@ static unsigned int mov_get_codec_tag(AVFormatContext *s, MOVTrack *track)
             tag = mov_get_h264_codec_tag(s, track);
         else if (track->par->codec_id == AV_CODEC_ID_EVC)
             tag = mov_get_evc_codec_tag(s, track);
+        else if (track->par->codec_id == AV_CODEC_ID_APV)
+            tag = mov_get_apv_codec_tag(s, track);
         else if (track->par->codec_id == AV_CODEC_ID_DNXHD)
             tag = mov_get_dnxhd_codec_tag(s, track);
         else if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
@@ -2757,6 +2789,8 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
     }
     else if (track->par->codec_id ==AV_CODEC_ID_EVC) {
         mov_write_evcc_tag(pb, track);
+    } else if (track->par->codec_id ==AV_CODEC_ID_APV) {
+        mov_write_apvc_tag(pb, track);
     } else if (track->par->codec_id == AV_CODEC_ID_VP9) {
         mov_write_vpcc_tag(mov->fc, pb, track);
     } else if (track->par->codec_id == AV_CODEC_ID_AV1) {
@@ -6713,6 +6747,18 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
         memset(trk->vos_data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
     }
 
+    if (par->codec_id == AV_CODEC_ID_APV && !trk->vos_len) {
+            ret = ff_isom_create_apv_dconf_record(&trk->vos_data, &trk->vos_len);
+            if (!trk->vos_data) {
+                ret = AVERROR(ENOMEM);
+                goto err;
+            }
+    }
+    
+    if (par->codec_id == AV_CODEC_ID_APV && trk->vos_len) {
+        ret = ff_isom_fill_apv_dconf_record(trk->vos_data, pkt->data, size);
+    }
+
     if (par->codec_id == AV_CODEC_ID_AAC && pkt->size > 2 &&
         (AV_RB16(pkt->data) & 0xfff0) == 0xfff0) {
         if (!trk->st->nb_frames) {
@@ -8651,6 +8697,7 @@ static const AVCodecTag codec_mp4_tags[] = {
     { AV_CODEC_ID_VVC,             MKTAG('v', 'v', 'c', '1') },
     { AV_CODEC_ID_VVC,             MKTAG('v', 'v', 'i', '1') },
     { AV_CODEC_ID_EVC,             MKTAG('e', 'v', 'c', '1') },
+    { AV_CODEC_ID_APV,             MKTAG('a', 'p', 'v', '1') },
     { AV_CODEC_ID_MPEG2VIDEO,      MKTAG('m', 'p', '4', 'v') },
     { AV_CODEC_ID_MPEG1VIDEO,      MKTAG('m', 'p', '4', 'v') },
     { AV_CODEC_ID_MJPEG,           MKTAG('m', 'p', '4', 'v') },
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH v1 5/8] avformat/mov_muxer: Extended MOV muxer to handle APV video content
  2025-04-23 14:13 ` [FFmpeg-devel] [PATCH v1 5/8] avformat/mov_muxer: Extended MOV muxer to handle APV video content Dawid Kozinski
@ 2025-04-23 14:43   ` James Almer
  2025-04-24  8:59     ` Dawid Kozinski/Multimedia (PLT) /SRPOL/Staff Engineer/Samsung Electronics
  2025-04-23 21:08   ` Mark Thompson
  1 sibling, 1 reply; 7+ messages in thread
From: James Almer @ 2025-04-23 14:43 UTC (permalink / raw)
  To: ffmpeg-devel


[-- Attachment #1.1.1: Type: text/plain, Size: 1801 bytes --]

On 4/23/2025 11:13 AM, Dawid Kozinski wrote:
> @@ -2757,6 +2789,8 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
>       }
>       else if (track->par->codec_id ==AV_CODEC_ID_EVC) {
>           mov_write_evcc_tag(pb, track);
> +    } else if (track->par->codec_id ==AV_CODEC_ID_APV) {
> +        mov_write_apvc_tag(pb, track);
>       } else if (track->par->codec_id == AV_CODEC_ID_VP9) {
>           mov_write_vpcc_tag(mov->fc, pb, track);
>       } else if (track->par->codec_id == AV_CODEC_ID_AV1) {
> @@ -6713,6 +6747,18 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>           memset(trk->vos_data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
>       }
>   
> +    if (par->codec_id == AV_CODEC_ID_APV && !trk->vos_len) {
> +            ret = ff_isom_create_apv_dconf_record(&trk->vos_data, &trk->vos_len);
> +            if (!trk->vos_data) {
> +                ret = AVERROR(ENOMEM);
> +                goto err;
> +            }
> +    }
> +
> +    if (par->codec_id == AV_CODEC_ID_APV && trk->vos_len) {
> +        ret = ff_isom_fill_apv_dconf_record(trk->vos_data, pkt->data, size);
> +    }
> +
>       if (par->codec_id == AV_CODEC_ID_AAC && pkt->size > 2 &&
>           (AV_RB16(pkt->data) & 0xfff0) == 0xfff0) {
>           if (!trk->st->nb_frames) {

Instead of this, add APV to the list in 
https://git.videolan.org/?p=ffmpeg.git;a=blob;f=libavformat/movenc.c;h=4bc8bd1b2ab765c2b9f5f5dfc2dcb77361f2b944;hb=HEAD#l6697 
so the first packet is always copied to trk->vos_data in case 
par->extradata is not set.

After that, ff_isom_write_apvc() can either write the extradata as is if 
it's already a configuration record, or generate it if it's just a 
packet of PBUs (See ff_isom_write_hvcc()).


[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH v1 5/8] avformat/mov_muxer: Extended MOV muxer to handle APV video content
  2025-04-23 14:13 ` [FFmpeg-devel] [PATCH v1 5/8] avformat/mov_muxer: Extended MOV muxer to handle APV video content Dawid Kozinski
  2025-04-23 14:43   ` James Almer
@ 2025-04-23 21:08   ` Mark Thompson
  2025-04-24 12:08     ` Dawid Kozinski/Multimedia (PLT) /SRPOL/Staff Engineer/Samsung Electronics
  1 sibling, 1 reply; 7+ messages in thread
From: Mark Thompson @ 2025-04-23 21:08 UTC (permalink / raw)
  To: ffmpeg-devel

On 23/04/2025 15:13, Dawid Kozinski wrote:
> - Changes in mov_write_video_tag function to handle APV elementary stream
> - Provided structure APVDecoderConfigurationRecord that specifies the decoder configuration information for APV video content
> 
> Signed-off-by: Dawid Kozinski <d.kozinski@samsung.com>
> ---
>  libavformat/Makefile    |   2 +-
>  libavformat/apv.c       | 827 ++++++++++++++++++++++++++++++++++++++++
>  libavformat/apv.h       |  94 +++++
>  libavformat/isom_tags.c |   2 +
>  libavformat/movenc.c    |  47 +++
>  5 files changed, 971 insertions(+), 1 deletion(-)

Hi,

Two thoughts here:

First, your AVPackets contain a raw_bitstream_access_unit().  I don't think this is the right approach - the packets should contain the codec data only, not the additional encapsulation.  (This is the method I followed.)

For this patch in particular, I think it results in writing the files incorrectly: the specification says "each sample shall contain one and only one access unit of APV coded data", which I interpret to mean one access_unit() syntax structure.

This also results in the size effectively appearing multiple times in the file for no good reason:

00000020  66 72 65 65 00 01 15 db  6d 64 61 74 00 01 15 cf  |free....mdat....|

                      ^ mdat size              ^ au_size

00000030  61 50 76 31 00 01 15 c7  01 00 01 00 21 21 40 00  |aPv1........!!@.|
          ^ signature ^ pbu_size   ^ pbu_type followed by header

The separate pbu_size makes sense if there is also metadata, but having the mdat box with a size immediately followed by the same size (well, minus twelve for mdat size + mdat + au size) again inside the box does not seem helpful.

Second, I think we need a consistent decision on what the extradata should be doing.  The APVDecoderConfigurationRecord makes sense as a thing for it to contain, but it's not clear to me that it needs to exist at all as it has no effect on anything inside ffmpeg (a decoder will always ignore it).

You currently make extradata from one of your demuxers but not other one or the encoder, and nothing requires it when consuming.  Why is it useful to have ever?

Thanks,

- Mark

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH v1 5/8] avformat/mov_muxer: Extended MOV muxer to handle APV video content
  2025-04-23 14:43   ` James Almer
@ 2025-04-24  8:59     ` Dawid Kozinski/Multimedia (PLT) /SRPOL/Staff Engineer/Samsung Electronics
  0 siblings, 0 replies; 7+ messages in thread
From: Dawid Kozinski/Multimedia (PLT) /SRPOL/Staff Engineer/Samsung Electronics @ 2025-04-24  8:59 UTC (permalink / raw)
  To: 'FFmpeg development discussions and patches'




> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of James
> Almer
> Sent: środa, 23 kwietnia 2025 16:44
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH v1 5/8] avformat/mov_muxer: Extended
> MOV muxer to handle APV video content
> 
> On 4/23/2025 11:13 AM, Dawid Kozinski wrote:
> > @@ -2757,6 +2789,8 @@ static int mov_write_video_tag(AVFormatContext
> *s, AVIOContext *pb, MOVMuxContex
> >       }
> >       else if (track->par->codec_id ==AV_CODEC_ID_EVC) {
> >           mov_write_evcc_tag(pb, track);
> > +    } else if (track->par->codec_id ==AV_CODEC_ID_APV) {
> > +        mov_write_apvc_tag(pb, track);
> >       } else if (track->par->codec_id == AV_CODEC_ID_VP9) {
> >           mov_write_vpcc_tag(mov->fc, pb, track);
> >       } else if (track->par->codec_id == AV_CODEC_ID_AV1) { @@ -6713,6
> > +6747,18 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> >           memset(trk->vos_data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
> >       }
> >
> > +    if (par->codec_id == AV_CODEC_ID_APV && !trk->vos_len) {
> > +            ret = ff_isom_create_apv_dconf_record(&trk->vos_data, &trk-
> >vos_len);
> > +            if (!trk->vos_data) {
> > +                ret = AVERROR(ENOMEM);
> > +                goto err;
> > +            }
> > +    }
> > +
> > +    if (par->codec_id == AV_CODEC_ID_APV && trk->vos_len) {
> > +        ret = ff_isom_fill_apv_dconf_record(trk->vos_data, pkt->data, size);
> > +    }
> > +
> >       if (par->codec_id == AV_CODEC_ID_AAC && pkt->size > 2 &&
> >           (AV_RB16(pkt->data) & 0xfff0) == 0xfff0) {
> >           if (!trk->st->nb_frames) {
> 
> Instead of this, add APV to the list in
> https://git.videolan.org/?p=ffmpeg.git;a=blob;f=libavformat/movenc.c;h=4bc8b
> d1b2ab765c2b9f5f5dfc2dcb77361f2b944;hb=HEAD#l6697
> so the first packet is always copied to trk->vos_data in case
> par->extradata is not set.
> 
> After that, ff_isom_write_apvc() can either write the extradata as is if it's
> already a configuration record, or generate it if it's just a packet of PBUs (See
> ff_isom_write_hvcc()).

Previously, I had this implemented as follows:

if ((par->codec_id == AV_CODEC_ID_DNXHD ||
     par->codec_id == AV_CODEC_ID_H264 ||
     par->codec_id == AV_CODEC_ID_HEVC || 
     par->codec_id == AV_CODEC_ID_VVC ||
     par->codec_id == AV_CODEC_ID_VP9 ||
     par->codec_id == AV_CODEC_ID_EVC ||
     par->codec_id == AV_CODEC_ID_APV ||
     par->codec_id == AV_CODEC_ID_TRUEHD) && !trk->vos_len &&

AV_CODEC_ID_APV was included in the list at
https://git.videolan.org/?p=ffmpeg.git;a=blob;f=libavformat/movenc.c;h=4bc8bd1b2ab765c2b9f5f5dfc2dcb77361f2b944;hb=HEAD#l6697.

It was exactly as you described, which caused the first packet to be copied to trk->vos_data when par->extradata was not set.

However, I had to change this because I was unable to fill the APVDecoderConfigurationRecord structure in the ff_isom_write_apvc() function based on the data from trk->vos_data (data from a single packet - data only from the first AU). The structure is defined as follows:
https://github.com/AcademySoftwareFoundation/openapv/blob/main/readme/apv_isobmff.md.

The structure contains the fields number_of_configuration_entry and number_of_frame_info[i]. It seems to me that filling in correctly the APVDecoderConfigurationRecord structure requires analyzing the entire stream.

number_of_configuration_entry - this value corresponds to the number of different PBU types in all AUs (PBU types for frames: 1 - primary frame; 2 - non-primary frame; 25 - preview frame; 26 - depth frame; 27 - alpha frame).
number_of_frame_info[i] - indicates the number of variations of the frame header information; for each configuration, there can be one or more variations of frame_info.

Examples:
--------------
Case 1:
======
If we have a stream containing only primary frames and if each primary frame has the same data in frame_info, then we have:

number_of_configuration_entry = 1
number_of_frame_info[0] = 1 (if all primary frames have the same frame_info)

Case 2:
======
If we have a stream containing both primary frames and non-primary frames, and if each primary and non-primary frame has the same data in frame_info, then we have:

number_of_configuration_entry = 2
number_of_frame_info[0] = 1 (if all primary frames have the same frame_info)
number_of_frame_info[1] = 1 (if all non-primary frames (type: 2) have the same frame_info)
Case 3:
======

number_of_configuration_entry = 1
number_of_frame_info[0] = 2 (if primary frames have 2 kinds of frame_info)

To correctly fill in number_of_configuration_entry and number_of_frame_info[i], we need to check all AUs one by one. The number_of_configuration_entry will equal the number of PBU types in the stream (only those that contain frame_info, such as 1, 2, 25, 26, 27). Therefore, if the stream contains PBU types 1 and 2, then number_of_configuration_entry = 2. For each of these types, there can be different number_of_frame_info. If in the stream we have a certain number of primary frame PBUs with Full HD resolution, followed by a certain number of primary frame PBUs with UHD resolution, then for the configuration related to the primary frame, we will have number_of_frame_info[i] = 2.

Please correct me if I'm wrong.


_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH v1 5/8] avformat/mov_muxer: Extended MOV muxer to handle APV video content
  2025-04-23 21:08   ` Mark Thompson
@ 2025-04-24 12:08     ` Dawid Kozinski/Multimedia (PLT) /SRPOL/Staff Engineer/Samsung Electronics
  2025-04-24 19:02       ` Mark Thompson
  0 siblings, 1 reply; 7+ messages in thread
From: Dawid Kozinski/Multimedia (PLT) /SRPOL/Staff Engineer/Samsung Electronics @ 2025-04-24 12:08 UTC (permalink / raw)
  To: 'FFmpeg development discussions and patches'





> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Mark
> Thompson
> Sent: środa, 23 kwietnia 2025 23:08
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH v1 5/8] avformat/mov_muxer: Extended
> MOV muxer to handle APV video content
> 
> On 23/04/2025 15:13, Dawid Kozinski wrote:
> > - Changes in mov_write_video_tag function to handle APV elementary
> > stream
> > - Provided structure APVDecoderConfigurationRecord that specifies the
> > decoder configuration information for APV video content
> >
> > Signed-off-by: Dawid Kozinski <d.kozinski@samsung.com>
> > ---
> >  libavformat/Makefile    |   2 +-
> >  libavformat/apv.c       | 827 ++++++++++++++++++++++++++++++++++++++++
> >  libavformat/apv.h       |  94 +++++
> >  libavformat/isom_tags.c |   2 +
> >  libavformat/movenc.c    |  47 +++
> >  5 files changed, 971 insertions(+), 1 deletion(-)
> 
> Hi,
> 
> Two thoughts here:
> 
> First, your AVPackets contain a raw_bitstream_access_unit().  I don't
think this is
> the right approach - the packets should contain the codec data only, not
the
> additional encapsulation.  (This is the method I followed.)
> 
> For this patch in particular, I think it results in writing the files
incorrectly: the
> specification says "each sample shall contain one and only one access unit
of
> APV coded data", which I interpret to mean one access_unit() syntax
structure.
> 
> This also results in the size effectively appearing multiple times in the
file for no
> good reason:
> 
> 00000020  66 72 65 65 00 01 15 db  6d 64 61 74 00 01 15 cf
|free....mdat....|
> 
>                       ^ mdat size              ^ au_size
> 
> 00000030  61 50 76 31 00 01 15 c7  01 00 01 00 21 21 40 00
|aPv1........!!@.|
>           ^ signature ^ pbu_size   ^ pbu_type followed by header
> 
> The separate pbu_size makes sense if there is also metadata, but having
the
> mdat box with a size immediately followed by the same size (well, minus
twelve
> for mdat size + mdat + au size) again inside the box does not seem
helpful.
> 

Hi Mark,

Indeed. AVPackets contains raw_bitstream_access_unit()

raw_bitstream_access_unit() {                                  
    au_size                                                   | u(32)
    access_unit(au_size)                                      |
}         

Such data comes out from the APV encoder.
Data from the encoder can go to the movmuxer, but it can also go to another
muxer like apvmuxer.

const FFOutputFormat ff_apv_muxer = {
    .p.name            = "apv",
    .p.long_name       = NULL_IF_CONFIG_SMALL("raw APV video"),
    .p.extensions      = "apv",
    .p.audio_codec     = AV_CODEC_ID_NONE,
    .p.video_codec     = AV_CODEC_ID_APV,
    .flags_internal    = FF_OFMT_FLAG_MAX_ONE_OF_EACH |
                         FF_OFMT_FLAG_ONLY_DEFAULT_CODECS,
    .write_packet      = ff_raw_write_packet,
    .p.flags           = AVFMT_NOTIMESTAMPS,
};

Thanks to the fact that the APV muxer receives such data packed in AVPacket,
I was able to use the default function ff_raw_write_packet() from rawenc.c
as FFOutputFormat::write_packet for ff_apv_muxer.

The APV muxer uses ff_raw_write_packet, which takes the data that came from
the encoder packed in AVPacket and writes the data to a file.

In the case of saving the elementary APV stream to a file, the data should
have the format [au_size (4Bytes)][access_unit][au_size
(4Bytes)][access_unit]...[au_size(4Bytes)][access_unit].

Regarding APV in movmuxer, we can eliminate the additional AU size in two
ways:

1.
We can change the output data format from the encoder so that the AVPacket
contains access_unit() without the 4-byte prefix containing information
about the length of the AU. However, this will require using a custom
.write_packet function in ff_apv_muxer that will add a prefix specifying the
size of the AU in the output stream before each access unit.


2. 
We will make changes in the mov muxer (movenc.c). We will not write the data
using the call avio_write(pb, pkt->data, size); but will implement custom
handling for APV (an additional if() in ff_mov_write_packet()):

if (par->codec_id == AV_CODEC_ID_APV) {
    avio_write(pb, pkt->data + 4, size - 4);
}

Please share your opinion.

BR
Dawid

> Second, I think we need a consistent decision on what the extradata should
be
> doing.  The APVDecoderConfigurationRecord makes sense as a thing for it to
> contain, but it's not clear to me that it needs to exist at all as it has
no effect on
> anything inside ffmpeg (a decoder will always ignore it).
> 
> You currently make extradata from one of your demuxers but not other one
or
> the encoder, and nothing requires it when consuming.  Why is it useful to
have
> ever?
> 
> Thanks,
> 
> - Mark
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://protect2.fireeye.com/v1/url?k=fe563622-a1cdf846-fe57bd6d-
> 000babda0201-8b2ed12312438c45&q=1&e=8a63da42-a031-4561-b197-
> 0dcaaf458058&u=https%3A%2F%2Fffmpeg.org%2Fmailman%2Flistinfo%2Fffmp
> eg-devel
> 
> To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org
> with subject "unsubscribe".


_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH v1 5/8] avformat/mov_muxer: Extended MOV muxer to handle APV video content
  2025-04-24 12:08     ` Dawid Kozinski/Multimedia (PLT) /SRPOL/Staff Engineer/Samsung Electronics
@ 2025-04-24 19:02       ` Mark Thompson
  2025-04-25  6:54         ` Dawid Kozinski/Multimedia (PLT) /SRPOL/Staff Engineer/Samsung Electronics
  0 siblings, 1 reply; 7+ messages in thread
From: Mark Thompson @ 2025-04-24 19:02 UTC (permalink / raw)
  To: ffmpeg-devel

On 24/04/2025 13:08, Dawid Kozinski/Multimedia (PLT) /SRPOL/Staff Engineer/Samsung Electronics wrote:
>> -----Original Message-----
>> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Mark
>> Thompson
>> Sent: środa, 23 kwietnia 2025 23:08
>> To: ffmpeg-devel@ffmpeg.org
>> Subject: Re: [FFmpeg-devel] [PATCH v1 5/8] avformat/mov_muxer: Extended
>> MOV muxer to handle APV video content
>>
>> On 23/04/2025 15:13, Dawid Kozinski wrote:
>>> - Changes in mov_write_video_tag function to handle APV elementary
>>> stream
>>> - Provided structure APVDecoderConfigurationRecord that specifies the
>>> decoder configuration information for APV video content
>>>
>>> Signed-off-by: Dawid Kozinski <d.kozinski@samsung.com>
>>> ---
>>>  libavformat/Makefile    |   2 +-
>>>  libavformat/apv.c       | 827 ++++++++++++++++++++++++++++++++++++++++
>>>  libavformat/apv.h       |  94 +++++
>>>  libavformat/isom_tags.c |   2 +
>>>  libavformat/movenc.c    |  47 +++
>>>  5 files changed, 971 insertions(+), 1 deletion(-)
>>
>> Hi,
>>
>> Two thoughts here:
>>
>> First, your AVPackets contain a raw_bitstream_access_unit().  I don't
> think this is
>> the right approach - the packets should contain the codec data only, not
> the
>> additional encapsulation.  (This is the method I followed.)
>>
>> For this patch in particular, I think it results in writing the files
> incorrectly: the
>> specification says "each sample shall contain one and only one access unit
> of
>> APV coded data", which I interpret to mean one access_unit() syntax
> structure.
>>
>> This also results in the size effectively appearing multiple times in the
> file for no
>> good reason:
>>
>> 00000020  66 72 65 65 00 01 15 db  6d 64 61 74 00 01 15 cf
> |free....mdat....|
>>
>>                       ^ mdat size              ^ au_size
>>
>> 00000030  61 50 76 31 00 01 15 c7  01 00 01 00 21 21 40 00
> |aPv1........!!@.|
>>           ^ signature ^ pbu_size   ^ pbu_type followed by header
>>
>> The separate pbu_size makes sense if there is also metadata, but having
> the
>> mdat box with a size immediately followed by the same size (well, minus
> twelve
>> for mdat size + mdat + au size) again inside the box does not seem
> helpful.
>>
> 
> Hi Mark,
> 
> Indeed. AVPackets contains raw_bitstream_access_unit()
> 
> raw_bitstream_access_unit() {                                  
>     au_size                                                   | u(32)
>     access_unit(au_size)                                      |
> }         
> 
> Such data comes out from the APV encoder.
> Data from the encoder can go to the movmuxer, but it can also go to another
> muxer like apvmuxer.
> 
> const FFOutputFormat ff_apv_muxer = {
>     .p.name            = "apv",
>     .p.long_name       = NULL_IF_CONFIG_SMALL("raw APV video"),
>     .p.extensions      = "apv",
>     .p.audio_codec     = AV_CODEC_ID_NONE,
>     .p.video_codec     = AV_CODEC_ID_APV,
>     .flags_internal    = FF_OFMT_FLAG_MAX_ONE_OF_EACH |
>                          FF_OFMT_FLAG_ONLY_DEFAULT_CODECS,
>     .write_packet      = ff_raw_write_packet,
>     .p.flags           = AVFMT_NOTIMESTAMPS,
> };
> 
> Thanks to the fact that the APV muxer receives such data packed in AVPacket,
> I was able to use the default function ff_raw_write_packet() from rawenc.c
> as FFOutputFormat::write_packet for ff_apv_muxer.
> 
> The APV muxer uses ff_raw_write_packet, which takes the data that came from
> the encoder packed in AVPacket and writes the data to a file.
> 
> In the case of saving the elementary APV stream to a file, the data should
> have the format [au_size (4Bytes)][access_unit][au_size
> (4Bytes)][access_unit]...[au_size(4Bytes)][access_unit].
> 
> Regarding APV in movmuxer, we can eliminate the additional AU size in two
> ways:
> 
> 1.
> We can change the output data format from the encoder so that the AVPacket
> contains access_unit() without the 4-byte prefix containing information
> about the length of the AU. However, this will require using a custom
> .write_packet function in ff_apv_muxer that will add a prefix specifying the
> size of the AU in the output stream before each access unit.
> 
> 
> 2. 
> We will make changes in the mov muxer (movenc.c). We will not write the data
> using the call avio_write(pb, pkt->data, size); but will implement custom
> handling for APV (an additional if() in ff_mov_write_packet()):
> 
> if (par->codec_id == AV_CODEC_ID_APV) {
>     avio_write(pb, pkt->data + 4, size - 4);
> }
> 
> Please share your opinion.

I don't think the implementation details which you mention are relevant to the questions I am asking.


The primary point here is that the spec at <https://github.com/AcademySoftwareFoundation/openapv/blob/main/readme/apv_isobmff.md> says:

  "each sample shall contain one and only one access unit of APV coded data"

This states that it contains an "access unit", not a "raw bitstream access unit" as in your implementation.

Since your implementation and the spec disagree, can you comment on the status / intended finality of each of them?  (Is there another implementation of this which you are testing against, or is this the first one?)

To me the spec definition appears to be the more sensible one, since the raw bitstream format functions as a container and it does not make sense to nest containers.


Separately to that queston, there is the point that inside ffmpeg we may make our own choice about what an AVPacket should contain (and then use it consistently).

In this case my opinion is that having an AVPacket contain an access unit (the top-level syntax structure defined in the "syntax and semantics" section of the specification) is the choice which makes most sense, and it is also the one which I have followed consistently in my patch series.

>> Second, I think we need a consistent decision on what the extradata should
> be
>> doing.  The APVDecoderConfigurationRecord makes sense as a thing for it to
>> contain, but it's not clear to me that it needs to exist at all as it has
> no effect on
>> anything inside ffmpeg (a decoder will always ignore it).
>>
>> You currently make extradata from one of your demuxers but not other one
> or
>> the encoder, and nothing requires it when consuming.  Why is it useful to
> have
>> ever?

Your comment on this question would also be appreciated.

Thanks,

- Mark

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH v1 5/8] avformat/mov_muxer: Extended MOV muxer to handle APV video content
  2025-04-24 19:02       ` Mark Thompson
@ 2025-04-25  6:54         ` Dawid Kozinski/Multimedia (PLT) /SRPOL/Staff Engineer/Samsung Electronics
  0 siblings, 0 replies; 7+ messages in thread
From: Dawid Kozinski/Multimedia (PLT) /SRPOL/Staff Engineer/Samsung Electronics @ 2025-04-25  6:54 UTC (permalink / raw)
  To: 'FFmpeg development discussions and patches'




> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Mark
> Thompson
> Sent: czwartek, 24 kwietnia 2025 21:02
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH v1 5/8] avformat/mov_muxer: Extended
> MOV muxer to handle APV video content
> 
> On 24/04/2025 13:08, Dawid Kozinski/Multimedia (PLT) /SRPOL/Staff
> Engineer/Samsung Electronics wrote:
> >> -----Original Message-----
> >> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> >> Mark Thompson
> >> Sent: środa, 23 kwietnia 2025 23:08
> >> To: ffmpeg-devel@ffmpeg.org
> >> Subject: Re: [FFmpeg-devel] [PATCH v1 5/8] avformat/mov_muxer:
> >> Extended MOV muxer to handle APV video content
> >>
> >> On 23/04/2025 15:13, Dawid Kozinski wrote:
> >>> - Changes in mov_write_video_tag function to handle APV elementary
> >>> stream
> >>> - Provided structure APVDecoderConfigurationRecord that specifies
> >>> the decoder configuration information for APV video content
> >>>
> >>> Signed-off-by: Dawid Kozinski <d.kozinski@samsung.com>
> >>> ---
> >>>  libavformat/Makefile    |   2 +-
> >>>  libavformat/apv.c       | 827
> ++++++++++++++++++++++++++++++++++++++++
> >>>  libavformat/apv.h       |  94 +++++
> >>>  libavformat/isom_tags.c |   2 +
> >>>  libavformat/movenc.c    |  47 +++
> >>>  5 files changed, 971 insertions(+), 1 deletion(-)
> >>
> >> Hi,
> >>
> >> Two thoughts here:
> >>
> >> First, your AVPackets contain a raw_bitstream_access_unit().  I don't
> > think this is
> >> the right approach - the packets should contain the codec data only,
> >> not
> > the
> >> additional encapsulation.  (This is the method I followed.)
> >>
> >> For this patch in particular, I think it results in writing the files
> > incorrectly: the
> >> specification says "each sample shall contain one and only one access
> >> unit
> > of
> >> APV coded data", which I interpret to mean one access_unit() syntax
> > structure.
> >>
> >> This also results in the size effectively appearing multiple times in
> >> the
> > file for no
> >> good reason:
> >>
> >> 00000020  66 72 65 65 00 01 15 db  6d 64 61 74 00 01 15 cf
> > |free....mdat....|
> >>
> >>                       ^ mdat size              ^ au_size
> >>
> >> 00000030  61 50 76 31 00 01 15 c7  01 00 01 00 21 21 40 00
> > |aPv1........!!@.|
> >>           ^ signature ^ pbu_size   ^ pbu_type followed by header
> >>
> >> The separate pbu_size makes sense if there is also metadata, but
> >> having
> > the
> >> mdat box with a size immediately followed by the same size (well,
> >> minus
> > twelve
> >> for mdat size + mdat + au size) again inside the box does not seem
> > helpful.
> >>
> >
> > Hi Mark,
> >
> > Indeed. AVPackets contains raw_bitstream_access_unit()
> >
> > raw_bitstream_access_unit() {
> >     au_size                                                   | u(32)
> >     access_unit(au_size)                                      |
> > }
> >
> > Such data comes out from the APV encoder.
> > Data from the encoder can go to the movmuxer, but it can also go to
> > another muxer like apvmuxer.
> >
> > const FFOutputFormat ff_apv_muxer = {
> >     .p.name            = "apv",
> >     .p.long_name       = NULL_IF_CONFIG_SMALL("raw APV video"),
> >     .p.extensions      = "apv",
> >     .p.audio_codec     = AV_CODEC_ID_NONE,
> >     .p.video_codec     = AV_CODEC_ID_APV,
> >     .flags_internal    = FF_OFMT_FLAG_MAX_ONE_OF_EACH |
> >                          FF_OFMT_FLAG_ONLY_DEFAULT_CODECS,
> >     .write_packet      = ff_raw_write_packet,
> >     .p.flags           = AVFMT_NOTIMESTAMPS,
> > };
> >
> > Thanks to the fact that the APV muxer receives such data packed in
> > AVPacket, I was able to use the default function ff_raw_write_packet()
> > from rawenc.c as FFOutputFormat::write_packet for ff_apv_muxer.
> >
> > The APV muxer uses ff_raw_write_packet, which takes the data that came
> > from the encoder packed in AVPacket and writes the data to a file.
> >
> > In the case of saving the elementary APV stream to a file, the data
> > should have the format [au_size (4Bytes)][access_unit][au_size
> > (4Bytes)][access_unit]...[au_size(4Bytes)][access_unit].
> >
> > Regarding APV in movmuxer, we can eliminate the additional AU size in
> > two
> > ways:
> >
> > 1.
> > We can change the output data format from the encoder so that the
> > AVPacket contains access_unit() without the 4-byte prefix containing
> > information about the length of the AU. However, this will require
> > using a custom .write_packet function in ff_apv_muxer that will add a
> > prefix specifying the size of the AU in the output stream before each access
> unit.
> >
> >
> > 2.
> > We will make changes in the mov muxer (movenc.c). We will not write
> > the data using the call avio_write(pb, pkt->data, size); but will
> > implement custom handling for APV (an additional if() in
> ff_mov_write_packet()):
> >
> > if (par->codec_id == AV_CODEC_ID_APV) {
> >     avio_write(pb, pkt->data + 4, size - 4); }
> >
> > Please share your opinion.
> 
> I don't think the implementation details which you mention are relevant to the
> questions I am asking.
> 
> 
> The primary point here is that the spec at
> <https://protect2.fireeye.com/v1/url?k=3154f65d-50dfe36e-31557d12-
> 000babff9bb7-62e68669a9a2e365&q=1&e=25c0300b-6820-427e-9078-
> edf55c3bc81b&u=https%3A%2F%2Fgithub.com%2FAcademySoftwareFoundatio
> n%2Fopenapv%2Fblob%2Fmain%2Freadme%2Fapv_isobmff.md> says:
> 
>   "each sample shall contain one and only one access unit of APV coded data"
> 
> This states that it contains an "access unit", not a "raw bitstream access unit" as
> in your implementation.
> 
> Since your implementation and the spec disagree, can you comment on the
> status / intended finality of each of them?  (Is there another implementation of
> this which you are testing against, or is this the first one?)
> 
> To me the spec definition appears to be the more sensible one, since the raw
> bitstream format functions as a container and it does not make sense to nest
> containers.
> 
> 
> Separately to that queston, there is the point that inside ffmpeg we may make
> our own choice about what an AVPacket should contain (and then use it
> consistently).
> 
> In this case my opinion is that having an AVPacket contain an access unit (the
> top-level syntax structure defined in the "syntax and semantics" section of the
> specification) is the choice which makes most sense, and it is also the one which
> I have followed consistently in my patch series.
> 
Mark, thank you for your quick response and for sharing your insights. I appreciate your perspective on the implementation details, and I understand your concerns regarding the alignment between the specification and the current implementation (or rather, the lack of alignment between the current implementation and the spec).

You are right; the specification clearly states that "each sample shall contain one and only one access unit of APV coded data," and I agree with you that the implementation should be consistent with the specification.

I am currently working on a new version of the implementation that takes into account compliance with the specification, in which the AVPacket will contain the access unit, but it  requires testing.

> >> Second, I think we need a consistent decision on what the extradata
> >> should
> > be
> >> doing.  The APVDecoderConfigurationRecord makes sense as a thing for
> >> it to contain, but it's not clear to me that it needs to exist at all
> >> as it has
> > no effect on
> >> anything inside ffmpeg (a decoder will always ignore it).
> >>
> >> You currently make extradata from one of your demuxers but not other
> >> one
> > or
> >> the encoder, and nothing requires it when consuming.  Why is it
> >> useful to
> > have
> >> ever?
> 
> Your comment on this question would also be appreciated.
> 
I appreciate your insights regarding extradata.

I agree that we need to establish a consistent approach to how extradata is utilized.
I understand your concerns about the necessity of including the APVDecoderConfigurationRecord as part of the extradata, especially if it has no impact on the decoding process.

The current implementation, where the APVDecoderConfigurationRecord is generated by the movmuxer but the extradata is not used by the decoder, and nothing currently requires data from the extradata, indeed raises questions about its overall utility. 
If extradata is not required for consumption and does not influence the decoding process, we should carefully consider its significance.

In my view, if we decide to retain the extradata, we should ensure that it serves a clear purpose and is consistently implemented across all components. 
If we determine that it does not provide any significant benefits, it may be worth discussing its removal.

BR
Dawid

> Thanks,
> 
> - Mark
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://protect2.fireeye.com/v1/url?k=aab97fc0-cb326af3-aab8f48f-
> 000babff9bb7-6fcdf9423671c6a7&q=1&e=25c0300b-6820-427e-9078-
> edf55c3bc81b&u=https%3A%2F%2Fffmpeg.org%2Fmailman%2Flistinfo%2Fffmp
> eg-devel
> 
> To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org
> with subject "unsubscribe".


_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-04-25  6:55 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CGME20250423141306eucas1p229fa078339a2a993c609464e101c9c6d@eucas1p2.samsung.com>
2025-04-23 14:13 ` [FFmpeg-devel] [PATCH v1 5/8] avformat/mov_muxer: Extended MOV muxer to handle APV video content Dawid Kozinski
2025-04-23 14:43   ` James Almer
2025-04-24  8:59     ` Dawid Kozinski/Multimedia (PLT) /SRPOL/Staff Engineer/Samsung Electronics
2025-04-23 21:08   ` Mark Thompson
2025-04-24 12:08     ` Dawid Kozinski/Multimedia (PLT) /SRPOL/Staff Engineer/Samsung Electronics
2025-04-24 19:02       ` Mark Thompson
2025-04-25  6:54         ` Dawid Kozinski/Multimedia (PLT) /SRPOL/Staff Engineer/Samsung Electronics

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git