From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTP id 04145481C4 for ; Sat, 10 Feb 2024 22:59:17 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id C02BC68D0DD; Sun, 11 Feb 2024 00:59:15 +0200 (EET) Received: from mail-il1-f174.google.com (mail-il1-f174.google.com [209.85.166.174]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id EAA6B68D08A for ; Sun, 11 Feb 2024 00:59:08 +0200 (EET) Received: by mail-il1-f174.google.com with SMTP id e9e14a558f8ab-363bd37434dso6120805ab.1 for ; Sat, 10 Feb 2024 14:59:08 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1707605946; x=1708210746; darn=ffmpeg.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=ObSSJ1+hUz5Rr3o7Jw77woxv5ed+9flrA4NYhZXm93w=; b=jSZh0omIOMcxH/ewG02sshIGSjOP6Tk21lXbK92UPgnpZGVs/b/z/s9UVqqWgRZb4o zOxzfsV7S9donbDqJOQ69n4Vh312VFcP1S3lwebuuvo8Nn6iZ1fC1ro+o1/9qJRBhRSY Zc7e58c+kwqyinhKpAxOndLbmg9j47TC0BrplLptmLs71+m7WuNEDlX/hlHCRBnWJkzL 0Vp3b+oJvetUF1JpdKYZTDBDs3dNweLdny3rt/ib7RETLvZbXiyFmaPerZYRDzhNF0Sx lv7NGzCqUe7JTkTdVuIwQ9Nbgkc0VGn4m4Z/78DP4OiK8D8gsiV/l/h2H4jJYJn6R2+R AqrQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1707605946; x=1708210746; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=ObSSJ1+hUz5Rr3o7Jw77woxv5ed+9flrA4NYhZXm93w=; b=DMSPLkCkO9wv6MXYrLtBzJ+ahWsyu67cScWreBXJtzIG3szMAtJ9rIi0Vl4OSG/CdL 1cJM2LBrNlNhDxsNPPxVbkLU42qazJOdw0GDFg6aDyfagj/+B6HeP35yJob8tHhOxRk0 a+I52nN4IOhzKuBhZnAA5Qe8zGorRHA/uywf91/bHJmBd/SGcM2VNyJmgTcXxzVLyGsI 4vLoD9nQm2aNLUpfPnPG3UFw81BWmYvwFejluW9XUH6hD+bu6IHtLB0ML197OImINzwq 
WK7oZnWibIdRPehfp8OwLpKM5wr/fq9qQayJ48MEG8n0Ihi8Do2ftQVr2MIwjcLqBm9D D6qQ== X-Gm-Message-State: AOJu0YzU7/40E/HPHPwXU7XU0dEr5MGHfs5zWkhJG1EUtZIvuaJQHkpc hy2QxYZlL4kHhTT3eXswpga3mnnigcgzCTQN8Wgc/MMwkbAkHM9LNe6Si65N X-Google-Smtp-Source: AGHT+IEo2U9QnM1D+yZjYGKwbelGMGS0WJiLuQe27Nu5WHGrsEfvHm6ByHgo1pPfWwKO6TFOzFK4Fg== X-Received: by 2002:a05:6e02:1d03:b0:363:ba45:c581 with SMTP id i3-20020a056e021d0300b00363ba45c581mr4766131ila.15.1707605946285; Sat, 10 Feb 2024 14:59:06 -0800 (PST) Received: from localhost (192-184-169-201.fiber.dynamic.sonic.net. [192.184.169.201]) by smtp.gmail.com with UTF8SMTPSA id q5-20020a170902c9c500b001d88f0359c1sm3461775pld.278.2024.02.10.14.59.05 (version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128); Sat, 10 Feb 2024 14:59:05 -0800 (PST) From: Connor Worley To: ffmpeg-devel@ffmpeg.org Date: Sat, 10 Feb 2024 14:58:46 -0800 Message-ID: <20240210225847.53228-2-connorbworley@gmail.com> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20240210225847.53228-1-connorbworley@gmail.com> References: <20240210225847.53228-1-connorbworley@gmail.com> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v2 2/3] lavc/dxv: use texdsp funcs for texture block decompression X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Connor Worley Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Archived-At: List-Archive: List-Post: Signed-off-by: Connor Worley --- libavcodec/dxv.c | 289 ++++++++++++----------------------------------- 1 file changed, 75 insertions(+), 214 deletions(-) diff --git a/libavcodec/dxv.c b/libavcodec/dxv.c index 16c34fff3b..cd78de3e0d 100644 --- a/libavcodec/dxv.c +++ b/libavcodec/dxv.c @@ -38,15 +38,12 @@ typedef struct DXVContext { 
GetByteContext gbc; uint8_t *tex_data; // Compressed texture - uint8_t *ctex_data; // Compressed texture + uint8_t *ctex_data; // Compressed chroma texture int tex_rat; // Compression ratio int tex_step; // Distance between blocks int ctex_step; // Distance between blocks int64_t tex_size; // Texture size - int64_t ctex_size; // Texture size - - /* Optimal number of slices for parallel decoding */ - int slice_count; + int64_t ctex_size; // Chroma texture size uint8_t *op_data[4]; // Opcodes int64_t op_size[4]; // Opcodes size @@ -56,198 +53,8 @@ typedef struct DXVContext { int ctexture_block_w; int ctexture_block_h; - - /* Pointer to the selected decompression function */ - int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block); - int (*tex_funct_planar[2])(uint8_t *plane0, ptrdiff_t stride0, - uint8_t *plane1, ptrdiff_t stride1, - const uint8_t *block); } DXVContext; -static void decompress_indices(uint8_t *dst, const uint8_t *src) -{ - int block, i; - - for (block = 0; block < 2; block++) { - int tmp = AV_RL24(src); - - /* Unpack 8x3 bit from last 3 byte block */ - for (i = 0; i < 8; i++) - dst[i] = (tmp >> (i * 3)) & 0x7; - - src += 3; - dst += 8; - } -} - -static int extract_component(int yo0, int yo1, int code) -{ - int yo; - - if (yo0 == yo1) { - yo = yo0; - } else if (code == 0) { - yo = yo0; - } else if (code == 1) { - yo = yo1; - } else { - if (yo0 > yo1) { - yo = (uint8_t) (((8 - code) * yo0 + - (code - 1) * yo1) / 7); - } else { - if (code == 6) { - yo = 0; - } else if (code == 7) { - yo = 255; - } else { - yo = (uint8_t) (((6 - code) * yo0 + - (code - 1) * yo1) / 5); - } - } - } - - return yo; -} - -static int cocg_block(uint8_t *plane0, ptrdiff_t stride0, - uint8_t *plane1, ptrdiff_t stride1, - const uint8_t *block) -{ - uint8_t co_indices[16]; - uint8_t cg_indices[16]; - uint8_t co0 = *(block); - uint8_t co1 = *(block + 1); - uint8_t cg0 = *(block + 8); - uint8_t cg1 = *(block + 9); - int x, y; - - decompress_indices(co_indices, block 
+ 2); - decompress_indices(cg_indices, block + 10); - - for (y = 0; y < 4; y++) { - for (x = 0; x < 4; x++) { - int co_code = co_indices[x + y * 4]; - int cg_code = cg_indices[x + y * 4]; - - plane0[x] = extract_component(cg0, cg1, cg_code); - plane1[x] = extract_component(co0, co1, co_code); - } - plane0 += stride0; - plane1 += stride1; - } - - return 16; -} - -static void yao_subblock(uint8_t *dst, uint8_t *yo_indices, - ptrdiff_t stride, const uint8_t *block) -{ - uint8_t yo0 = *(block); - uint8_t yo1 = *(block + 1); - int x, y; - - decompress_indices(yo_indices, block + 2); - - for (y = 0; y < 4; y++) { - for (x = 0; x < 4; x++) { - int yo_code = yo_indices[x + y * 4]; - - dst[x] = extract_component(yo0, yo1, yo_code); - } - dst += stride; - } -} - -static int yo_block(uint8_t *dst, ptrdiff_t stride, - uint8_t *unused0, ptrdiff_t unused1, - const uint8_t *block) -{ - uint8_t yo_indices[16]; - - yao_subblock(dst, yo_indices, stride, block); - yao_subblock(dst + 4, yo_indices, stride, block + 8); - yao_subblock(dst + 8, yo_indices, stride, block + 16); - yao_subblock(dst + 12, yo_indices, stride, block + 24); - - return 32; -} - -static int yao_block(uint8_t *plane0, ptrdiff_t stride0, - uint8_t *plane3, ptrdiff_t stride1, - const uint8_t *block) -{ - uint8_t yo_indices[16]; - uint8_t a_indices[16]; - - yao_subblock(plane0, yo_indices, stride0, block); - yao_subblock(plane3, a_indices, stride1, block + 8); - yao_subblock(plane0 + 4, yo_indices, stride0, block + 16); - yao_subblock(plane3 + 4, a_indices, stride1, block + 24); - yao_subblock(plane0 + 8, yo_indices, stride0, block + 32); - yao_subblock(plane3 + 8, a_indices, stride1, block + 40); - yao_subblock(plane0 + 12, yo_indices, stride0, block + 48); - yao_subblock(plane3 + 12, a_indices, stride1, block + 56); - - return 64; -} - -static int decompress_texture_thread(AVCodecContext *avctx, void *arg, - int slice, int thread_nb) -{ - const DXVContext *ctx = avctx->priv_data; - AVFrame *frame = arg; - const 
uint8_t *d = ctx->tex_data; - int w_block = avctx->coded_width / ctx->texture_block_w; - int h_block = avctx->coded_height / ctx->texture_block_h; - int x, y; - int start_slice, end_slice; - - start_slice = h_block * slice / ctx->slice_count; - end_slice = h_block * (slice + 1) / ctx->slice_count; - - if (ctx->tex_funct) { - for (y = start_slice; y < end_slice; y++) { - uint8_t *p = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h; - int off = y * w_block; - for (x = 0; x < w_block; x++) { - ctx->tex_funct(p + x * 4 * ctx->texture_block_w, frame->linesize[0], - d + (off + x) * ctx->tex_step); - } - } - } else { - const uint8_t *c = ctx->ctex_data; - - for (y = start_slice; y < end_slice; y++) { - uint8_t *p0 = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h; - uint8_t *p3 = ctx->tex_step != 64 ? NULL : frame->data[3] + y * frame->linesize[3] * ctx->texture_block_h; - int off = y * w_block; - for (x = 0; x < w_block; x++) { - ctx->tex_funct_planar[0](p0 + x * ctx->texture_block_w, frame->linesize[0], - p3 != NULL ? 
p3 + x * ctx->texture_block_w : NULL, frame->linesize[3], - d + (off + x) * ctx->tex_step); - } - } - - w_block = (avctx->coded_width / 2) / ctx->ctexture_block_w; - h_block = (avctx->coded_height / 2) / ctx->ctexture_block_h; - start_slice = h_block * slice / ctx->slice_count; - end_slice = h_block * (slice + 1) / ctx->slice_count; - - for (y = start_slice; y < end_slice; y++) { - uint8_t *p0 = frame->data[1] + y * frame->linesize[1] * ctx->ctexture_block_h; - uint8_t *p1 = frame->data[2] + y * frame->linesize[2] * ctx->ctexture_block_h; - int off = y * w_block; - for (x = 0; x < w_block; x++) { - ctx->tex_funct_planar[1](p0 + x * ctx->ctexture_block_w, frame->linesize[1], - p1 + x * ctx->ctexture_block_w, frame->linesize[2], - c + (off + x) * ctx->ctex_step); - } - } - } - - return 0; -} - /* This scheme addresses already decoded elements depending on 2-bit status: * 0 -> copy new element * 1 -> copy one element from position -x @@ -1044,6 +851,8 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, { DXVContext *ctx = avctx->priv_data; GetByteContext *gbc = &ctx->gbc; + AVCodecContext cavctx = *avctx; + TextureDSPThreadContext texdsp_ctx, ctexdsp_ctx; int (*decompress_tex)(AVCodecContext *avctx); const char *msgcomp, *msgtext; uint32_t tag; @@ -1053,21 +862,22 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, bytestream2_init(gbc, avpkt->data, avpkt->size); + cavctx.coded_height = avctx->coded_height / 2; + cavctx.coded_width = avctx->coded_width / 2; + ctx->texture_block_h = 4; ctx->texture_block_w = 4; avctx->pix_fmt = AV_PIX_FMT_RGBA; avctx->colorspace = AVCOL_SPC_RGB; - ctx->tex_funct = NULL; - ctx->tex_funct_planar[0] = NULL; - ctx->tex_funct_planar[1] = NULL; - tag = bytestream2_get_le32(gbc); switch (tag) { case DXV_FMT_DXT1: decompress_tex = dxv_decompress_dxt1; - ctx->tex_funct = ctx->texdsp.dxt1_block; + texdsp_ctx.tex_funct = ctx->texdsp.dxt1_block; + texdsp_ctx.tex_ratio = 8; + texdsp_ctx.raw_ratio = 16; ctx->tex_rat = 8; 
ctx->tex_step = 8; msgcomp = "DXTR1"; @@ -1076,7 +886,9 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, case DXV_FMT_DXT5: decompress_tex = dxv_decompress_dxt5; /* DXV misnomers DXT5, alpha is premultiplied so use DXT4 instead */ - ctx->tex_funct = ctx->texdsp.dxt4_block; + texdsp_ctx.tex_funct = ctx->texdsp.dxt4_block; + texdsp_ctx.tex_ratio = 16; + texdsp_ctx.raw_ratio = 16; ctx->tex_rat = 4; ctx->tex_step = 16; msgcomp = "DXTR5"; @@ -1084,8 +896,12 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, break; case DXV_FMT_YCG6: decompress_tex = dxv_decompress_ycg6; - ctx->tex_funct_planar[0] = yo_block; - ctx->tex_funct_planar[1] = cocg_block; + texdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block; + texdsp_ctx.tex_ratio = 8; + texdsp_ctx.raw_ratio = 4; + ctexdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block; + ctexdsp_ctx.tex_ratio = 16; + ctexdsp_ctx.raw_ratio = 4; ctx->tex_rat = 8; ctx->tex_step = 32; ctx->ctex_step = 16; @@ -1101,8 +917,12 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, break; case DXV_FMT_YG10: decompress_tex = dxv_decompress_yg10; - ctx->tex_funct_planar[0] = yao_block; - ctx->tex_funct_planar[1] = cocg_block; + texdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block; + texdsp_ctx.tex_ratio = 16; + texdsp_ctx.raw_ratio = 4; + ctexdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block; + ctexdsp_ctx.tex_ratio = 16; + ctexdsp_ctx.raw_ratio = 4; ctx->tex_rat = 4; ctx->tex_step = 64; ctx->ctex_step = 16; @@ -1131,14 +951,20 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, } if (old_type & 0x40) { + tag = DXV_FMT_DXT5; msgtext = "DXT5"; - ctx->tex_funct = ctx->texdsp.dxt5_block; + texdsp_ctx.tex_funct = ctx->texdsp.dxt5_block; + texdsp_ctx.tex_ratio = 16; + texdsp_ctx.raw_ratio = 16; ctx->tex_step = 16; } else if (old_type & 0x20 || version_major == 1) { + tag = DXV_FMT_DXT1; msgtext = "DXT1"; - ctx->tex_funct = ctx->texdsp.dxt1_block; + texdsp_ctx.tex_funct = ctx->texdsp.dxt1_block; + 
texdsp_ctx.tex_ratio = 8; + texdsp_ctx.raw_ratio = 16; ctx->tex_step = 8; } else { av_log(avctx, AV_LOG_ERROR, "Unsupported header (0x%08"PRIX32")\n.", tag); @@ -1148,10 +974,10 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, break; } - ctx->slice_count = av_clip(avctx->thread_count, 1, - avctx->coded_height / FFMAX(ctx->texture_block_h, - ctx->ctexture_block_h)); - + texdsp_ctx.slice_count = av_clip(avctx->thread_count, 1, + avctx->coded_height / TEXTURE_BLOCK_H); + ctexdsp_ctx.slice_count = av_clip(avctx->thread_count, 1, + cavctx.coded_height / TEXTURE_BLOCK_H); /* New header is 12 bytes long. */ if (!old_type) { version_major = bytestream2_get_byte(gbc) - 1; @@ -1216,9 +1042,44 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, if (ret < 0) return ret; - /* Now decompress the texture with the standard functions. */ - avctx->execute2(avctx, decompress_texture_thread, - frame, NULL, ctx->slice_count); + switch (tag) { + case DXV_FMT_YG10: + /* BC5 texture with alpha in the second half of each block */ + texdsp_ctx.tex_data.in = ctx->tex_data + texdsp_ctx.tex_ratio / 2; + texdsp_ctx.frame_data.out = frame->data[3]; + texdsp_ctx.stride = frame->linesize[3]; + ret = ff_texturedsp_exec_decompress_threads(avctx, &texdsp_ctx); + if (ret < 0) + return ret; + /* fallthrough */ + case DXV_FMT_YCG6: + /* BC5 texture with Co in the first half of each block and Cg in the second */ + ctexdsp_ctx.tex_data.in = ctx->ctex_data; + ctexdsp_ctx.frame_data.out = frame->data[2]; + ctexdsp_ctx.stride = frame->linesize[2]; + ret = ff_texturedsp_exec_decompress_threads(&cavctx, &ctexdsp_ctx); + if (ret < 0) + return ret; + ctexdsp_ctx.tex_data.in = ctx->ctex_data + ctexdsp_ctx.tex_ratio / 2; + ctexdsp_ctx.frame_data.out = frame->data[1]; + ctexdsp_ctx.stride = frame->linesize[1]; + ret = ff_texturedsp_exec_decompress_threads(&cavctx, &ctexdsp_ctx); + if (ret < 0) + return ret; + /* fallthrough */ + case DXV_FMT_DXT1: + case DXV_FMT_DXT5: + /* For DXT1 and 
DXT5, self explanatory + * For YCG6, BC4 texture for Y + * For YG10, BC5 texture with Y in the first half of each block */ + texdsp_ctx.tex_data.in = ctx->tex_data; + texdsp_ctx.frame_data.out = frame->data[0]; + texdsp_ctx.stride = frame->linesize[0]; + ret = ff_texturedsp_exec_decompress_threads(avctx, &texdsp_ctx); + if (ret < 0) + return ret; + break; + } /* Frame is ready to be output. */ frame->pict_type = AV_PICTURE_TYPE_I; -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".