From: Lynne <dev@lynne.ee> To: Ffmpeg Devel <ffmpeg-devel@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH] swscale/ppc: remove hScale8To19_vsx Date: Thu, 18 May 2023 14:52:52 +0200 (CEST) Message-ID: <NViuta7--B-9@lynne.ee> (raw) [-- Attachment #1: Type: text/plain, Size: 212 bytes --] Fails checkasm on a Power9 DD2.2 02CY771 system. The assembly doesn't seem to have been independently tested at all. https://paste.sr.ht/~ky0ko/fe255ff73fab49b0c6d335437d894c1db626289e Patch attached. [-- Attachment #2: 0001-swscale-ppc-remove-hScale8To19_vsx.patch --] [-- Type: text/x-diff, Size: 3777 bytes --] From 0ba39b07e85d866ef43c38e1bcf352af2bedacb9 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Thu, 18 May 2023 14:42:14 +0200 Subject: [PATCH] swscale/ppc: remove hScale8To19_vsx Fails checkasm on a Power9 system. --- libswscale/ppc/swscale_vsx.c | 60 ------------------------------------ 1 file changed, 60 deletions(-) diff --git a/libswscale/ppc/swscale_vsx.c b/libswscale/ppc/swscale_vsx.c index 8152ce7f10..7080a16aee 100644 --- a/libswscale/ppc/swscale_vsx.c +++ b/libswscale/ppc/swscale_vsx.c @@ -1858,64 +1858,6 @@ static void hcscale_fast_vsx(SwsContext *c, int16_t *dst1, int16_t *dst2, #undef HCSCALE -static void hScale8To19_vsx(SwsContext *c, int16_t *_dst, int dstW, - const uint8_t *src, const int16_t *filter, - const int32_t *filterPos, int filterSize) -{ - int i, j; - int32_t *dst = (int32_t *) _dst; - vec_s16 vfilter, vin; - vec_u8 vin8; - vec_s32 vout; - const vec_u8 vzero = vec_splat_u8(0); - const vec_u8 vunusedtab[8] = { - (vec_u8) {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf}, - (vec_u8) {0x0, 0x1, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10}, - (vec_u8) {0x0, 0x1, 0x2, 0x3, 0x10, 0x10, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10}, - (vec_u8) {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x10, 0x10, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10}, - (vec_u8) {0x0, 0x1, 0x2, 
0x3, 0x4, 0x5, 0x6, 0x7, - 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10}, - (vec_u8) {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10}, - (vec_u8) {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0x10, 0x10, 0x10, 0x10}, - (vec_u8) {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0x10, 0x10}, - }; - const vec_u8 vunused = vunusedtab[filterSize % 8]; - - if (filterSize == 1) { - for (i = 0; i < dstW; i++) { - int srcPos = filterPos[i]; - int val = 0; - for (j = 0; j < filterSize; j++) { - val += ((int)src[srcPos + j]) * filter[filterSize * i + j]; - } - dst[i] = FFMIN(val >> 3, (1 << 19) - 1); // the cubic equation does overflow ... - } - } else { - for (i = 0; i < dstW; i++) { - const int srcPos = filterPos[i]; - vout = vec_splat_s32(0); - for (j = 0; j < filterSize; j += 8) { - vin8 = vec_vsx_ld(0, &src[srcPos + j]); - vin = (vec_s16) vec_mergeh(vin8, vzero); - if (j + 8 > filterSize) // Remove the unused elements on the last round - vin = vec_perm(vin, (vec_s16) vzero, vunused); - - vfilter = vec_vsx_ld(0, &filter[filterSize * i + j]); - vout = vec_msums(vin, vfilter, vout); - } - vout = vec_sums(vout, (vec_s32) vzero); - dst[i] = FFMIN(vout[3] >> 3, (1 << 19) - 1); - } - } -} - static void hScale16To19_vsx(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize) @@ -2092,8 +2034,6 @@ av_cold void ff_sws_init_swscale_vsx(SwsContext *c) c->hyscale_fast = hyscale_fast_vsx; c->hcscale_fast = hcscale_fast_vsx; } - } else { - c->hyScale = c->hcScale = hScale8To19_vsx; } } else { if (power8) { -- 2.40.0 [-- Attachment #3: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next reply other threads:[~2023-05-18 12:53 UTC|newest] Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-05-18 12:52 Lynne [this message] [not found] ` <NViuta7--B-9@lynne.ee-NViuxNy--3-9> 2023-05-20 18:38 ` Lynne 2023-05-22 7:45 ` Martin Storsjö
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=NViuta7--B-9@lynne.ee \
    --to=dev@lynne.ee \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

  git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

  # If you have public-inbox 1.1+ installed, you may
  # initialize and index your mirror using the following commands:
  public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
    ffmpegdev@gitmailbox.com
  public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git