* [FFmpeg-devel] [PATCH 1/2] swscale/ppc/swscale_ppc_template: Remove code not passing checkasm @ 2024-04-04 2:57 Andreas Rheinhardt 2024-04-04 2:58 ` [FFmpeg-devel] [PATCH 2/2] swscale/ppc/swscale_ppc_template: Reindent after the previous commit Andreas Rheinhardt 0 siblings, 1 reply; 3+ messages in thread From: Andreas Rheinhardt @ 2024-04-04 2:57 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Andreas Rheinhardt Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libswscale/ppc/swscale_ppc_template.c | 62 ++++----------------------- 1 file changed, 9 insertions(+), 53 deletions(-) diff --git a/libswscale/ppc/swscale_ppc_template.c b/libswscale/ppc/swscale_ppc_template.c index 84641f3a8b..e9abd33cbf 100644 --- a/libswscale/ppc/swscale_ppc_template.c +++ b/libswscale/ppc/swscale_ppc_template.c @@ -104,16 +104,6 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW, register int i; LOCAL_ALIGNED(16, int, tempo, [4]); - if (filterSize % 4) { - for (i = 0; i < dstW; i++) { - register int j; - register int srcPos = filterPos[i]; - register int val = 0; - for (j = 0; j < filterSize; j++) - val += ((int)src[srcPos + j]) * filter[filterSize * i + j]; - dst[i] = FFMIN(val >> 7, (1 << 15) - 1); - } - } else switch (filterSize) { case 4: for (i = 0; i < dstW; i++) { @@ -175,48 +165,14 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW, break; default: - for (i = 0; i < dstW; i++) { - register int j, av_unused offset = i * 2 * filterSize; - register int srcPos = filterPos[i]; - - vector signed int val_s, val_v = (vector signed int)vzero; - vector signed short av_unused filter_v0R; - vector unsigned char av_unused permF, av_unused src_v0, av_unused permS; - FIRST_LOAD(filter_v0R, offset, filter, permF); - FIRST_LOAD(src_v0, srcPos, src, permS); - - for (j = 0; j < filterSize - 15; j += 16) { - vector unsigned char av_unused src_v1, src_vF; - vector signed short av_unused filter_v1R, av_unused filter_v2R, - filter_v0, 
filter_v1, src_vA, src_vB; - vector signed int val_acc; - LOAD_SRCV(srcPos, j, src, permS, src_v0, src_v1, src_vF); - src_vA = // vec_unpackh sign-extends... - (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); - src_vB = // vec_unpackh sign-extends... - (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF)); - GET_VFD(i, j, filter, filter_v0R, filter_v1R, permF, filter_v0, 0); - GET_VFD(i, j, filter, filter_v1R, filter_v2R, permF, filter_v1, 16); - - val_acc = vec_msums(src_vA, filter_v0, val_v); - val_v = vec_msums(src_vB, filter_v1, val_acc); - UPDATE_PTR(filter_v2R, filter_v0R, src_v1, src_v0); - } - - if (j < filterSize - 7) { - // loading src_v0 is useless, it's already done above - vector unsigned char av_unused src_v1, src_vF; - vector signed short src_v, av_unused filter_v1R, filter_v; - LOAD_SRCV8(srcPos, j, src, permS, src_v0, src_v1, src_vF); - src_v = // vec_unpackh sign-extends... - (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); - GET_VFD(i, j, filter, filter_v0R, filter_v1R, permF, filter_v, 0); - val_v = vec_msums(src_v, filter_v, val_v); - } - val_s = vec_sums(val_v, vzero); - - VEC_ST(val_s, 0, tempo); - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); - } + for (register int i = 0; i < dstW; i++) { + register int j; + register int srcPos = filterPos[i]; + register int val = 0; + for (j = 0; j < filterSize; j++) + val += ((int)src[srcPos + j]) * filter[filterSize * i + j]; + dst[i] = FFMIN(val >> 7, (1 << 15) - 1); + } + break; } } -- 2.40.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 3+ messages in thread
* [FFmpeg-devel] [PATCH 2/2] swscale/ppc/swscale_ppc_template: Reindent after the previous commit 2024-04-04 2:57 [FFmpeg-devel] [PATCH 1/2] swscale/ppc/swscale_ppc_template: Remove code not passing checkasm Andreas Rheinhardt @ 2024-04-04 2:58 ` Andreas Rheinhardt 2024-04-04 4:01 ` Lynne 0 siblings, 1 reply; 3+ messages in thread From: Andreas Rheinhardt @ 2024-04-04 2:58 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Andreas Rheinhardt Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libswscale/ppc/swscale_ppc_template.c | 107 +++++++++++++------------- 1 file changed, 53 insertions(+), 54 deletions(-) diff --git a/libswscale/ppc/swscale_ppc_template.c b/libswscale/ppc/swscale_ppc_template.c index e9abd33cbf..3c2addd4a4 100644 --- a/libswscale/ppc/swscale_ppc_template.c +++ b/libswscale/ppc/swscale_ppc_template.c @@ -101,70 +101,69 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) { - register int i; LOCAL_ALIGNED(16, int, tempo, [4]); - switch (filterSize) { - case 4: - for (i = 0; i < dstW; i++) { - register int srcPos = filterPos[i]; - - vector unsigned char src_vF = unaligned_load(srcPos, src); - vector signed short src_v, filter_v; - vector signed int val_vEven, val_s; - src_v = // vec_unpackh sign-extends... 
- (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); - // now put our elements in the even slots - src_v = vec_mergeh(src_v, (vector signed short)vzero); - GET_VF4(i, filter_v, filter); - val_vEven = vec_mule(src_v, filter_v); - val_s = vec_sums(val_vEven, vzero); - vec_st(val_s, 0, tempo); - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); - } + switch (filterSize) { + case 4: + for (register int i = 0; i < dstW; i++) { + register int srcPos = filterPos[i]; + + vector unsigned char src_vF = unaligned_load(srcPos, src); + vector signed short src_v, filter_v; + vector signed int val_vEven, val_s; + src_v = // vec_unpackh sign-extends... + (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); + // now put our elements in the even slots + src_v = vec_mergeh(src_v, (vector signed short)vzero); + GET_VF4(i, filter_v, filter); + val_vEven = vec_mule(src_v, filter_v); + val_s = vec_sums(val_vEven, vzero); + vec_st(val_s, 0, tempo); + dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); + } break; - case 8: - for (i = 0; i < dstW; i++) { - register int srcPos = filterPos[i]; - vector unsigned char src_vF, av_unused src_v0, av_unused src_v1; - vector unsigned char av_unused permS; - vector signed short src_v, filter_v; - vector signed int val_v, val_s; - FIRST_LOAD(src_v0, srcPos, src, permS); - LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF); - src_v = // vec_unpackh sign-extends... 
- (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); - filter_v = vec_ld(i << 4, filter); - val_v = vec_msums(src_v, filter_v, (vector signed int)vzero); - val_s = vec_sums(val_v, vzero); - vec_st(val_s, 0, tempo); - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); - } + case 8: + for (register int i = 0; i < dstW; i++) { + register int srcPos = filterPos[i]; + vector unsigned char src_vF, av_unused src_v0, av_unused src_v1; + vector unsigned char av_unused permS; + vector signed short src_v, filter_v; + vector signed int val_v, val_s; + FIRST_LOAD(src_v0, srcPos, src, permS); + LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF); + src_v = // vec_unpackh sign-extends... + (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); + filter_v = vec_ld(i << 4, filter); + val_v = vec_msums(src_v, filter_v, (vector signed int)vzero); + val_s = vec_sums(val_v, vzero); + vec_st(val_s, 0, tempo); + dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); + } break; - case 16: - for (i = 0; i < dstW; i++) { - register int srcPos = filterPos[i]; + case 16: + for (register int i = 0; i < dstW; i++) { + register int srcPos = filterPos[i]; - vector unsigned char src_vF = unaligned_load(srcPos, src); - vector signed short src_vA = // vec_unpackh sign-extends... - (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); - vector signed short src_vB = // vec_unpackh sign-extends... - (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF)); - vector signed short filter_v0 = vec_ld(i << 5, filter); - vector signed short filter_v1 = vec_ld((i << 5) + 16, filter); + vector unsigned char src_vF = unaligned_load(srcPos, src); + vector signed short src_vA = // vec_unpackh sign-extends... + (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); + vector signed short src_vB = // vec_unpackh sign-extends... 
+ (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF)); + vector signed short filter_v0 = vec_ld(i << 5, filter); + vector signed short filter_v1 = vec_ld((i << 5) + 16, filter); - vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero); - vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc); + vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero); + vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc); - vector signed int val_s = vec_sums(val_v, vzero); + vector signed int val_s = vec_sums(val_v, vzero); - VEC_ST(val_s, 0, tempo); - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); - } + VEC_ST(val_s, 0, tempo); + dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); + } break; - default: + default: for (register int i = 0; i < dstW; i++) { register int j; register int srcPos = filterPos[i]; @@ -174,5 +173,5 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW, dst[i] = FFMIN(val >> 7, (1 << 15) - 1); } break; - } + } } -- 2.40.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/2] swscale/ppc/swscale_ppc_template: Reindent after the previous commit 2024-04-04 2:58 ` [FFmpeg-devel] [PATCH 2/2] swscale/ppc/swscale_ppc_template: Reindent after the previous commit Andreas Rheinhardt @ 2024-04-04 4:01 ` Lynne 0 siblings, 0 replies; 3+ messages in thread From: Lynne @ 2024-04-04 4:01 UTC (permalink / raw) To: FFmpeg development discussions and patches Apr 4, 2024, 04:58 by andreas.rheinhardt@outlook.com: > Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> > --- > libswscale/ppc/swscale_ppc_template.c | 107 +++++++++++++------------- > 1 file changed, 53 insertions(+), 54 deletions(-) > > diff --git a/libswscale/ppc/swscale_ppc_template.c b/libswscale/ppc/swscale_ppc_template.c > index e9abd33cbf..3c2addd4a4 100644 > --- a/libswscale/ppc/swscale_ppc_template.c > +++ b/libswscale/ppc/swscale_ppc_template.c > @@ -101,70 +101,69 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW, > const uint8_t *src, const int16_t *filter, > const int32_t *filterPos, int filterSize) > { > - register int i; > LOCAL_ALIGNED(16, int, tempo, [4]); > > - switch (filterSize) { > - case 4: > - for (i = 0; i < dstW; i++) { > - register int srcPos = filterPos[i]; > - > - vector unsigned char src_vF = unaligned_load(srcPos, src); > - vector signed short src_v, filter_v; > - vector signed int val_vEven, val_s; > - src_v = // vec_unpackh sign-extends... 
> - (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); > - // now put our elements in the even slots > - src_v = vec_mergeh(src_v, (vector signed short)vzero); > - GET_VF4(i, filter_v, filter); > - val_vEven = vec_mule(src_v, filter_v); > - val_s = vec_sums(val_vEven, vzero); > - vec_st(val_s, 0, tempo); > - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > - } > + switch (filterSize) { > + case 4: > + for (register int i = 0; i < dstW; i++) { > + register int srcPos = filterPos[i]; > + > + vector unsigned char src_vF = unaligned_load(srcPos, src); > + vector signed short src_v, filter_v; > + vector signed int val_vEven, val_s; > + src_v = // vec_unpackh sign-extends... > + (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); > + // now put our elements in the even slots > + src_v = vec_mergeh(src_v, (vector signed short)vzero); > + GET_VF4(i, filter_v, filter); > + val_vEven = vec_mule(src_v, filter_v); > + val_s = vec_sums(val_vEven, vzero); > + vec_st(val_s, 0, tempo); > + dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > + } > break; > - case 8: > - for (i = 0; i < dstW; i++) { > - register int srcPos = filterPos[i]; > - vector unsigned char src_vF, av_unused src_v0, av_unused src_v1; > - vector unsigned char av_unused permS; > - vector signed short src_v, filter_v; > - vector signed int val_v, val_s; > - FIRST_LOAD(src_v0, srcPos, src, permS); > - LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF); > - src_v = // vec_unpackh sign-extends... 
> - (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); > - filter_v = vec_ld(i << 4, filter); > - val_v = vec_msums(src_v, filter_v, (vector signed int)vzero); > - val_s = vec_sums(val_v, vzero); > - vec_st(val_s, 0, tempo); > - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > - } > + case 8: > + for (register int i = 0; i < dstW; i++) { > + register int srcPos = filterPos[i]; > + vector unsigned char src_vF, av_unused src_v0, av_unused src_v1; > + vector unsigned char av_unused permS; > + vector signed short src_v, filter_v; > + vector signed int val_v, val_s; > + FIRST_LOAD(src_v0, srcPos, src, permS); > + LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF); > + src_v = // vec_unpackh sign-extends... > + (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); > + filter_v = vec_ld(i << 4, filter); > + val_v = vec_msums(src_v, filter_v, (vector signed int)vzero); > + val_s = vec_sums(val_v, vzero); > + vec_st(val_s, 0, tempo); > + dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > + } > break; > > - case 16: > - for (i = 0; i < dstW; i++) { > - register int srcPos = filterPos[i]; > + case 16: > + for (register int i = 0; i < dstW; i++) { > + register int srcPos = filterPos[i]; > > - vector unsigned char src_vF = unaligned_load(srcPos, src); > - vector signed short src_vA = // vec_unpackh sign-extends... > - (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); > - vector signed short src_vB = // vec_unpackh sign-extends... > - (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF)); > - vector signed short filter_v0 = vec_ld(i << 5, filter); > - vector signed short filter_v1 = vec_ld((i << 5) + 16, filter); > + vector unsigned char src_vF = unaligned_load(srcPos, src); > + vector signed short src_vA = // vec_unpackh sign-extends... > + (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); > + vector signed short src_vB = // vec_unpackh sign-extends... 
> + (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF)); > + vector signed short filter_v0 = vec_ld(i << 5, filter); > + vector signed short filter_v1 = vec_ld((i << 5) + 16, filter); > > - vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero); > - vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc); > + vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero); > + vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc); > > - vector signed int val_s = vec_sums(val_v, vzero); > + vector signed int val_s = vec_sums(val_v, vzero); > > - VEC_ST(val_s, 0, tempo); > - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > - } > + VEC_ST(val_s, 0, tempo); > + dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > + } > break; > > - default: > + default: > for (register int i = 0; i < dstW; i++) { > register int j; > register int srcPos = filterPos[i]; > @@ -174,5 +173,5 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW, > dst[i] = FFMIN(val >> 7, (1 << 15) - 1); > } > break; > - } > + } > } > Patchset LGTM. I missed those in the previous broken PPC code removal I did. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-04-04 4:01 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-04 2:57 [FFmpeg-devel] [PATCH 1/2] swscale/ppc/swscale_ppc_template: Remove code not passing checkasm Andreas Rheinhardt
2024-04-04 2:58 ` [FFmpeg-devel] [PATCH 2/2] swscale/ppc/swscale_ppc_template: Reindent after the previous commit Andreas Rheinhardt
2024-04-04 4:01   ` Lynne
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git