* [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients.
@ 2022-01-10 14:58 Alan Kelly
2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 2/4] libswscale: Avx2 hscale can process any input of size which is a multiple of 4 Alan Kelly
` (4 more replies)
0 siblings, 5 replies; 7+ messages in thread
From: Alan Kelly @ 2022-01-10 14:58 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Alan Kelly
Make the code more readable, follow the style guide and propagate memory
allocation errors.
---
libswscale/swscale_internal.h | 2 +-
libswscale/utils.c | 68 ++++++++++++++++++++---------------
2 files changed, 40 insertions(+), 30 deletions(-)
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 3a78d95ba6..26d28d42e6 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -1144,5 +1144,5 @@ void ff_sws_slice_worker(void *priv, int jobnr, int threadnr,
#define MAX_LINES_AHEAD 4
//shuffle filter and filterPos for hyScale and hcScale filters in avx2
-void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW);
+int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW);
#endif /* SWSCALE_SWSCALE_INTERNAL_H */
diff --git a/libswscale/utils.c b/libswscale/utils.c
index c5ea8853d5..52f07e1661 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -278,39 +278,47 @@ static const FormatEntry format_entries[] = {
[AV_PIX_FMT_P416LE] = { 1, 1 },
};
-void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSize, int16_t *filter, int dstW){
+int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
+ int filterSize, int16_t *filter,
+ int dstW)
+{
#if ARCH_X86_64
- int i, j, k, l;
+ int i = 0, j = 0, k = 0;
int cpu_flags = av_get_cpu_flags();
+ if (!filter || dstW % 16 != 0) return 0;
if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) {
- if ((c->srcBpc == 8) && (c->dstBpc <= 14)){
- if (dstW % 16 == 0){
- if (filter != NULL){
- for (i = 0; i < dstW; i += 8){
- FFSWAP(int, filterPos[i + 2], filterPos[i+4]);
- FFSWAP(int, filterPos[i + 3], filterPos[i+5]);
- }
- if (filterSize > 4){
- int16_t *tmp2 = av_malloc(dstW * filterSize * 2);
- memcpy(tmp2, filter, dstW * filterSize * 2);
- for (i = 0; i < dstW; i += 16){//pixel
- for (k = 0; k < filterSize / 4; ++k){//fcoeff
- for (j = 0; j < 16; ++j){//inner pixel
- for (l = 0; l < 4; ++l){//coeff
- int from = i * filterSize + j * filterSize + k * 4 + l;
- int to = (i) * filterSize + j * 4 + l + k * 64;
- filter[to] = tmp2[from];
- }
- }
- }
- }
- av_free(tmp2);
- }
- }
- }
+ if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
+ int16_t *filterCopy = NULL;
+ if (filterSize > 4) {
+ if (!FF_ALLOC_TYPED_ARRAY(filterCopy, dstW * filterSize))
+ return AVERROR(ENOMEM);
+ memcpy(filterCopy, filter, dstW * filterSize * sizeof(int16_t));
+ }
+ // Do not swap filterPos for pixels which won't be processed by
+ // the main loop.
+ for (i = 0; i + 8 <= dstW; i += 8) {
+ FFSWAP(int, filterPos[i + 2], filterPos[i + 4]);
+ FFSWAP(int, filterPos[i + 3], filterPos[i + 5]);
+ }
+ if (filterSize > 4) {
+ // 16 pixels are processed at a time.
+ for (i = 0; i + 16 <= dstW; i += 16) {
+ // 4 filter coeffs are processed at a time.
+ for (k = 0; k + 4 <= filterSize; k += 4) {
+ for (j = 0; j < 16; ++j) {
+ int from = (i + j) * filterSize + k;
+ int to = i * filterSize + j * 4 + k * 16;
+ memcpy(&filter[to], &filterCopy[from], 4 * sizeof(int16_t));
+ }
+ }
+ }
+ }
+ if (filterCopy)
+ av_free(filterCopy);
}
}
#endif
+ return 0;
}
int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
@@ -1836,7 +1844,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
get_local_pos(c, 0, 0, 0),
get_local_pos(c, 0, 0, 0))) < 0)
goto fail;
- ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW);
+ if ((ret = ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0)
+ goto nomem;
if ((ret = initFilter(&c->hChrFilter, &c->hChrFilterPos,
&c->hChrFilterSize, c->chrXInc,
c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
@@ -1846,7 +1855,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
get_local_pos(c, c->chrSrcHSubSample, c->src_h_chr_pos, 0),
get_local_pos(c, c->chrDstHSubSample, c->dst_h_chr_pos, 0))) < 0)
goto fail;
- ff_shuffle_filter_coefficients(c, c->hChrFilterPos, c->hChrFilterSize, c->hChrFilter, c->chrDstW);
+ if ((ret = ff_shuffle_filter_coefficients(c, c->hChrFilterPos, c->hChrFilterSize, c->hChrFilter, c->chrDstW)) != 0)
+ goto nomem;
}
} // initialize horizontal stuff
--
2.34.1.575.g55b058a8bb-goog
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH 2/4] libswscale: Avx2 hscale can process any input of size which is a multiple of 4.
2022-01-10 14:58 [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
@ 2022-01-10 14:58 ` Alan Kelly
2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 3/4] libswscale: Enable hscale_avx2 for input sizes which ar emultiples " Alan Kelly
` (3 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Alan Kelly @ 2022-01-10 14:58 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Alan Kelly
The main loop processes blocks of 16 pixels. The tail processes blocks
of size 4.
---
libswscale/x86/scale_avx2.asm | 48 +++++++++++++++++++++++++++++++++--
1 file changed, 46 insertions(+), 2 deletions(-)
diff --git a/libswscale/x86/scale_avx2.asm b/libswscale/x86/scale_avx2.asm
index 20acdbd633..dc42abb100 100644
--- a/libswscale/x86/scale_avx2.asm
+++ b/libswscale/x86/scale_avx2.asm
@@ -53,6 +53,9 @@ cglobal hscale8to15_%1, 7, 9, 16, pos0, dst, w, srcmem, filter, fltpos, fltsize,
mova m14, [four]
shr fltsized, 2
%endif
+ cmp wq, 16
+ jl .tail_loop
+ mov countq, 0x10
.loop:
movu m1, [fltposq]
movu m2, [fltposq+32]
@@ -97,11 +100,52 @@ cglobal hscale8to15_%1, 7, 9, 16, pos0, dst, w, srcmem, filter, fltpos, fltsize,
vpsrad m6, 7
vpackssdw m5, m5, m6
vpermd m5, m15, m5
- vmovdqu [dstq + countq * 2], m5
+ vmovdqu [dstq], m5
+ add dstq, 0x20
add fltposq, 0x40
add countq, 0x10
cmp countq, wq
- jl .loop
+ jle .loop
+
+ sub countq, 0x10
+ cmp countq, wq
+ jge .end
+
+.tail_loop:
+ movu xm1, [fltposq]
+%ifidn %1, X4
+ pxor xm9, xm9
+ pxor xm10, xm10
+ xor innerq, innerq
+.tail_innerloop:
+%endif
+ vpcmpeqd xm13, xm13
+ vpgatherdd xm3,[srcmemq + xm1], xm13
+ vpunpcklbw xm5, xm3, xm0
+ vpunpckhbw xm6, xm3, xm0
+ vpmaddwd xm5, xm5, [filterq]
+ vpmaddwd xm6, xm6, [filterq + 16]
+ add filterq, 0x20
+%ifidn %1, X4
+ paddd xm9, xm5
+ paddd xm10, xm6
+ paddd xm1, xm14
+ add innerq, 1
+ cmp innerq, fltsizeq
+ jl .tail_innerloop
+ vphaddd xm5, xm9, xm10
+%else
+ vphaddd xm5, xm5, xm6
+%endif
+ vpsrad xm5, 7
+ vpackssdw xm5, xm5, xm5
+ vmovq [dstq], xm5
+ add dstq, 0x8
+ add fltposq, 0x10
+ add countq, 0x4
+ cmp countq, wq
+ jl .tail_loop
+.end:
REP_RET
%endmacro
--
2.34.1.575.g55b058a8bb-goog
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH 3/4] libswscale: Enable hscale_avx2 for input sizes which ar emultiples of 4.
2022-01-10 14:58 [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 2/4] libswscale: Avx2 hscale can process any input of size which is a multiple of 4 Alan Kelly
@ 2022-01-10 14:58 ` Alan Kelly
2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 4/4] checkasm/sw_scale: hscale does not requires cpuflag test Alan Kelly
` (2 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Alan Kelly @ 2022-01-10 14:58 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Alan Kelly
ff_shuffle_filter_coefficients shuffles the tail as required.
---
libswscale/utils.c | 17 +++++++++++++++--
libswscale/x86/swscale.c | 4 ++--
2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 52f07e1661..7e1e9c3834 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -285,7 +285,7 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
#if ARCH_X86_64
int i = 0, j = 0, k = 0;
int cpu_flags = av_get_cpu_flags();
- if (!filter || dstW % 16 != 0) return 0;
+ if (!filter || (dstW % 4 != 0)) return 0;
if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) {
if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
int16_t *filterCopy = NULL;
@@ -296,9 +296,11 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
}
// Do not swap filterPos for pixels which won't be processed by
// the main loop.
- for (i = 0; i + 8 <= dstW; i += 8) {
+ for (i = 0; i + 16 <= dstW; i += 16) {
FFSWAP(int, filterPos[i + 2], filterPos[i + 4]);
FFSWAP(int, filterPos[i + 3], filterPos[i + 5]);
+ FFSWAP(int, filterPos[i + 10], filterPos[i + 12]);
+ FFSWAP(int, filterPos[i + 11], filterPos[i + 13]);
}
if (filterSize > 4) {
// 16 pixels are processed at a time.
@@ -312,6 +314,17 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
}
}
}
+ // 4 pixels are processed at a time in the tail.
+ for (; i + 4 <= dstW; i += 4) {
+ // 4 filter coeffs are processed at a time.
+ for (k = 0; k + 4 <= filterSize; k += 4) {
+ for (j = 0; j < 4; ++j) {
+ int from = (i + j) * filterSize + k;
+ int to = i * filterSize + j * 4 + k * 4;
+ memcpy(&filter[to], &filterCopy[from], 4 * sizeof(int16_t));
+ }
+ }
+ }
}
if (filterCopy)
av_free(filterCopy);
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index fdc93866a6..1d8f19aa5a 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -580,9 +580,9 @@ switch(c->dstBpc){ \
if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) {
if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
- if (c->chrDstW % 16 == 0)
+ if (c->chrDstW % 4 == 0)
ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
- if (c->dstW % 16 == 0)
+ if (c->dstW % 4 == 0)
ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
}
}
--
2.34.1.575.g55b058a8bb-goog
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH 4/4] checkasm/sw_scale: hscale does not requires cpuflag test.
2022-01-10 14:58 [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 2/4] libswscale: Avx2 hscale can process any input of size which is a multiple of 4 Alan Kelly
2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 3/4] libswscale: Enable hscale_avx2 for input sizes which ar emultiples " Alan Kelly
@ 2022-01-10 14:58 ` Alan Kelly
2022-02-02 12:21 ` [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
2022-02-03 14:11 ` Michael Niedermayer
4 siblings, 0 replies; 7+ messages in thread
From: Alan Kelly @ 2022-01-10 14:58 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Alan Kelly
This is done in ff_shuffle_filter_coefficients.
---
tests/checkasm/sw_scale.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/tests/checkasm/sw_scale.c b/tests/checkasm/sw_scale.c
index 3c0a083b42..e7f916d3a8 100644
--- a/tests/checkasm/sw_scale.c
+++ b/tests/checkasm/sw_scale.c
@@ -168,8 +168,6 @@ static void check_hscale(void)
const uint8_t *src, const int16_t *filter,
const int32_t *filterPos, int filterSize);
- int cpu_flags = av_get_cpu_flags();
-
ctx = sws_alloc_context();
if (sws_init_context(ctx, NULL, NULL) < 0)
fail();
@@ -215,10 +213,10 @@ static void check_hscale(void)
filter[SRC_PIXELS * width + i] = rnd();
}
+
ff_sws_init_scale(ctx);
memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH));
- if ((cpu_flags & AV_CPU_FLAG_AVX2) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER))
- ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, SRC_PIXELS);
+ ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, SRC_PIXELS);
if (check_func(ctx->hcScale, "hscale_%d_to_%d_width%d", ctx->srcBpc, ctx->dstBpc + 1, width)) {
memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
--
2.34.1.575.g55b058a8bb-goog
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients.
2022-01-10 14:58 [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
` (2 preceding siblings ...)
2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 4/4] checkasm/sw_scale: hscale does not requires cpuflag test Alan Kelly
@ 2022-02-02 12:21 ` Alan Kelly
2022-02-03 14:11 ` Michael Niedermayer
4 siblings, 0 replies; 7+ messages in thread
From: Alan Kelly @ 2022-02-02 12:21 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Hi,
Is anybody interested in this patch set?
Thanks!
On Mon, Jan 10, 2022, 15:58 Alan Kelly <alankelly@google.com> wrote:
> Make the code more readable, follow the style guide and propagate memory
> allocation errors.
> ---
> libswscale/swscale_internal.h | 2 +-
> libswscale/utils.c | 68 ++++++++++++++++++++---------------
> 2 files changed, 40 insertions(+), 30 deletions(-)
>
> diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
> index 3a78d95ba6..26d28d42e6 100644
> --- a/libswscale/swscale_internal.h
> +++ b/libswscale/swscale_internal.h
> @@ -1144,5 +1144,5 @@ void ff_sws_slice_worker(void *priv, int jobnr, int
> threadnr,
> #define MAX_LINES_AHEAD 4
>
> //shuffle filter and filterPos for hyScale and hcScale filters in avx2
> -void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int
> filterSize, int16_t *filter, int dstW);
> +int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int
> filterSize, int16_t *filter, int dstW);
> #endif /* SWSCALE_SWSCALE_INTERNAL_H */
> diff --git a/libswscale/utils.c b/libswscale/utils.c
> index c5ea8853d5..52f07e1661 100644
> --- a/libswscale/utils.c
> +++ b/libswscale/utils.c
> @@ -278,39 +278,47 @@ static const FormatEntry format_entries[] = {
> [AV_PIX_FMT_P416LE] = { 1, 1 },
> };
>
> -void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int
> filterSize, int16_t *filter, int dstW){
> +int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
> + int filterSize, int16_t *filter,
> + int dstW)
> +{
> #if ARCH_X86_64
> - int i, j, k, l;
> + int i = 0, j = 0, k = 0;
> int cpu_flags = av_get_cpu_flags();
> + if (!filter || dstW % 16 != 0) return 0;
> if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags &
> AV_CPU_FLAG_SLOW_GATHER)) {
> - if ((c->srcBpc == 8) && (c->dstBpc <= 14)){
> - if (dstW % 16 == 0){
> - if (filter != NULL){
> - for (i = 0; i < dstW; i += 8){
> - FFSWAP(int, filterPos[i + 2], filterPos[i+4]);
> - FFSWAP(int, filterPos[i + 3], filterPos[i+5]);
> - }
> - if (filterSize > 4){
> - int16_t *tmp2 = av_malloc(dstW * filterSize * 2);
> - memcpy(tmp2, filter, dstW * filterSize * 2);
> - for (i = 0; i < dstW; i += 16){//pixel
> - for (k = 0; k < filterSize / 4; ++k){//fcoeff
> - for (j = 0; j < 16; ++j){//inner pixel
> - for (l = 0; l < 4; ++l){//coeff
> - int from = i * filterSize + j *
> filterSize + k * 4 + l;
> - int to = (i) * filterSize + j * 4
> + l + k * 64;
> - filter[to] = tmp2[from];
> - }
> - }
> - }
> - }
> - av_free(tmp2);
> - }
> - }
> - }
> + if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
> + int16_t *filterCopy = NULL;
> + if (filterSize > 4) {
> + if (!FF_ALLOC_TYPED_ARRAY(filterCopy, dstW * filterSize))
> + return AVERROR(ENOMEM);
> + memcpy(filterCopy, filter, dstW * filterSize *
> sizeof(int16_t));
> + }
> + // Do not swap filterPos for pixels which won't be processed by
> + // the main loop.
> + for (i = 0; i + 8 <= dstW; i += 8) {
> + FFSWAP(int, filterPos[i + 2], filterPos[i + 4]);
> + FFSWAP(int, filterPos[i + 3], filterPos[i + 5]);
> + }
> + if (filterSize > 4) {
> + // 16 pixels are processed at a time.
> + for (i = 0; i + 16 <= dstW; i += 16) {
> + // 4 filter coeffs are processed at a time.
> + for (k = 0; k + 4 <= filterSize; k += 4) {
> + for (j = 0; j < 16; ++j) {
> + int from = (i + j) * filterSize + k;
> + int to = i * filterSize + j * 4 + k * 16;
> + memcpy(&filter[to], &filterCopy[from], 4 *
> sizeof(int16_t));
> + }
> + }
> + }
> + }
> + if (filterCopy)
> + av_free(filterCopy);
> }
> }
> #endif
> + return 0;
> }
>
> int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
> @@ -1836,7 +1844,8 @@ av_cold int sws_init_context(SwsContext *c,
> SwsFilter *srcFilter,
> get_local_pos(c, 0, 0, 0),
> get_local_pos(c, 0, 0, 0))) < 0)
> goto fail;
> - ff_shuffle_filter_coefficients(c, c->hLumFilterPos,
> c->hLumFilterSize, c->hLumFilter, dstW);
> + if ((ret = ff_shuffle_filter_coefficients(c,
> c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0)
> + goto nomem;
> if ((ret = initFilter(&c->hChrFilter, &c->hChrFilterPos,
> &c->hChrFilterSize, c->chrXInc,
> c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
> @@ -1846,7 +1855,8 @@ av_cold int sws_init_context(SwsContext *c,
> SwsFilter *srcFilter,
> get_local_pos(c, c->chrSrcHSubSample,
> c->src_h_chr_pos, 0),
> get_local_pos(c, c->chrDstHSubSample,
> c->dst_h_chr_pos, 0))) < 0)
> goto fail;
> - ff_shuffle_filter_coefficients(c, c->hChrFilterPos,
> c->hChrFilterSize, c->hChrFilter, c->chrDstW);
> + if ((ret = ff_shuffle_filter_coefficients(c,
> c->hChrFilterPos, c->hChrFilterSize, c->hChrFilter, c->chrDstW)) != 0)
> + goto nomem;
> }
> } // initialize horizontal stuff
>
> --
> 2.34.1.575.g55b058a8bb-goog
>
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients.
2022-01-10 14:58 [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
` (3 preceding siblings ...)
2022-02-02 12:21 ` [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
@ 2022-02-03 14:11 ` Michael Niedermayer
2022-02-09 9:17 ` Alan Kelly
4 siblings, 1 reply; 7+ messages in thread
From: Michael Niedermayer @ 2022-02-03 14:11 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 2784 bytes --]
On Mon, Jan 10, 2022 at 03:58:33PM +0100, Alan Kelly wrote:
> Make the code more readable, follow the style guide and propagate memory
> allocation errors.
Cosmetics and bugfixes should not be in the same patch
> ---
> libswscale/swscale_internal.h | 2 +-
> libswscale/utils.c | 68 ++++++++++++++++++++---------------
> 2 files changed, 40 insertions(+), 30 deletions(-)
>
> diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
> index 3a78d95ba6..26d28d42e6 100644
> --- a/libswscale/swscale_internal.h
> +++ b/libswscale/swscale_internal.h
> @@ -1144,5 +1144,5 @@ void ff_sws_slice_worker(void *priv, int jobnr, int threadnr,
> #define MAX_LINES_AHEAD 4
>
> //shuffle filter and filterPos for hyScale and hcScale filters in avx2
> -void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW);
> +int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW);
> #endif /* SWSCALE_SWSCALE_INTERNAL_H */
> diff --git a/libswscale/utils.c b/libswscale/utils.c
> index c5ea8853d5..52f07e1661 100644
> --- a/libswscale/utils.c
> +++ b/libswscale/utils.c
> @@ -278,39 +278,47 @@ static const FormatEntry format_entries[] = {
> [AV_PIX_FMT_P416LE] = { 1, 1 },
> };
>
> -void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSize, int16_t *filter, int dstW){
> +int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
> + int filterSize, int16_t *filter,
> + int dstW)
> +{
> #if ARCH_X86_64
> - int i, j, k, l;
> + int i = 0, j = 0, k = 0;
why?
they are set when used if iam not mistaken
> int cpu_flags = av_get_cpu_flags();
> + if (!filter || dstW % 16 != 0) return 0;
please add \n also a comment what the dstW & 16 case exactly does and why
[...]
> int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
> @@ -1836,7 +1844,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
> get_local_pos(c, 0, 0, 0),
> get_local_pos(c, 0, 0, 0))) < 0)
> goto fail;
> - ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW);
> + if ((ret = ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0)
> + goto nomem;
This is confusing as ret is never used, also error codes are <0
thx
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Those who are best at talking, realize last or never when they are wrong.
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients.
2022-02-03 14:11 ` Michael Niedermayer
@ 2022-02-09 9:17 ` Alan Kelly
0 siblings, 0 replies; 7+ messages in thread
From: Alan Kelly @ 2022-02-09 9:17 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Hi Michael,
Thanks for your feedback. I have updated the patches and split this patch
into two, one with cosmetic fixes and one propagating the errors. Since
there is now an extra patch in the set and the commit messages have
changed, new threads have been started.
Alan
On Thu, Feb 3, 2022 at 3:11 PM Michael Niedermayer <michael@niedermayer.cc>
wrote:
> On Mon, Jan 10, 2022 at 03:58:33PM +0100, Alan Kelly wrote:
> > Make the code more readable, follow the style guide and propagate memory
> > allocation errors.
>
> Cosmetics and bugfixes should not be in the same patch
>
>
> > ---
> > libswscale/swscale_internal.h | 2 +-
> > libswscale/utils.c | 68 ++++++++++++++++++++---------------
> > 2 files changed, 40 insertions(+), 30 deletions(-)
> >
> > diff --git a/libswscale/swscale_internal.h
> b/libswscale/swscale_internal.h
> > index 3a78d95ba6..26d28d42e6 100644
> > --- a/libswscale/swscale_internal.h
> > +++ b/libswscale/swscale_internal.h
> > @@ -1144,5 +1144,5 @@ void ff_sws_slice_worker(void *priv, int jobnr,
> int threadnr,
> > #define MAX_LINES_AHEAD 4
> >
> > //shuffle filter and filterPos for hyScale and hcScale filters in avx2
> > -void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int
> filterSize, int16_t *filter, int dstW);
> > +int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int
> filterSize, int16_t *filter, int dstW);
> > #endif /* SWSCALE_SWSCALE_INTERNAL_H */
> > diff --git a/libswscale/utils.c b/libswscale/utils.c
> > index c5ea8853d5..52f07e1661 100644
> > --- a/libswscale/utils.c
> > +++ b/libswscale/utils.c
> > @@ -278,39 +278,47 @@ static const FormatEntry format_entries[] = {
> > [AV_PIX_FMT_P416LE] = { 1, 1 },
> > };
> >
> > -void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int
> filterSize, int16_t *filter, int dstW){
> > +int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
> > + int filterSize, int16_t *filter,
> > + int dstW)
> > +{
> > #if ARCH_X86_64
>
> > - int i, j, k, l;
> > + int i = 0, j = 0, k = 0;
>
> why?
> they are set when used if iam not mistaken
>
>
> > int cpu_flags = av_get_cpu_flags();
>
> > + if (!filter || dstW % 16 != 0) return 0;
>
> please add \n also a comment what the dstW & 16 case exactly does and why
>
>
> [...]
> > int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
> > @@ -1836,7 +1844,8 @@ av_cold int sws_init_context(SwsContext *c,
> SwsFilter *srcFilter,
> > get_local_pos(c, 0, 0, 0),
> > get_local_pos(c, 0, 0, 0))) < 0)
> > goto fail;
> > - ff_shuffle_filter_coefficients(c, c->hLumFilterPos,
> c->hLumFilterSize, c->hLumFilter, dstW);
> > + if ((ret = ff_shuffle_filter_coefficients(c,
> c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0)
> > + goto nomem;
>
> This is confusing as ret is never used, also error codes are <0
>
> thx
>
> [...]
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Those who are best at talking, realize last or never when they are wrong.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2022-02-09 9:17 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-10 14:58 [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 2/4] libswscale: Avx2 hscale can process any input of size which is a multiple of 4 Alan Kelly
2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 3/4] libswscale: Enable hscale_avx2 for input sizes which ar emultiples " Alan Kelly
2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 4/4] checkasm/sw_scale: hscale does not requires cpuflag test Alan Kelly
2022-02-02 12:21 ` [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
2022-02-03 14:11 ` Michael Niedermayer
2022-02-09 9:17 ` Alan Kelly
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git