* [FFmpeg-devel] [PATCH] area changed: scdet filter
@ 2024-05-12 11:05 radu.taraibuta
2024-05-12 11:34 ` Paul B Mahol
0 siblings, 1 reply; 10+ messages in thread
From: radu.taraibuta @ 2024-05-12 11:05 UTC (permalink / raw)
To: ffmpeg-devel
Improve scene detection accuracy by comparing each frame with both the
previous and the next frame (this introduces a one-frame delay).
Add a new mode parameter and a new method that computes the frame difference
using a cube root, which increases the weight of small changes, together
with a new mean formula.
This improves accuracy significantly.
Slightly improve performance by not cloning the frame.
Signed-off-by: raduct <radu.taraibuta@gmail.com>
---
doc/filters.texi | 13 +++
libavfilter/scene_sad.c | 167 +++++++++++++++++++++++++++++++++++-
libavfilter/scene_sad.h | 2 +
libavfilter/vf_scdet.c | 150 ++++++++++++++++++++------------
tests/fate/filter-video.mak | 3 +
5 files changed, 281 insertions(+), 54 deletions(-)
diff --git a/doc/filters.texi b/doc/filters.texi
index bfa8ccec8b..de83a5e322 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -21797,6 +21797,19 @@ Default value is @code{10.}.
@item sc_pass, s
Set the flag to pass scene change frames to the next filter. Default value
is @code{0}
You can enable it if you want to get snapshot of scene change frames only.
+
+@item mode
+Set the scene change detection method. Default value is @code{0}
+Available values are:
+
+@table @samp
+@item 0
+Regular sum of absolute linear differences.
+
+@item 1
+Sum of mean of cubic root differences.
+
+@end table
@end table
@anchor{selectivecolor}
diff --git a/libavfilter/scene_sad.c b/libavfilter/scene_sad.c
index caf911eb5d..5280e356cc 100644
--- a/libavfilter/scene_sad.c
+++ b/libavfilter/scene_sad.c
@@ -65,9 +65,174 @@ ff_scene_sad_fn ff_scene_sad_get_fn(int depth)
if (!sad) {
if (depth == 8)
sad = ff_scene_sad_c;
- if (depth == 16)
+ else if (depth == 16)
sad = ff_scene_sad16_c;
}
return sad;
}
+/*
+* Lookup table for 40.25*pow(i,1/3) - a.k.a cubic root extended to 0 - 255
interval
+* Increase the weight of small differences compared to linear
+*/
+static const uint8_t cbrtTable[256] = {
+0, 40, 51, 58, 64, 69, 73, 77, 81, 84, 87, 90, 92, 95, 97,
99,
+101, 103, 105, 107, 109, 111, 113, 114, 116, 118, 119, 121, 122, 124, 125,
126,
+128, 129, 130, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144,
145,
+146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 158, 159,
160,
+161, 162, 163, 163, 164, 165, 166, 167, 167, 168, 169, 170, 170, 171, 172,
173,
+173, 174, 175, 176, 176, 177, 178, 178, 179, 180, 180, 181, 182, 182, 183,
184,
+184, 185, 186, 186, 187, 187, 188, 189, 189, 190, 190, 191, 192, 192, 193,
193,
+194, 195, 195, 196, 196, 197, 197, 198, 199, 199, 200, 200, 201, 201, 202,
202,
+203, 203, 204, 204, 205, 205, 206, 206, 207, 207, 208, 208, 209, 209, 210,
210,
+211, 211, 212, 212, 213, 213, 214, 214, 215, 215, 216, 216, 217, 217, 218,
218,
+219, 219, 219, 220, 220, 221, 221, 222, 222, 223, 223, 223, 224, 224, 225,
225,
+226, 226, 226, 227, 227, 228, 228, 229, 229, 229, 230, 230, 231, 231, 231,
232,
+232, 233, 233, 233, 234, 234, 235, 235, 235, 236, 236, 237, 237, 237, 238,
238,
+238, 239, 239, 240, 240, 240, 241, 241, 242, 242, 242, 243, 243, 243, 244,
244,
+244, 245, 245, 246, 246, 246, 247, 247, 247, 248, 248, 248, 249, 249, 249,
250,
+250, 250, 251, 251, 252, 252, 252, 253, 253, 253, 254, 254, 254, 255, 255,
255 };
+
+/*
+* Lookup table for 101.52*pow(i,1/3) - a.k.a cubic root extended to 0 -
1023 interval
+* Increase the weight of small differences compared to linear
+*/
+static const uint16_t cbrtTable10[1024] = {
+ 0, 102, 128, 146, 161, 174, 184, 194, 203, 211, 219, 226, 232, 239, 245,
250, 256, 261, 266, 271, 276, 280, 284, 289, 293, 297, 301, 305, 308, 312,
315, 319,
+322, 326, 329, 332, 335, 338, 341, 344, 347, 350, 353, 356, 358, 361, 364,
366, 369, 371, 374, 376, 379, 381, 384, 386, 388, 391, 393, 395, 397, 400,
402, 404,
+406, 408, 410, 412, 414, 416, 418, 420, 422, 424, 426, 428, 430, 432, 434,
436, 437, 439, 441, 443, 445, 446, 448, 450, 452, 453, 455, 457, 458, 460,
462, 463,
+465, 466, 468, 470, 471, 473, 474, 476, 477, 479, 480, 482, 483, 485, 486,
488, 489, 491, 492, 494, 495, 497, 498, 499, 501, 502, 504, 505, 506, 508,
509, 510,
+512, 513, 514, 516, 517, 518, 520, 521, 522, 523, 525, 526, 527, 528, 530,
531, 532, 533, 535, 536, 537, 538, 539, 541, 542, 543, 544, 545, 547, 548,
549, 550,
+551, 552, 553, 555, 556, 557, 558, 559, 560, 561, 562, 563, 565, 566, 567,
568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 580, 581, 582, 583,
584, 585,
+586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600,
601, 602, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614,
615, 616,
+617, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 626, 627, 628, 629,
630, 631, 632, 633, 634, 634, 635, 636, 637, 638, 639, 640, 640, 641, 642,
643, 644,
+645, 645, 646, 647, 648, 649, 650, 650, 651, 652, 653, 654, 655, 655, 656,
657, 658, 659, 659, 660, 661, 662, 663, 663, 664, 665, 666, 667, 667, 668,
669, 670,
+670, 671, 672, 673, 674, 674, 675, 676, 677, 677, 678, 679, 680, 680, 681,
682, 683, 683, 684, 685, 686, 686, 687, 688, 689, 689, 690, 691, 691, 692,
693, 694,
+694, 695, 696, 697, 697, 698, 699, 699, 700, 701, 702, 702, 703, 704, 704,
705, 706, 706, 707, 708, 709, 709, 710, 711, 711, 712, 713, 713, 714, 715,
715, 716,
+717, 717, 718, 719, 720, 720, 721, 722, 722, 723, 724, 724, 725, 726, 726,
727, 728, 728, 729, 729, 730, 731, 731, 732, 733, 733, 734, 735, 735, 736,
737, 737,
+738, 739, 739, 740, 740, 741, 742, 742, 743, 744, 744, 745, 746, 746, 747,
747, 748, 749, 749, 750, 750, 751, 752, 752, 753, 754, 754, 755, 755, 756,
757, 757,
+758, 758, 759, 760, 760, 761, 761, 762, 763, 763, 764, 764, 765, 766, 766,
767, 767, 768, 769, 769, 770, 770, 771, 772, 772, 773, 773, 774, 774, 775,
776, 776,
+777, 777, 778, 779, 779, 780, 780, 781, 781, 782, 783, 783, 784, 784, 785,
785, 786, 787, 787, 788, 788, 789, 789, 790, 790, 791, 792, 792, 793, 793,
794, 794,
+795, 795, 796, 797, 797, 798, 798, 799, 799, 800, 800, 801, 801, 802, 803,
803, 804, 804, 805, 805, 806, 806, 807, 807, 808, 808, 809, 810, 810, 811,
811, 812,
+812, 813, 813, 814, 814, 815, 815, 816, 816, 817, 817, 818, 818, 819, 819,
820, 821, 821, 822, 822, 823, 823, 824, 824, 825, 825, 826, 826, 827, 827,
828, 828,
+829, 829, 830, 830, 831, 831, 832, 832, 833, 833, 834, 834, 835, 835, 836,
836, 837, 837, 838, 838, 839, 839, 840, 840, 841, 841, 842, 842, 843, 843,
844, 844,
+845, 845, 846, 846, 847, 847, 848, 848, 849, 849, 850, 850, 851, 851, 851,
852, 852, 853, 853, 854, 854, 855, 855, 856, 856, 857, 857, 858, 858, 859,
859, 860,
+860, 861, 861, 861, 862, 862, 863, 863, 864, 864, 865, 865, 866, 866, 867,
867, 868, 868, 868, 869, 869, 870, 870, 871, 871, 872, 872, 873, 873, 874,
874, 874,
+875, 875, 876, 876, 877, 877, 878, 878, 879, 879, 879, 880, 880, 881, 881,
882, 882, 883, 883, 883, 884, 884, 885, 885, 886, 886, 887, 887, 887, 888,
888, 889,
+889, 890, 890, 891, 891, 891, 892, 892, 893, 893, 894, 894, 894, 895, 895,
896, 896, 897, 897, 898, 898, 898, 899, 899, 900, 900, 901, 901, 901, 902,
902, 903,
+903, 904, 904, 904, 905, 905, 906, 906, 907, 907, 907, 908, 908, 909, 909,
909, 910, 910, 911, 911, 912, 912, 912, 913, 913, 914, 914, 915, 915, 915,
916, 916,
+917, 917, 917, 918, 918, 919, 919, 919, 920, 920, 921, 921, 922, 922, 922,
923, 923, 924, 924, 924, 925, 925, 926, 926, 926, 927, 927, 928, 928, 928,
929, 929,
+930, 930, 930, 931, 931, 932, 932, 933, 933, 933, 934, 934, 935, 935, 935,
936, 936, 937, 937, 937, 938, 938, 938, 939, 939, 940, 940, 940, 941, 941,
942, 942,
+942, 943, 943, 944, 944, 944, 945, 945, 946, 946, 946, 947, 947, 948, 948,
948, 949, 949, 949, 950, 950, 951, 951, 951, 952, 952, 953, 953, 953, 954,
954, 954,
+955, 955, 956, 956, 956, 957, 957, 958, 958, 958, 959, 959, 959, 960, 960,
961, 961, 961, 962, 962, 962, 963, 963, 964, 964, 964, 965, 965, 965, 966,
966, 967,
+967, 967, 968, 968, 968, 969, 969, 970, 970, 970, 971, 971, 971, 972, 972,
972, 973, 973, 974, 974, 974, 975, 975, 975, 976, 976, 977, 977, 977, 978,
978, 978,
+979, 979, 979, 980, 980, 981, 981, 981, 982, 982, 982, 983, 983, 983, 984,
984, 985, 985, 985, 986, 986, 986, 987, 987, 987, 988, 988, 988, 989, 989,
990, 990,
+990, 991, 991, 991, 992, 992, 992, 993, 993, 993, 994, 994, 994, 995, 995,
996, 996, 996, 997, 997, 997, 998, 998, 998, 999, 999, 999, 1000, 1000,
1000, 1001, 1001,
+1001, 1002, 1002, 1003, 1003, 1003, 1004, 1004, 1004, 1005, 1005, 1005,
1006, 1006, 1006, 1007, 1007, 1007, 1008, 1008, 1008, 1009, 1009, 1009,
1010, 1010, 1010, 1011, 1011, 1011, 1012, 1012,
+1012, 1013, 1013, 1014, 1014, 1014, 1015, 1015, 1015, 1016, 1016, 1016,
1017, 1017, 1017, 1018, 1018, 1018, 1019, 1019, 1019, 1020, 1020, 1020,
1021, 1021, 1021, 1022, 1022, 1022, 1023, 1023 };
+
+void ff_scene_scrd_c(SCENE_SAD_PARAMS)
+{
+ uint64_t scrdPlus = 0;
+ uint64_t scrdMinus = 0;
+ int x, y;
+
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ if (src1[x] > src2[x])
+ scrdMinus += cbrtTable[src1[x] - src2[x]];
+ else
+ scrdPlus += cbrtTable[src2[x] - src1[x]];
+ src1 += stride1;
+ src2 += stride2;
+ }
+
+ double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
+ *sum = 2.0 * mean * mean;
+}
+
+void ff_scene_scrd2B_c(SCENE_SAD_PARAMS, int bitdepth)
+{
+ uint64_t scrdPlus = 0;
+ uint64_t scrdMinus = 0;
+ const uint16_t* src1w = (const uint16_t*)src1;
+ const uint16_t* src2w = (const uint16_t*)src2;
+ int x, y;
+ int shift = FFABS(bitdepth - 10);
+
+ stride1 /= 2;
+ stride2 /= 2;
+
+ if (bitdepth > 10) {
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ if (src1w[x] > src2w[x])
+ scrdMinus += cbrtTable10[(src1w[x] - src2w[x]) >>
shift];
+ else
+ scrdPlus += cbrtTable10[(src2w[x] - src1w[x]) >>
shift];
+ src1w += stride1;
+ src2w += stride2;
+ }
+ scrdMinus <<= shift;
+ scrdPlus <<= shift;
+ }
+ else {
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ if (src1w[x] > src2w[x])
+ scrdMinus += cbrtTable10[(src1w[x] - src2w[x]) <<
shift];
+ else
+ scrdPlus += cbrtTable10[(src2w[x] - src1w[x]) <<
shift];
+ src1w += stride1;
+ src2w += stride2;
+ }
+ scrdMinus >>= shift;
+ scrdPlus >>= shift;
+ }
+
+ double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
+ *sum = 2.0 * mean * mean;
+}
+
+void ff_scene_scrd9_c(SCENE_SAD_PARAMS)
+{
+ ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum, 9);
+}
+
+void ff_scene_scrd10_c(SCENE_SAD_PARAMS)
+{
+ ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
10);
+}
+
+void ff_scene_scrd12_c(SCENE_SAD_PARAMS)
+{
+ ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
12);
+}
+
+void ff_scene_scrd14_c(SCENE_SAD_PARAMS)
+{
+ ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
14);
+}
+
+void ff_scene_scrd16_c(SCENE_SAD_PARAMS)
+{
+ ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
16);
+}
+
+ff_scene_sad_fn ff_scene_scrd_get_fn(int depth)
+{
+ ff_scene_sad_fn scrd = NULL;
+ if (depth == 8)
+ scrd = ff_scene_scrd_c;
+ else if (depth == 9)
+ scrd = ff_scene_scrd9_c;
+ else if (depth == 10)
+ scrd = ff_scene_scrd10_c;
+ else if (depth == 12)
+ scrd = ff_scene_scrd12_c;
+ else if (depth == 14)
+ scrd = ff_scene_scrd14_c;
+ else if (depth == 16)
+ scrd = ff_scene_scrd16_c;
+ return scrd;
+}
diff --git a/libavfilter/scene_sad.h b/libavfilter/scene_sad.h
index 173a051f2b..af9b06201c 100644
--- a/libavfilter/scene_sad.h
+++ b/libavfilter/scene_sad.h
@@ -41,4 +41,6 @@ ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth);
ff_scene_sad_fn ff_scene_sad_get_fn(int depth);
+ff_scene_sad_fn ff_scene_scrd_get_fn(int depth);
+
#endif /* AVFILTER_SCENE_SAD_H */
diff --git a/libavfilter/vf_scdet.c b/libavfilter/vf_scdet.c
index 15399cfebf..6162e4615b 100644
--- a/libavfilter/vf_scdet.c
+++ b/libavfilter/vf_scdet.c
@@ -31,6 +31,17 @@
#include "scene_sad.h"
#include "video.h"
+enum SCDETMode {
+ MODE_DIFF = 0,
+ MODE_MEAN_CBRT = 1
+};
+
+typedef struct SCDETFrameInfo {
+ AVFrame* picref;
+ double mafd;
+ double diff;
+} SCDETFrameInfo;
+
typedef struct SCDetContext {
const AVClass *class;
@@ -39,11 +50,12 @@ typedef struct SCDetContext {
int nb_planes;
int bitdepth;
ff_scene_sad_fn sad;
- double prev_mafd;
- double scene_score;
- AVFrame *prev_picref;
+ SCDETFrameInfo curr_frame;
+ SCDETFrameInfo prev_frame;
+
double threshold;
int sc_pass;
+ enum SCDETMode mode;
} SCDetContext;
#define OFFSET(x) offsetof(SCDetContext, x)
@@ -55,6 +67,7 @@ static const AVOption scdet_options[] = {
{ "t", "set scene change detect threshold",
OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 10.}, 0, 100., V|F },
{ "sc_pass", "Set the flag to pass scene change frames",
OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.dbl = 0 }, 0, 1, V|F },
{ "s", "Set the flag to pass scene change frames",
OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.dbl = 0 }, 0, 1, V|F },
+ { "mode", "scene change detection method",
OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_DIFF}, MODE_DIFF,
MODE_MEAN_CBRT, V|F },
{NULL}
};
@@ -85,13 +98,16 @@ static int config_input(AVFilterLink *inlink)
s->bitdepth = desc->comp[0].depth;
s->nb_planes = is_yuv ? 1 : av_pix_fmt_count_planes(inlink->format);
- for (int plane = 0; plane < 4; plane++) {
+ for (int plane = 0; plane < s->nb_planes; plane++) {
ptrdiff_t line_size = av_image_get_linesize(inlink->format,
inlink->w, plane);
s->width[plane] = line_size >> (s->bitdepth > 8);
- s->height[plane] = inlink->h >> ((plane == 1 || plane == 2) ?
desc->log2_chroma_h : 0);
+ s->height[plane] = plane == 1 || plane == 2 ?
AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h) : inlink->h;
}
- s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
+ if (s->mode == MODE_DIFF)
+ s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
+ else if (s->mode == MODE_MEAN_CBRT)
+ s->sad = ff_scene_scrd_get_fn(s->bitdepth);
if (!s->sad)
return AVERROR(EINVAL);
@@ -101,46 +117,86 @@ static int config_input(AVFilterLink *inlink)
static av_cold void uninit(AVFilterContext *ctx)
{
SCDetContext *s = ctx->priv;
-
- av_frame_free(&s->prev_picref);
}
-static double get_scene_score(AVFilterContext *ctx, AVFrame *frame)
+static void compute_diff(AVFilterContext *ctx)
{
- double ret = 0;
SCDetContext *s = ctx->priv;
- AVFrame *prev_picref = s->prev_picref;
+ AVFrame *prev_picref = s->prev_frame.picref;
+ AVFrame *curr_picref = s->curr_frame.picref;
- if (prev_picref && frame->height == prev_picref->height
- && frame->width == prev_picref->width) {
- uint64_t sad = 0;
- double mafd, diff;
- uint64_t count = 0;
+ if (prev_picref && curr_picref
+ && curr_picref->height == prev_picref->height
+ && curr_picref->width == prev_picref->width) {
+ uint64_t sum = 0;
+ uint64_t count = 0;
for (int plane = 0; plane < s->nb_planes; plane++) {
- uint64_t plane_sad;
+ uint64_t plane_sum;
s->sad(prev_picref->data[plane], prev_picref->linesize[plane],
- frame->data[plane], frame->linesize[plane],
- s->width[plane], s->height[plane], &plane_sad);
- sad += plane_sad;
+ curr_picref->data[plane], curr_picref->linesize[plane],
+ s->width[plane], s->height[plane], &plane_sum);
+ sum += plane_sum;
count += s->width[plane] * s->height[plane];
}
- mafd = (double)sad * 100. / count / (1ULL << s->bitdepth);
- diff = fabs(mafd - s->prev_mafd);
- ret = av_clipf(FFMIN(mafd, diff), 0, 100.);
- s->prev_mafd = mafd;
- av_frame_free(&prev_picref);
+ s->curr_frame.mafd = (double)sum * 100. / count / (1ULL <<
s->bitdepth);
+ s->curr_frame.diff = s->curr_frame.mafd - s->prev_frame.mafd;
+ } else {
+ s->curr_frame.mafd = 0;
+ s->curr_frame.diff = 0;
}
- s->prev_picref = av_frame_clone(frame);
- return ret;
}
-static int set_meta(SCDetContext *s, AVFrame *frame, const char *key, const
char *value)
+static int set_meta(AVFrame *frame, const char *key, const char *value)
{
return av_dict_set(&frame->metadata, key, value, 0);
}
+static int filter_frame(AVFilterContext* ctx, AVFrame* frame)
+{
+ AVFilterLink* inlink = ctx->inputs[0];
+ AVFilterLink* outlink = ctx->outputs[0];
+ SCDetContext* s = ctx->priv;
+
+ s->prev_frame = s->curr_frame;
+ s->curr_frame.picref = frame;
+
+ if (s->prev_frame.picref) {
+ compute_diff(ctx);
+
+ if (s->prev_frame.diff < -s->curr_frame.diff) {
+ s->prev_frame.diff = -s->curr_frame.diff;
+ s->prev_frame.mafd = s->curr_frame.mafd;
+ }
+ double scene_score = av_clipf(FFMAX(s->prev_frame.diff, 0), 0,
100.);
+
+ char buf[64];
+ snprintf(buf, sizeof(buf), "%0.3f", s->prev_frame.mafd);
+ set_meta(s->prev_frame.picref, "lavfi.scd.mafd", buf);
+ snprintf(buf, sizeof(buf), "%0.3f", scene_score);
+ set_meta(s->prev_frame.picref, "lavfi.scd.score", buf);
+
+ if (scene_score >= s->threshold) {
+ av_log(s, AV_LOG_INFO, "lavfi.scd.score: %.3f, lavfi.scd.time:
%s\n",
+ scene_score, av_ts2timestr(s->prev_frame.picref->pts,
&inlink->time_base));
+ set_meta(s->prev_frame.picref, "lavfi.scd.time",
+ av_ts2timestr(s->prev_frame.picref->pts,
&inlink->time_base));
+ }
+
+ if (s->sc_pass) {
+ if (scene_score >= s->threshold)
+ return ff_filter_frame(outlink, s->prev_frame.picref);
+ else
+ av_frame_free(&s->prev_frame.picref);
+ }
+ else
+ return ff_filter_frame(outlink, s->prev_frame.picref);
+ }
+
+ return 0;
+}
+
static int activate(AVFilterContext *ctx)
{
int ret;
@@ -148,6 +204,8 @@ static int activate(AVFilterContext *ctx)
AVFilterLink *outlink = ctx->outputs[0];
SCDetContext *s = ctx->priv;
AVFrame *frame;
+ int64_t pts;
+ int status;
FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
@@ -155,31 +213,17 @@ static int activate(AVFilterContext *ctx)
if (ret < 0)
return ret;
- if (frame) {
- char buf[64];
- s->scene_score = get_scene_score(ctx, frame);
- snprintf(buf, sizeof(buf), "%0.3f", s->prev_mafd);
- set_meta(s, frame, "lavfi.scd.mafd", buf);
- snprintf(buf, sizeof(buf), "%0.3f", s->scene_score);
- set_meta(s, frame, "lavfi.scd.score", buf);
+ if (ret > 0)
+ return filter_frame(ctx, frame);
- if (s->scene_score >= s->threshold) {
- av_log(s, AV_LOG_INFO, "lavfi.scd.score: %.3f, lavfi.scd.time:
%s\n",
- s->scene_score, av_ts2timestr(frame->pts,
&inlink->time_base));
- set_meta(s, frame, "lavfi.scd.time",
- av_ts2timestr(frame->pts, &inlink->time_base));
- }
- if (s->sc_pass) {
- if (s->scene_score >= s->threshold)
- return ff_filter_frame(outlink, frame);
- else {
- av_frame_free(&frame);
- }
- } else
- return ff_filter_frame(outlink, frame);
+ if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
+ if (status == AVERROR_EOF)
+ ret = filter_frame(ctx, NULL);
+
+ ff_outlink_set_status(outlink, status, pts);
+ return ret;
}
- FF_FILTER_FORWARD_STATUS(inlink, outlink);
FF_FILTER_FORWARD_WANTED(outlink, inlink);
return FFERROR_NOT_READY;
@@ -190,12 +234,12 @@ static const AVFilterPad scdet_inputs[] = {
.name = "default",
.type = AVMEDIA_TYPE_VIDEO,
.config_props = config_input,
- },
+ }
};
const AVFilter ff_vf_scdet = {
.name = "scdet",
- .description = NULL_IF_CONFIG_SMALL("Detect video scene change"),
+ .description = NULL_IF_CONFIG_SMALL("Detect video scene change."),
.priv_size = sizeof(SCDetContext),
.priv_class = &scdet_class,
.uninit = uninit,
@@ -203,5 +247,5 @@ const AVFilter ff_vf_scdet = {
FILTER_INPUTS(scdet_inputs),
FILTER_OUTPUTS(ff_video_default_filterpad),
FILTER_PIXFMTS_ARRAY(pix_fmts),
- .activate = activate,
+ .activate = activate
};
diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
index ee9f0f5e40..cff48e33d9 100644
--- a/tests/fate/filter-video.mak
+++ b/tests/fate/filter-video.mak
@@ -672,6 +672,9 @@ SCDET_DEPS = LAVFI_INDEV FILE_PROTOCOL MOVIE_FILTER
SCDET_FILTER SCALE_FILTER \
FATE_METADATA_FILTER-$(call ALLYES, $(SCDET_DEPS)) +=
fate-filter-metadata-scdet
fate-filter-metadata-scdet: SRC =
$(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov
fate-filter-metadata-scdet: CMD = run $(FILTER_METADATA_COMMAND)
"sws_flags=+accurate_rnd+bitexact;movie='$(SRC)',scdet=s=1"
+FATE_METADATA_FILTER-$(call ALLYES, $(SCDET_DEPS)) +=
fate-filter-metadata-scdet1
+fate-filter-metadata-scdet1: SRC =
$(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov
+fate-filter-metadata-scdet1: CMD = run $(FILTER_METADATA_COMMAND)
"sws_flags=+accurate_rnd+bitexact;movie='$(SRC)',scdet=s=1:t=6.5:mode=1"
CROPDETECT_DEPS = LAVFI_INDEV FILE_PROTOCOL MOVIE_FILTER MOVIE_FILTER
MESTIMATE_FILTER CROPDETECT_FILTER \
SCALE_FILTER MOV_DEMUXER H264_DECODER
--
2.43.0.windows.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCH] area changed: scdet filter
2024-05-12 11:05 [FFmpeg-devel] [PATCH] area changed: scdet filter radu.taraibuta
@ 2024-05-12 11:34 ` Paul B Mahol
0 siblings, 0 replies; 10+ messages in thread
From: Paul B Mahol @ 2024-05-12 11:34 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Sun, May 12, 2024 at 1:05 PM <radu.taraibuta@gmail.com> wrote:
> Improve scene detection accuracy by comparing frame with both previous and
> next frame (creates one frame delay).
> Add new mode parameter and new method to compute the frame difference using
> cube root to increase the weight of small changes and new mean formula.
> This improves accuracy significantly.
> Slightly improve performance by not using frame clone.
>
>
The code style is inconsistent with other filters (mostly the use of
AVFilterLink* link instead of AVFilterLink *link).
Unrelated changes, please split trivial unrelated changes into separate
patches.
Can't the tables be generated at .init/.config_props time? There is no point
in storing them in the binary.
Adding an extra delay is not a backward-compatible change; it should be
implemented properly by adding an option that lets users select the mode:
compare with both the next and previous frames, with only the next frame,
or with only the previous frame.
Could split frame clone change into earlier separate patch.
Where are the results showing the accuracy improvement, so that it can be
confirmed?
> Signed-off-by: raduct <radu.taraibuta@gmail.com>
> ---
> doc/filters.texi | 13 +++
> libavfilter/scene_sad.c | 167 +++++++++++++++++++++++++++++++++++-
> libavfilter/scene_sad.h | 2 +
> libavfilter/vf_scdet.c | 150 ++++++++++++++++++++------------
> tests/fate/filter-video.mak | 3 +
> 5 files changed, 281 insertions(+), 54 deletions(-)
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index bfa8ccec8b..de83a5e322 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -21797,6 +21797,19 @@ Default value is @code{10.}.
> @item sc_pass, s
> Set the flag to pass scene change frames to the next filter. Default value
> is @code{0}
> You can enable it if you want to get snapshot of scene change frames only.
> +
> +@item mode
> +Set the scene change detection method. Default value is @code{0}
> +Available values are:
> +
> +@table @samp
> +@item 0
> +Regular sum of absolute linear differences.
> +
> +@item 1
> +Sum of mean of cubic root differences.
> +
> +@end table
> @end table
>
> @anchor{selectivecolor}
> diff --git a/libavfilter/scene_sad.c b/libavfilter/scene_sad.c
> index caf911eb5d..5280e356cc 100644
> --- a/libavfilter/scene_sad.c
> +++ b/libavfilter/scene_sad.c
> @@ -65,9 +65,174 @@ ff_scene_sad_fn ff_scene_sad_get_fn(int depth)
> if (!sad) {
> if (depth == 8)
> sad = ff_scene_sad_c;
> - if (depth == 16)
> + else if (depth == 16)
> sad = ff_scene_sad16_c;
> }
> return sad;
> }
>
> +/*
> +* Lookup table for 40.25*pow(i,1/3) - a.k.a cubic root extended to 0 - 255
> interval
> +* Increase the weight of small differences compared to linear
> +*/
> +static const uint8_t cbrtTable[256] = {
> +0, 40, 51, 58, 64, 69, 73, 77, 81, 84, 87, 90, 92, 95, 97,
> 99,
> +101, 103, 105, 107, 109, 111, 113, 114, 116, 118, 119, 121, 122, 124, 125,
> 126,
> +128, 129, 130, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144,
> 145,
> +146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 158, 159,
> 160,
> +161, 162, 163, 163, 164, 165, 166, 167, 167, 168, 169, 170, 170, 171, 172,
> 173,
> +173, 174, 175, 176, 176, 177, 178, 178, 179, 180, 180, 181, 182, 182, 183,
> 184,
> +184, 185, 186, 186, 187, 187, 188, 189, 189, 190, 190, 191, 192, 192, 193,
> 193,
> +194, 195, 195, 196, 196, 197, 197, 198, 199, 199, 200, 200, 201, 201, 202,
> 202,
> +203, 203, 204, 204, 205, 205, 206, 206, 207, 207, 208, 208, 209, 209, 210,
> 210,
> +211, 211, 212, 212, 213, 213, 214, 214, 215, 215, 216, 216, 217, 217, 218,
> 218,
> +219, 219, 219, 220, 220, 221, 221, 222, 222, 223, 223, 223, 224, 224, 225,
> 225,
> +226, 226, 226, 227, 227, 228, 228, 229, 229, 229, 230, 230, 231, 231, 231,
> 232,
> +232, 233, 233, 233, 234, 234, 235, 235, 235, 236, 236, 237, 237, 237, 238,
> 238,
> +238, 239, 239, 240, 240, 240, 241, 241, 242, 242, 242, 243, 243, 243, 244,
> 244,
> +244, 245, 245, 246, 246, 246, 247, 247, 247, 248, 248, 248, 249, 249, 249,
> 250,
> +250, 250, 251, 251, 252, 252, 252, 253, 253, 253, 254, 254, 254, 255, 255,
> 255 };
> +
> +/*
> +* Lookup table for 101.52*pow(i,1/3) - a.k.a cubic root extended to 0 -
> 1023 interval
> +* Increase the weight of small differences compared to linear
> +*/
> +static const uint16_t cbrtTable10[1024] = {
> + 0, 102, 128, 146, 161, 174, 184, 194, 203, 211, 219, 226, 232, 239, 245,
> 250, 256, 261, 266, 271, 276, 280, 284, 289, 293, 297, 301, 305, 308, 312,
> 315, 319,
> +322, 326, 329, 332, 335, 338, 341, 344, 347, 350, 353, 356, 358, 361, 364,
> 366, 369, 371, 374, 376, 379, 381, 384, 386, 388, 391, 393, 395, 397, 400,
> 402, 404,
> +406, 408, 410, 412, 414, 416, 418, 420, 422, 424, 426, 428, 430, 432, 434,
> 436, 437, 439, 441, 443, 445, 446, 448, 450, 452, 453, 455, 457, 458, 460,
> 462, 463,
> +465, 466, 468, 470, 471, 473, 474, 476, 477, 479, 480, 482, 483, 485, 486,
> 488, 489, 491, 492, 494, 495, 497, 498, 499, 501, 502, 504, 505, 506, 508,
> 509, 510,
> +512, 513, 514, 516, 517, 518, 520, 521, 522, 523, 525, 526, 527, 528, 530,
> 531, 532, 533, 535, 536, 537, 538, 539, 541, 542, 543, 544, 545, 547, 548,
> 549, 550,
> +551, 552, 553, 555, 556, 557, 558, 559, 560, 561, 562, 563, 565, 566, 567,
> 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 580, 581, 582, 583,
> 584, 585,
> +586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600,
> 601, 602, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614,
> 615, 616,
> +617, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 626, 627, 628, 629,
> 630, 631, 632, 633, 634, 634, 635, 636, 637, 638, 639, 640, 640, 641, 642,
> 643, 644,
> +645, 645, 646, 647, 648, 649, 650, 650, 651, 652, 653, 654, 655, 655, 656,
> 657, 658, 659, 659, 660, 661, 662, 663, 663, 664, 665, 666, 667, 667, 668,
> 669, 670,
> +670, 671, 672, 673, 674, 674, 675, 676, 677, 677, 678, 679, 680, 680, 681,
> 682, 683, 683, 684, 685, 686, 686, 687, 688, 689, 689, 690, 691, 691, 692,
> 693, 694,
> +694, 695, 696, 697, 697, 698, 699, 699, 700, 701, 702, 702, 703, 704, 704,
> 705, 706, 706, 707, 708, 709, 709, 710, 711, 711, 712, 713, 713, 714, 715,
> 715, 716,
> +717, 717, 718, 719, 720, 720, 721, 722, 722, 723, 724, 724, 725, 726, 726,
> 727, 728, 728, 729, 729, 730, 731, 731, 732, 733, 733, 734, 735, 735, 736,
> 737, 737,
> +738, 739, 739, 740, 740, 741, 742, 742, 743, 744, 744, 745, 746, 746, 747,
> 747, 748, 749, 749, 750, 750, 751, 752, 752, 753, 754, 754, 755, 755, 756,
> 757, 757,
> +758, 758, 759, 760, 760, 761, 761, 762, 763, 763, 764, 764, 765, 766, 766,
> 767, 767, 768, 769, 769, 770, 770, 771, 772, 772, 773, 773, 774, 774, 775,
> 776, 776,
> +777, 777, 778, 779, 779, 780, 780, 781, 781, 782, 783, 783, 784, 784, 785,
> 785, 786, 787, 787, 788, 788, 789, 789, 790, 790, 791, 792, 792, 793, 793,
> 794, 794,
> +795, 795, 796, 797, 797, 798, 798, 799, 799, 800, 800, 801, 801, 802, 803,
> 803, 804, 804, 805, 805, 806, 806, 807, 807, 808, 808, 809, 810, 810, 811,
> 811, 812,
> +812, 813, 813, 814, 814, 815, 815, 816, 816, 817, 817, 818, 818, 819, 819,
> 820, 821, 821, 822, 822, 823, 823, 824, 824, 825, 825, 826, 826, 827, 827,
> 828, 828,
> +829, 829, 830, 830, 831, 831, 832, 832, 833, 833, 834, 834, 835, 835, 836,
> 836, 837, 837, 838, 838, 839, 839, 840, 840, 841, 841, 842, 842, 843, 843,
> 844, 844,
> +845, 845, 846, 846, 847, 847, 848, 848, 849, 849, 850, 850, 851, 851, 851,
> 852, 852, 853, 853, 854, 854, 855, 855, 856, 856, 857, 857, 858, 858, 859,
> 859, 860,
> +860, 861, 861, 861, 862, 862, 863, 863, 864, 864, 865, 865, 866, 866, 867,
> 867, 868, 868, 868, 869, 869, 870, 870, 871, 871, 872, 872, 873, 873, 874,
> 874, 874,
> +875, 875, 876, 876, 877, 877, 878, 878, 879, 879, 879, 880, 880, 881, 881,
> 882, 882, 883, 883, 883, 884, 884, 885, 885, 886, 886, 887, 887, 887, 888,
> 888, 889,
> +889, 890, 890, 891, 891, 891, 892, 892, 893, 893, 894, 894, 894, 895, 895,
> 896, 896, 897, 897, 898, 898, 898, 899, 899, 900, 900, 901, 901, 901, 902,
> 902, 903,
> +903, 904, 904, 904, 905, 905, 906, 906, 907, 907, 907, 908, 908, 909, 909,
> 909, 910, 910, 911, 911, 912, 912, 912, 913, 913, 914, 914, 915, 915, 915,
> 916, 916,
> +917, 917, 917, 918, 918, 919, 919, 919, 920, 920, 921, 921, 922, 922, 922,
> 923, 923, 924, 924, 924, 925, 925, 926, 926, 926, 927, 927, 928, 928, 928,
> 929, 929,
> +930, 930, 930, 931, 931, 932, 932, 933, 933, 933, 934, 934, 935, 935, 935,
> 936, 936, 937, 937, 937, 938, 938, 938, 939, 939, 940, 940, 940, 941, 941,
> 942, 942,
> +942, 943, 943, 944, 944, 944, 945, 945, 946, 946, 946, 947, 947, 948, 948,
> 948, 949, 949, 949, 950, 950, 951, 951, 951, 952, 952, 953, 953, 953, 954,
> 954, 954,
> +955, 955, 956, 956, 956, 957, 957, 958, 958, 958, 959, 959, 959, 960, 960,
> 961, 961, 961, 962, 962, 962, 963, 963, 964, 964, 964, 965, 965, 965, 966,
> 966, 967,
> +967, 967, 968, 968, 968, 969, 969, 970, 970, 970, 971, 971, 971, 972, 972,
> 972, 973, 973, 974, 974, 974, 975, 975, 975, 976, 976, 977, 977, 977, 978,
> 978, 978,
> +979, 979, 979, 980, 980, 981, 981, 981, 982, 982, 982, 983, 983, 983, 984,
> 984, 985, 985, 985, 986, 986, 986, 987, 987, 987, 988, 988, 988, 989, 989,
> 990, 990,
> +990, 991, 991, 991, 992, 992, 992, 993, 993, 993, 994, 994, 994, 995, 995,
> 996, 996, 996, 997, 997, 997, 998, 998, 998, 999, 999, 999, 1000, 1000,
> 1000, 1001, 1001,
> +1001, 1002, 1002, 1003, 1003, 1003, 1004, 1004, 1004, 1005, 1005, 1005,
> 1006, 1006, 1006, 1007, 1007, 1007, 1008, 1008, 1008, 1009, 1009, 1009,
> 1010, 1010, 1010, 1011, 1011, 1011, 1012, 1012,
> +1012, 1013, 1013, 1014, 1014, 1014, 1015, 1015, 1015, 1016, 1016, 1016,
> 1017, 1017, 1017, 1018, 1018, 1018, 1019, 1019, 1019, 1020, 1020, 1020,
> 1021, 1021, 1021, 1022, 1022, 1022, 1023, 1023 };
> +
> +void ff_scene_scrd_c(SCENE_SAD_PARAMS)
> +{
> + uint64_t scrdPlus = 0;
> + uint64_t scrdMinus = 0;
> + int x, y;
> +
> + for (y = 0; y < height; y++) {
> + for (x = 0; x < width; x++)
> + if (src1[x] > src2[x])
> + scrdMinus += cbrtTable[src1[x] - src2[x]];
> + else
> + scrdPlus += cbrtTable[src2[x] - src1[x]];
> + src1 += stride1;
> + src2 += stride2;
> + }
> +
> + double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
> + *sum = 2.0 * mean * mean;
> +}
> +
> +void ff_scene_scrd2B_c(SCENE_SAD_PARAMS, int bitdepth)
> +{
> + uint64_t scrdPlus = 0;
> + uint64_t scrdMinus = 0;
> + const uint16_t* src1w = (const uint16_t*)src1;
> + const uint16_t* src2w = (const uint16_t*)src2;
> + int x, y;
> + int shift = FFABS(bitdepth - 10);
> +
> + stride1 /= 2;
> + stride2 /= 2;
> +
> + if (bitdepth > 10) {
> + for (y = 0; y < height; y++) {
> + for (x = 0; x < width; x++)
> + if (src1w[x] > src2w[x])
> + scrdMinus += cbrtTable10[(src1w[x] - src2w[x]) >>
> shift];
> + else
> + scrdPlus += cbrtTable10[(src2w[x] - src1w[x]) >>
> shift];
> + src1w += stride1;
> + src2w += stride2;
> + }
> + scrdMinus <<= shift;
> + scrdPlus <<= shift;
> + }
> + else {
> + for (y = 0; y < height; y++) {
> + for (x = 0; x < width; x++)
> + if (src1w[x] > src2w[x])
> + scrdMinus += cbrtTable10[(src1w[x] - src2w[x]) <<
> shift];
> + else
> + scrdPlus += cbrtTable10[(src2w[x] - src1w[x]) <<
> shift];
> + src1w += stride1;
> + src2w += stride2;
> + }
> + scrdMinus >>= shift;
> + scrdPlus >>= shift;
> + }
> +
> + double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
> + *sum = 2.0 * mean * mean;
> +}
> +
> +void ff_scene_scrd9_c(SCENE_SAD_PARAMS)
> +{
> + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
> 9);
> +}
> +
> +void ff_scene_scrd10_c(SCENE_SAD_PARAMS)
> +{
> + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
> 10);
> +}
> +
> +void ff_scene_scrd12_c(SCENE_SAD_PARAMS)
> +{
> + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
> 12);
> +}
> +
> +void ff_scene_scrd14_c(SCENE_SAD_PARAMS)
> +{
> + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
> 14);
> +}
> +
> +void ff_scene_scrd16_c(SCENE_SAD_PARAMS)
> +{
> + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
> 16);
> +}
> +
> +ff_scene_sad_fn ff_scene_scrd_get_fn(int depth)
> +{
> + ff_scene_sad_fn scrd = NULL;
> + if (depth == 8)
> + scrd = ff_scene_scrd_c;
> + else if (depth == 9)
> + scrd = ff_scene_scrd9_c;
> + else if (depth == 10)
> + scrd = ff_scene_scrd10_c;
> + else if (depth == 12)
> + scrd = ff_scene_scrd12_c;
> + else if (depth == 14)
> + scrd = ff_scene_scrd14_c;
> + else if (depth == 16)
> + scrd = ff_scene_scrd16_c;
> + return scrd;
> +}
> diff --git a/libavfilter/scene_sad.h b/libavfilter/scene_sad.h
> index 173a051f2b..af9b06201c 100644
> --- a/libavfilter/scene_sad.h
> +++ b/libavfilter/scene_sad.h
> @@ -41,4 +41,6 @@ ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth);
>
> ff_scene_sad_fn ff_scene_sad_get_fn(int depth);
>
> +ff_scene_sad_fn ff_scene_scrd_get_fn(int depth);
> +
> #endif /* AVFILTER_SCENE_SAD_H */
> diff --git a/libavfilter/vf_scdet.c b/libavfilter/vf_scdet.c
> index 15399cfebf..6162e4615b 100644
> --- a/libavfilter/vf_scdet.c
> +++ b/libavfilter/vf_scdet.c
> @@ -31,6 +31,17 @@
> #include "scene_sad.h"
> #include "video.h"
>
> +enum SCDETMode {
> + MODE_DIFF = 0,
> + MODE_MEAN_CBRT = 1
> +};
> +
> +typedef struct SCDETFrameInfo {
> + AVFrame* picref;
> + double mafd;
> + double diff;
> +} SCDETFrameInfo;
> +
> typedef struct SCDetContext {
> const AVClass *class;
>
> @@ -39,11 +50,12 @@ typedef struct SCDetContext {
> int nb_planes;
> int bitdepth;
> ff_scene_sad_fn sad;
> - double prev_mafd;
> - double scene_score;
> - AVFrame *prev_picref;
> + SCDETFrameInfo curr_frame;
> + SCDETFrameInfo prev_frame;
> +
> double threshold;
> int sc_pass;
> + enum SCDETMode mode;
> } SCDetContext;
>
> #define OFFSET(x) offsetof(SCDetContext, x)
> @@ -55,6 +67,7 @@ static const AVOption scdet_options[] = {
> { "t", "set scene change detect threshold",
> OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 10.}, 0, 100., V|F
> },
> { "sc_pass", "Set the flag to pass scene change frames",
> OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.dbl = 0 }, 0, 1, V|F
> },
> { "s", "Set the flag to pass scene change frames",
> OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.dbl = 0 }, 0, 1, V|F
> },
> + { "mode", "scene change detection method",
> OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_DIFF}, MODE_DIFF,
> MODE_MEAN_CBRT, V|F },
> {NULL}
> };
>
> @@ -85,13 +98,16 @@ static int config_input(AVFilterLink *inlink)
> s->bitdepth = desc->comp[0].depth;
> s->nb_planes = is_yuv ? 1 : av_pix_fmt_count_planes(inlink->format);
>
> - for (int plane = 0; plane < 4; plane++) {
> + for (int plane = 0; plane < s->nb_planes; plane++) {
> ptrdiff_t line_size = av_image_get_linesize(inlink->format,
> inlink->w, plane);
> s->width[plane] = line_size >> (s->bitdepth > 8);
> - s->height[plane] = inlink->h >> ((plane == 1 || plane == 2) ?
> desc->log2_chroma_h : 0);
> + s->height[plane] = plane == 1 || plane == 2 ?
> AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h) : inlink->h;
> }
>
> - s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
> + if (s->mode == MODE_DIFF)
> + s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
> + else if (s->mode == MODE_MEAN_CBRT)
> + s->sad = ff_scene_scrd_get_fn(s->bitdepth);
> if (!s->sad)
> return AVERROR(EINVAL);
>
> @@ -101,46 +117,86 @@ static int config_input(AVFilterLink *inlink)
> static av_cold void uninit(AVFilterContext *ctx)
> {
> SCDetContext *s = ctx->priv;
> -
> - av_frame_free(&s->prev_picref);
> }
>
> -static double get_scene_score(AVFilterContext *ctx, AVFrame *frame)
> +static void compute_diff(AVFilterContext *ctx)
> {
> - double ret = 0;
> SCDetContext *s = ctx->priv;
> - AVFrame *prev_picref = s->prev_picref;
> + AVFrame *prev_picref = s->prev_frame.picref;
> + AVFrame *curr_picref = s->curr_frame.picref;
>
> - if (prev_picref && frame->height == prev_picref->height
> - && frame->width == prev_picref->width) {
> - uint64_t sad = 0;
> - double mafd, diff;
> - uint64_t count = 0;
> + if (prev_picref && curr_picref
> + && curr_picref->height == prev_picref->height
> + && curr_picref->width == prev_picref->width) {
>
> + uint64_t sum = 0;
> + uint64_t count = 0;
> for (int plane = 0; plane < s->nb_planes; plane++) {
> - uint64_t plane_sad;
> + uint64_t plane_sum;
> s->sad(prev_picref->data[plane], prev_picref->linesize[plane],
> - frame->data[plane], frame->linesize[plane],
> - s->width[plane], s->height[plane], &plane_sad);
> - sad += plane_sad;
> + curr_picref->data[plane],
> curr_picref->linesize[plane],
> + s->width[plane], s->height[plane], &plane_sum);
> + sum += plane_sum;
> count += s->width[plane] * s->height[plane];
> }
>
> - mafd = (double)sad * 100. / count / (1ULL << s->bitdepth);
> - diff = fabs(mafd - s->prev_mafd);
> - ret = av_clipf(FFMIN(mafd, diff), 0, 100.);
> - s->prev_mafd = mafd;
> - av_frame_free(&prev_picref);
> + s->curr_frame.mafd = (double)sum * 100. / count / (1ULL <<
> s->bitdepth);
> + s->curr_frame.diff = s->curr_frame.mafd - s->prev_frame.mafd;
> + } else {
> + s->curr_frame.mafd = 0;
> + s->curr_frame.diff = 0;
> }
> - s->prev_picref = av_frame_clone(frame);
> - return ret;
> }
>
> -static int set_meta(SCDetContext *s, AVFrame *frame, const char *key,
> const
> char *value)
> +static int set_meta(AVFrame *frame, const char *key, const char *value)
> {
> return av_dict_set(&frame->metadata, key, value, 0);
> }
>
> +static int filter_frame(AVFilterContext* ctx, AVFrame* frame)
> +{
> + AVFilterLink* inlink = ctx->inputs[0];
> + AVFilterLink* outlink = ctx->outputs[0];
> + SCDetContext* s = ctx->priv;
> +
> + s->prev_frame = s->curr_frame;
> + s->curr_frame.picref = frame;
> +
> + if (s->prev_frame.picref) {
> + compute_diff(ctx);
> +
> + if (s->prev_frame.diff < -s->curr_frame.diff) {
> + s->prev_frame.diff = -s->curr_frame.diff;
> + s->prev_frame.mafd = s->curr_frame.mafd;
> + }
> + double scene_score = av_clipf(FFMAX(s->prev_frame.diff, 0), 0,
> 100.);
> +
> + char buf[64];
> + snprintf(buf, sizeof(buf), "%0.3f", s->prev_frame.mafd);
> + set_meta(s->prev_frame.picref, "lavfi.scd.mafd", buf);
> + snprintf(buf, sizeof(buf), "%0.3f", scene_score);
> + set_meta(s->prev_frame.picref, "lavfi.scd.score", buf);
> +
> + if (scene_score >= s->threshold) {
> + av_log(s, AV_LOG_INFO, "lavfi.scd.score: %.3f, lavfi.scd.time:
> %s\n",
> + scene_score, av_ts2timestr(s->prev_frame.picref->pts,
> &inlink->time_base));
> + set_meta(s->prev_frame.picref, "lavfi.scd.time",
> + av_ts2timestr(s->prev_frame.picref->pts,
> &inlink->time_base));
> + }
> +
> + if (s->sc_pass) {
> + if (scene_score >= s->threshold)
> + return ff_filter_frame(outlink, s->prev_frame.picref);
> + else
> + av_frame_free(&s->prev_frame.picref);
> + }
> + else
> + return ff_filter_frame(outlink, s->prev_frame.picref);
> + }
> +
> + return 0;
> +}
> +
> static int activate(AVFilterContext *ctx)
> {
> int ret;
> @@ -148,6 +204,8 @@ static int activate(AVFilterContext *ctx)
> AVFilterLink *outlink = ctx->outputs[0];
> SCDetContext *s = ctx->priv;
> AVFrame *frame;
> + int64_t pts;
> + int status;
>
> FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
>
> @@ -155,31 +213,17 @@ static int activate(AVFilterContext *ctx)
> if (ret < 0)
> return ret;
>
> - if (frame) {
> - char buf[64];
> - s->scene_score = get_scene_score(ctx, frame);
> - snprintf(buf, sizeof(buf), "%0.3f", s->prev_mafd);
> - set_meta(s, frame, "lavfi.scd.mafd", buf);
> - snprintf(buf, sizeof(buf), "%0.3f", s->scene_score);
> - set_meta(s, frame, "lavfi.scd.score", buf);
> + if (ret > 0)
> + return filter_frame(ctx, frame);
>
> - if (s->scene_score >= s->threshold) {
> - av_log(s, AV_LOG_INFO, "lavfi.scd.score: %.3f, lavfi.scd.time:
> %s\n",
> - s->scene_score, av_ts2timestr(frame->pts,
> &inlink->time_base));
> - set_meta(s, frame, "lavfi.scd.time",
> - av_ts2timestr(frame->pts, &inlink->time_base));
> - }
> - if (s->sc_pass) {
> - if (s->scene_score >= s->threshold)
> - return ff_filter_frame(outlink, frame);
> - else {
> - av_frame_free(&frame);
> - }
> - } else
> - return ff_filter_frame(outlink, frame);
> + if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
> + if (status == AVERROR_EOF)
> + ret = filter_frame(ctx, NULL);
> +
> + ff_outlink_set_status(outlink, status, pts);
> + return ret;
> }
>
> - FF_FILTER_FORWARD_STATUS(inlink, outlink);
> FF_FILTER_FORWARD_WANTED(outlink, inlink);
>
> return FFERROR_NOT_READY;
> @@ -190,12 +234,12 @@ static const AVFilterPad scdet_inputs[] = {
> .name = "default",
> .type = AVMEDIA_TYPE_VIDEO,
> .config_props = config_input,
> - },
> + }
> };
>
> const AVFilter ff_vf_scdet = {
> .name = "scdet",
> - .description = NULL_IF_CONFIG_SMALL("Detect video scene change"),
> + .description = NULL_IF_CONFIG_SMALL("Detect video scene change."),
> .priv_size = sizeof(SCDetContext),
> .priv_class = &scdet_class,
> .uninit = uninit,
> @@ -203,5 +247,5 @@ const AVFilter ff_vf_scdet = {
> FILTER_INPUTS(scdet_inputs),
> FILTER_OUTPUTS(ff_video_default_filterpad),
> FILTER_PIXFMTS_ARRAY(pix_fmts),
> - .activate = activate,
> + .activate = activate
> };
> diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
> index ee9f0f5e40..cff48e33d9 100644
> --- a/tests/fate/filter-video.mak
> +++ b/tests/fate/filter-video.mak
> @@ -672,6 +672,9 @@ SCDET_DEPS = LAVFI_INDEV FILE_PROTOCOL MOVIE_FILTER
> SCDET_FILTER SCALE_FILTER \
> FATE_METADATA_FILTER-$(call ALLYES, $(SCDET_DEPS)) +=
> fate-filter-metadata-scdet
> fate-filter-metadata-scdet: SRC =
> $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov
> fate-filter-metadata-scdet: CMD = run $(FILTER_METADATA_COMMAND)
> "sws_flags=+accurate_rnd+bitexact;movie='$(SRC)',scdet=s=1"
> +FATE_METADATA_FILTER-$(call ALLYES, $(SCDET_DEPS)) +=
> fate-filter-metadata-scdet1
> +fate-filter-metadata-scdet1: SRC =
> $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov
> +fate-filter-metadata-scdet1: CMD = run $(FILTER_METADATA_COMMAND)
> "sws_flags=+accurate_rnd+bitexact;movie='$(SRC)',scdet=s=1:t=6.5:mode=1"
>
> CROPDETECT_DEPS = LAVFI_INDEV FILE_PROTOCOL MOVIE_FILTER MOVIE_FILTER
> MESTIMATE_FILTER CROPDETECT_FILTER \
> SCALE_FILTER MOV_DEMUXER H264_DECODER
> --
> 2.43.0.windows.1
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCH] area changed: scdet filter
2024-06-02 20:17 ` radu.taraibuta
@ 2024-06-03 22:42 ` Michael Niedermayer
0 siblings, 0 replies; 10+ messages in thread
From: Michael Niedermayer @ 2024-06-03 22:42 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 10900 bytes --]
On Sun, Jun 02, 2024 at 11:17:29PM +0300, radu.taraibuta@gmail.com wrote:
>
>
> > -----Original Message-----
> > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> > Michael Niedermayer
> > Sent: vineri, 31 mai 2024 00:32
> > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
> > Subject: Re: [FFmpeg-devel] [PATCH] area changed: scdet filter
> >
> > On Mon, May 13, 2024 at 06:52:19PM +0300, radu.taraibuta@gmail.com
> > wrote:
> > > Previous observations:
> > >
> > > - Inconsistent code style with other filters. (Mostly using
> > > AVFilterLink* link instead of AVFilterLink *link).
> > > I hope it's fine now.
> > >
> > > - Unrelated changes, please split trivial unrelated changes into
> > > separate patches.
> > > Removed trivial changes from this patch.
> > >
> > > - Can't tables be generated at .init/.config_props time? No point in
> > > storing them into binary.
> > > Done.
> > >
> > > - Adding extra delay is not backward compatible change, it should be
> > > implemented properly by adding option for users to select mode: next &
> > > prev frame or just next or prev frame.
> > > Added legacy option to the mode parameter.
> > >
> > > - Could split frame clone change into earlier separate patch.
> > > Cannot be done. It's either frame clone or 1 frame delay.
> > >
> > > - Where are results of improvements with accuracy so it can be
> confirmed?
> > > Here are my test results with manual labeling of scene changes:
> > > 2379 Full length movie
> > >
> > > Method Threshold TP FP FN Precision
> > > Recall F
> > > Cubic 7 2357 423 22 0.847841727
> 0.990752417
> > > 0.913742973
> > > Cubic 10 2297 200 82 0.919903885
> 0.965531736
> > > 0.94216571
> > > Cubic 12 2217 146 162 0.938214135
> 0.931904161
> > > 0.935048503
> > > Cubic 15 2049 101 330 0.953023256
> 0.861286255
> > > 0.904835505
> > > Linear 2.8 2357 1060 22 0.689786362
> 0.990752417
> > > 0.813319531
> > > Linear 8 2099 236 280 0.898929336
> 0.882303489
> > > 0.890538821
> > > Linear 10 1886 173 493 0.91597863
> 0.792770071
> > > 0.849932402
> > > Legacy 5 2235 1260 144 0.639484979
> > 0.939470366
> > > 0.760980592
> > > Legacy 8 1998 414 381 0.828358209
> > 0.839848676
> > > 0.83406387
> > > Legacy 10 1743 193 636 0.900309917
> > 0.732660782
> > > 0.80787949
> > >
> > > 15 HDR10Plus_PB_EAC3JOC
> > > https://mega.nz/file/nehDka6Z#C5_OPbSZkONdOp1jRmc09C9-
> > viDc3zMj8ZHruHcW
> > > KyA
> > >
> > > Method Threshold TP FP FN Precision
> > > Recall F
> > > Cubic 10 15 0 0 1 1 1
> > > Linear 5 13 1 2 0.928571429
> 0.866666667
> > > 0.896551724
> > > Legacy 5 12 2 3 0.857142857 0.8
> > > 0.827586207
> > >
> > > 21 (HDR HEVC 10-bit BT.2020 24fps) Exodus Sample
> > >
> > https://mega.nz/file/Sfw1hDpK#ErxCOpQDVjcI1gq6ZbX3vIfdtXZompkFe0jq47E
> > h
> > > R2o
> > >
> > > Method Threshold TP FP FN Precision
> > > Recall F
> > > Cubic 10 21 0 0 1 1 1
> > > Linear 4 20 0 1 1 0.952380952
> > > 0.975609756
> > > Legacy 4 19 0 2 1 0.904761905
> > 0.95
> > >
> > > 94 Bieber Grammys
> > > https://mega.nz/#!c9dhAaKA!MG5Yi-
> > MJNATE2_KqcnNJZCRKtTWvdjJP1NwG8Ggdw3E
> > >
> > > Method Threshold TP FP FN Precision
> > > Recall F
> > > Cubic 15 91 23 3 0.798245614
> 0.968085106
> > > 0.875
> > > Cubic 18 85 9 9 0.904255319
> 0.904255319
> > > 0.904255319
> > > Linear 7 79 49 15 0.6171875
> 0.840425532
> > > 0.711711712
> > > Linear 8 74 28 20 0.725490196
> 0.787234043
> > > 0.755102041
> > > Legacy 7 74 40 20 0.649122807
> > 0.787234043
> > > 0.711538462
> > > Legacy 8 71 26 23 0.731958763
> > 0.755319149
> > > 0.743455497
> > >
> > >
> > > Improve scene detection accuracy by comparing frame with both previous
> > > and next frame (creates one frame delay).
> > > Add new mode parameter and new method to compute the frame difference
> > > using cubic square to increase the weight of small changes and new mean
> > formula.
> > > This improves accuracy significantly. Slightly improve performance by
> > > not using frame clone.
> > > Add legacy mode for backward compatibility.
> > >
> > > Signed-off-by: raduct <radu.taraibuta@gmail.com>
> > > ---
> > > doc/filters.texi | 16 ++++
> > > libavfilter/scene_sad.c | 151 ++++++++++++++++++++++++++++++++++
> > > libavfilter/scene_sad.h | 6 ++
> > > libavfilter/vf_scdet.c | 156 +++++++++++++++++++++++++-----------
> > > tests/fate/filter-video.mak | 3 +
> > > 5 files changed, 284 insertions(+), 48 deletions(-)
> > >
> > > diff --git a/doc/filters.texi b/doc/filters.texi index
> > > bfa8ccec8b..53814e003b 100644
> > > --- a/doc/filters.texi
> > > +++ b/doc/filters.texi
> > > @@ -21797,6 +21797,22 @@ Default value is @code{10.}.
> > > @item sc_pass, s
> > > Set the flag to pass scene change frames to the next filter. Default
> > > value is @code{0}
> >
> > The patch is corrupted by linebreaks:
> >
> > Applying: area changed: scdet filter
> > error: corrupt patch at line 16
> > Patch failed at 0001 area changed: scdet filter
> >
> > please check the linebreak settings or attach the patch or use git
> send-email
> >
> > thx
> >
> > [...]
> > --
> > Michael GnuPG fingerprint:
> > 9FF2128B147EF6730BADF133611EC787040B0FAB
> >
> > Homeopathy is like voting while filling the ballot out with transparent
> ink.
> > Sometimes the outcome one wanted occurs. Rarely its worse than filling out
> a
> > ballot properly.
>
> Please find attached the patch.
>
> doc/filters.texi | 16 ++++
> libavfilter/scene_sad.c | 151 ++++++++++++++++++++++++++++++++++++++++++
> libavfilter/scene_sad.h | 6 +
> libavfilter/vf_scdet.c | 156 ++++++++++++++++++++++++++++++--------------
> tests/fate/filter-video.mak | 3
> 5 files changed, 284 insertions(+), 48 deletions(-)
> 8f29f2e1c202ab283a9ca0f5d9599de6ab534d7a 0001-area-changed-scdet-filter.patch
> From 6d55c65d92376b0ab6e3bb2439af30fbcc430d0b Mon Sep 17 00:00:00 2001
> From: raduct <radu.taraibuta@gmail.com>
> Date: Wed, 8 May 2024 08:24:46 +0300
> Subject: [PATCH] area changed: scdet filter
>
> Improve scene detection accuracy by comparing each frame with both the previous and the next frame (this introduces a one-frame delay).
> Add a new mode parameter and a new method that computes the frame difference using the cube root, which increases the weight of small changes, together with a new mean formula. This improves accuracy significantly. Slightly improve performance by not using frame clone.
> Add legacy mode for backward compatibility.
>
> Signed-off-by: raduct <radu.taraibuta@gmail.com>
> ---
> doc/filters.texi | 16 ++++
> libavfilter/scene_sad.c | 151 ++++++++++++++++++++++++++++++++++
> libavfilter/scene_sad.h | 6 ++
> libavfilter/vf_scdet.c | 156 +++++++++++++++++++++++++-----------
> tests/fate/filter-video.mak | 3 +
> 5 files changed, 284 insertions(+), 48 deletions(-)
fails to build
libavfilter/scene_sad.c: In function ‘ff_init_cbrt’:
libavfilter/scene_sad.c:86:5: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
86 | uint8_t *table = cbrt_table[bitdepth];
| ^~~~~~~
libavfilter/scene_sad.c:92:13: error: implicit declaration of function ‘av_malloc’; did you mean ‘malloc’? [-Werror=implicit-function-declaration]
92 | table = av_malloc((1 << bitdepth) * (bitdepth > 8 ? 2 : 1));
| ^~~~~~~~~
| malloc
libavfilter/scene_sad.c:92:11: warning: assignment to ‘uint8_t *’ {aka ‘unsigned char *’} from ‘int’ makes pointer from integer without a cast [-Wint-conversion]
92 | table = av_malloc((1 << bitdepth) * (bitdepth > 8 ? 2 : 1));
| ^
libavfilter/scene_sad.c:98:5: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
98 | int size = 1 << bitdepth;
| ^~~
libavfilter/scene_sad.c: In function ‘ff_uninit_cbrt’:
libavfilter/scene_sad.c:120:9: error: implicit declaration of function ‘av_free’; did you mean ‘free’? [-Werror=implicit-function-declaration]
120 | av_free(cbrt_table[bitdepth]);
| ^~~~~~~
| free
libavfilter/scene_sad.c: At top level:
libavfilter/scene_sad.c:126:6: error: no previous prototype for ‘ff_scene_scrd_c’ [-Werror=missing-prototypes]
126 | void ff_scene_scrd_c(SCENE_SAD_PARAMS)
| ^~~~~~~~~~~~~~~
libavfilter/scene_sad.c: In function ‘ff_scene_scrd_c’:
libavfilter/scene_sad.c:148:5: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
148 | double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
| ^~~~~~
libavfilter/scene_sad.c: At top level:
libavfilter/scene_sad.c:152:6: error: no previous prototype for ‘ff_scene_scrd2B_c’ [-Werror=missing-prototypes]
152 | void ff_scene_scrd2B_c(SCENE_SAD_PARAMS, int bitdepth)
| ^~~~~~~~~~~~~~~~~
libavfilter/scene_sad.c: In function ‘ff_scene_scrd2B_c’:
libavfilter/scene_sad.c:179:5: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
179 | double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
| ^~~~~~
libavfilter/scene_sad.c: At top level:
libavfilter/scene_sad.c:183:6: error: no previous prototype for ‘ff_scene_scrd9_c’ [-Werror=missing-prototypes]
183 | void ff_scene_scrd9_c(SCENE_SAD_PARAMS)
| ^~~~~~~~~~~~~~~~
libavfilter/scene_sad.c:188:6: error: no previous prototype for ‘ff_scene_scrd10_c’ [-Werror=missing-prototypes]
188 | void ff_scene_scrd10_c(SCENE_SAD_PARAMS)
| ^~~~~~~~~~~~~~~~~
libavfilter/scene_sad.c:193:6: error: no previous prototype for ‘ff_scene_scrd12_c’ [-Werror=missing-prototypes]
193 | void ff_scene_scrd12_c(SCENE_SAD_PARAMS)
| ^~~~~~~~~~~~~~~~~
libavfilter/scene_sad.c:198:6: error: no previous prototype for ‘ff_scene_scrd14_c’ [-Werror=missing-prototypes]
198 | void ff_scene_scrd14_c(SCENE_SAD_PARAMS)
| ^~~~~~~~~~~~~~~~~
libavfilter/scene_sad.c:203:6: error: no previous prototype for ‘ff_scene_scrd16_c’ [-Werror=missing-prototypes]
203 | void ff_scene_scrd16_c(SCENE_SAD_PARAMS)
| ^~~~~~~~~~~~~~~~~
cc1: some warnings being treated as errors
make: *** [ffbuild/common.mak:81: libavfilter/scene_sad.o] Error 1
make: *** Waiting for unfinished jobs....
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Frequently ignored answer#1 FFmpeg bugs should be sent to our bugtracker. User
questions about the command line tools should be sent to the ffmpeg-user ML.
And questions about how to use libav* should be sent to the libav-user ML.
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCH] area changed: scdet filter
2024-05-30 21:31 ` Michael Niedermayer
@ 2024-06-02 20:17 ` radu.taraibuta
2024-06-03 22:42 ` Michael Niedermayer
0 siblings, 1 reply; 10+ messages in thread
From: radu.taraibuta @ 2024-06-02 20:17 UTC (permalink / raw)
To: 'FFmpeg development discussions and patches'
[-- Attachment #1: Type: text/plain, Size: 5009 bytes --]
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> Michael Niedermayer
> Sent: vineri, 31 mai 2024 00:32
> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] [PATCH] area changed: scdet filter
>
> On Mon, May 13, 2024 at 06:52:19PM +0300, radu.taraibuta@gmail.com
> wrote:
> > Previous observations:
> >
> > - Inconsistent code style with other filters. (Mostly using
> > AVFilterLink* link instead of AVFilterLink *link).
> > I hope it's fine now.
> >
> > - Unrelated changes, please split trivial unrelated changes into
> > separate patches.
> > Removed trivial changes from this patch.
> >
> > - Can't tables be generated at .init/.config_props time? No point in
> > storing them into binary.
> > Done.
> >
> > - Adding extra delay is not backward compatible change, it should be
> > implemented properly by adding option for users to select mode: next &
> > prev frame or just next or prev frame.
> > Added legacy option to the mode parameter.
> >
> > - Could split frame clone change into earlier separate patch.
> > Cannot be done. It's either frame clone or 1 frame delay.
> >
> > - Where are results of improvements with accuracy so it can be
confirmed?
> > Here are my test results with manual labeling of scene changes:
> > 2379 Full length movie
> >
> > Method Threshold TP FP FN Precision
> > Recall F
> > Cubic 7 2357 423 22 0.847841727
0.990752417
> > 0.913742973
> > Cubic 10 2297 200 82 0.919903885
0.965531736
> > 0.94216571
> > Cubic 12 2217 146 162 0.938214135
0.931904161
> > 0.935048503
> > Cubic 15 2049 101 330 0.953023256
0.861286255
> > 0.904835505
> > Linear 2.8 2357 1060 22 0.689786362
0.990752417
> > 0.813319531
> > Linear 8 2099 236 280 0.898929336
0.882303489
> > 0.890538821
> > Linear 10 1886 173 493 0.91597863
0.792770071
> > 0.849932402
> > Legacy 5 2235 1260 144 0.639484979
> 0.939470366
> > 0.760980592
> > Legacy 8 1998 414 381 0.828358209
> 0.839848676
> > 0.83406387
> > Legacy 10 1743 193 636 0.900309917
> 0.732660782
> > 0.80787949
> >
> > 15 HDR10Plus_PB_EAC3JOC
> > https://mega.nz/file/nehDka6Z#C5_OPbSZkONdOp1jRmc09C9-
> viDc3zMj8ZHruHcW
> > KyA
> >
> > Method Threshold TP FP FN Precision
> > Recall F
> > Cubic 10 15 0 0 1 1 1
> > Linear 5 13 1 2 0.928571429
0.866666667
> > 0.896551724
> > Legacy 5 12 2 3 0.857142857 0.8
> > 0.827586207
> >
> > 21 (HDR HEVC 10-bit BT.2020 24fps) Exodus Sample
> >
> https://mega.nz/file/Sfw1hDpK#ErxCOpQDVjcI1gq6ZbX3vIfdtXZompkFe0jq47E
> h
> > R2o
> >
> > Method Threshold TP FP FN Precision
> > Recall F
> > Cubic 10 21 0 0 1 1 1
> > Linear 4 20 0 1 1 0.952380952
> > 0.975609756
> > Legacy 4 19 0 2 1 0.904761905
> 0.95
> >
> > 94 Bieber Grammys
> > https://mega.nz/#!c9dhAaKA!MG5Yi-
> MJNATE2_KqcnNJZCRKtTWvdjJP1NwG8Ggdw3E
> >
> > Method Threshold TP FP FN Precision
> > Recall F
> > Cubic 15 91 23 3 0.798245614
0.968085106
> > 0.875
> > Cubic 18 85 9 9 0.904255319
0.904255319
> > 0.904255319
> > Linear 7 79 49 15 0.6171875
0.840425532
> > 0.711711712
> > Linear 8 74 28 20 0.725490196
0.787234043
> > 0.755102041
> > Legacy 7 74 40 20 0.649122807
> 0.787234043
> > 0.711538462
> > Legacy 8 71 26 23 0.731958763
> 0.755319149
> > 0.743455497
> >
> >
> > Improve scene detection accuracy by comparing frame with both previous
> > and next frame (creates one frame delay).
> > Add new mode parameter and new method to compute the frame difference
> > using cubic square to increase the weight of small changes and new mean
> formula.
> > This improves accuracy significantly. Slightly improve performance by
> > not using frame clone.
> > Add legacy mode for backward compatibility.
> >
> > Signed-off-by: raduct <radu.taraibuta@gmail.com>
> > ---
> > doc/filters.texi | 16 ++++
> > libavfilter/scene_sad.c | 151 ++++++++++++++++++++++++++++++++++
> > libavfilter/scene_sad.h | 6 ++
> > libavfilter/vf_scdet.c | 156 +++++++++++++++++++++++++-----------
> > tests/fate/filter-video.mak | 3 +
> > 5 files changed, 284 insertions(+), 48 deletions(-)
> >
> > diff --git a/doc/filters.texi b/doc/filters.texi index
> > bfa8ccec8b..53814e003b 100644
> > --- a/doc/filters.texi
> > +++ b/doc/filters.texi
> > @@ -21797,6 +21797,22 @@ Default value is @code{10.}.
> > @item sc_pass, s
> > Set the flag to pass scene change frames to the next filter. Default
> > value is @code{0}
>
> The patch is corrupted by linebreaks:
>
> Applying: area changed: scdet filter
> error: corrupt patch at line 16
> Patch failed at 0001 area changed: scdet filter
>
> please check the linebreak settings or attach the patch or use git
send-email
>
> thx
>
> [...]
> --
> Michael GnuPG fingerprint:
> 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Homeopathy is like voting while filling the ballot out with transparent
ink.
> Sometimes the outcome one wanted occurs. Rarely its worse than filling out
a
> ballot properly.
Please find attached the patch.
[-- Attachment #2: 0001-area-changed-scdet-filter.patch --]
[-- Type: application/octet-stream, Size: 16580 bytes --]
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCH] area changed: scdet filter
2024-05-13 15:52 radu.taraibuta
2024-05-19 16:05 ` radu.taraibuta
@ 2024-05-30 21:31 ` Michael Niedermayer
2024-06-02 20:17 ` radu.taraibuta
1 sibling, 1 reply; 10+ messages in thread
From: Michael Niedermayer @ 2024-05-30 21:31 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 4542 bytes --]
On Mon, May 13, 2024 at 06:52:19PM +0300, radu.taraibuta@gmail.com wrote:
> Previous observations:
>
> - Inconsistent code style with other filters. (Mostly using AVFilterLink*
> link instead of AVFilterLink *link).
> I hope it's fine now.
>
> - Unrelated changes, please split trivial unrelated changes into separate
> patches.
> Removed trivial changes from this patch.
>
> - Can't tables be generated at .init/.config_props time? No point in
> storing them into binary.
> Done.
>
> - Adding extra delay is not backward compatible change, it should be
> implemented properly by adding option for users to select mode: next & prev
> frame or just next or prev frame.
> Added legacy option to the mode parameter.
>
> - Could split frame clone change into earlier separate patch.
> Cannot be done. It's either frame clone or 1 frame delay.
>
> - Where are results of improvements with accuracy so it can be confirmed?
> Here are my test results with manual labeling of scene changes:
> 2379 Full length movie
>
> Method  Threshold  TP    FP    FN   Precision    Recall       F
> Cubic   7          2357  423   22   0.847841727  0.990752417  0.913742973
> Cubic   10         2297  200   82   0.919903885  0.965531736  0.94216571
> Cubic   12         2217  146   162  0.938214135  0.931904161  0.935048503
> Cubic   15         2049  101   330  0.953023256  0.861286255  0.904835505
> Linear  2.8        2357  1060  22   0.689786362  0.990752417  0.813319531
> Linear  8          2099  236   280  0.898929336  0.882303489  0.890538821
> Linear  10         1886  173   493  0.91597863   0.792770071  0.849932402
> Legacy  5          2235  1260  144  0.639484979  0.939470366  0.760980592
> Legacy  8          1998  414   381  0.828358209  0.839848676  0.83406387
> Legacy  10         1743  193   636  0.900309917  0.732660782  0.80787949
>
> 15 HDR10Plus_PB_EAC3JOC
> https://mega.nz/file/nehDka6Z#C5_OPbSZkONdOp1jRmc09C9-viDc3zMj8ZHruHcWKyA
>
> Method  Threshold  TP  FP  FN  Precision    Recall       F
> Cubic   10         15  0   0   1            1            1
> Linear  5          13  1   2   0.928571429  0.866666667  0.896551724
> Legacy  5          12  2   3   0.857142857  0.8          0.827586207
>
> 21 (HDR HEVC 10-bit BT.2020 24fps) Exodus Sample
> https://mega.nz/file/Sfw1hDpK#ErxCOpQDVjcI1gq6ZbX3vIfdtXZompkFe0jq47EhR2o
>
> Method  Threshold  TP  FP  FN  Precision  Recall       F
> Cubic   10         21  0   0   1          1            1
> Linear  4          20  0   1   1          0.952380952  0.975609756
> Legacy  4          19  0   2   1          0.904761905  0.95
>
> 94 Bieber Grammys
> https://mega.nz/#!c9dhAaKA!MG5Yi-MJNATE2_KqcnNJZCRKtTWvdjJP1NwG8Ggdw3E
>
> Method  Threshold  TP  FP  FN  Precision    Recall       F
> Cubic   15         91  23  3   0.798245614  0.968085106  0.875
> Cubic   18         85  9   9   0.904255319  0.904255319  0.904255319
> Linear  7          79  49  15  0.6171875    0.840425532  0.711711712
> Linear  8          74  28  20  0.725490196  0.787234043  0.755102041
> Legacy  7          74  40  20  0.649122807  0.787234043  0.711538462
> Legacy  8          71  26  23  0.731958763  0.755319149  0.743455497
>
>
> Improve scene detection accuracy by comparing frame with both previous and
> next frame (creates one frame delay).
> Add a new mode parameter and a new method that computes the frame difference
> using the cube root, which increases the weight of small changes, together
> with a new mean formula.
> This improves accuracy significantly. Slightly improve performance by not
> using frame clone.
> Add legacy mode for backward compatibility.
>
> Signed-off-by: raduct <radu.taraibuta@gmail.com>
> ---
> doc/filters.texi | 16 ++++
> libavfilter/scene_sad.c | 151 ++++++++++++++++++++++++++++++++++
> libavfilter/scene_sad.h | 6 ++
> libavfilter/vf_scdet.c | 156 +++++++++++++++++++++++++-----------
> tests/fate/filter-video.mak | 3 +
> 5 files changed, 284 insertions(+), 48 deletions(-)
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index bfa8ccec8b..53814e003b 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -21797,6 +21797,22 @@ Default value is @code{10.}.
> @item sc_pass, s
> Set the flag to pass scene change frames to the next filter. Default value
> is @code{0}
The patch is corrupted by linebreaks:
Applying: area changed: scdet filter
error: corrupt patch at line 16
Patch failed at 0001 area changed: scdet filter
please check the linebreak settings or attach the patch or use git send-email
thx
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Homeopathy is like voting while filling the ballot out with transparent ink.
Sometimes the outcome one wanted occurs. Rarely it's worse than filling out
a ballot properly.
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCH] area changed: scdet filter
2024-05-28 7:51 ` radu.taraibuta
@ 2024-05-28 13:16 ` Paul B Mahol
0 siblings, 0 replies; 10+ messages in thread
From: Paul B Mahol @ 2024-05-28 13:16 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Tue, May 28, 2024 at 9:51 AM <radu.taraibuta@gmail.com> wrote:
> > -----Original Message-----
> > From: radu.taraibuta@gmail.com <radu.taraibuta@gmail.com>
> > Sent: duminică, 19 mai 2024 19:05
> > To: ffmpeg-devel@ffmpeg.org
> > Subject: RE: [PATCH] area changed: scdet filter
> >
> >
> > > -----Original Message-----
> > > From: radu.taraibuta@gmail.com <radu.taraibuta@gmail.com>
> > > Sent: luni, 13 mai 2024 18:52
> > > To: ffmpeg-devel@ffmpeg.org
> > > Subject: [PATCH] area changed: scdet filter
> > >
> > > Previous observations:
> > >
> > > - Inconsistent code style with other filters. (Mostly using
> > > AVFilterLink* link instead of AVFilterLink *link).
> > > I hope it's fine now.
> > >
> > > - Unrelated changes, please split trivial unrelated changes into
> > > separate patches.
> > > Removed trivial changes from this patch.
> > >
> > > - Can't tables be generated at .init/.config_props time? No point in
> > > storing them into binary.
> > > Done.
> > >
> > > - Adding extra delay is not backward compatible change, it should be
> > > implemented properly by adding option for users to select mode: next &
> > prev
> > > frame or just next or prev frame.
> > > Added legacy option to the mode parameter.
> > >
> > > - Could split frame clone change into earlier separate patch.
> > > Cannot be done. It's either frame clone or 1 frame delay.
> > >
> > > - Where are results of improvements with accuracy so it can be
> confirmed?
> > > Here are my test results with manual labeling of scene changes:
> > > 2379 Full length movie
> > >
> > > Method Threshold TP FP FN Precision
> > > Recall F
> > > Cubic 7 2357 423 22 0.847841727
> 0.990752417
> > > 0.913742973
> > > Cubic 10 2297 200 82 0.919903885
> 0.965531736
> > > 0.94216571
> > > Cubic 12 2217 146 162 0.938214135
> 0.931904161
> > > 0.935048503
> > > Cubic 15 2049 101 330 0.953023256
> 0.861286255
> > > 0.904835505
> > > Linear 2.8 2357 1060 22 0.689786362
> > 0.990752417
> > > 0.813319531
> > > Linear 8 2099 236 280 0.898929336
> > 0.882303489
> > > 0.890538821
> > > Linear 10 1886 173 493 0.91597863
> > 0.792770071
> > > 0.849932402
> > > Legacy 5 2235 1260 144 0.639484979
> > 0.939470366
> > > 0.760980592
> > > Legacy 8 1998 414 381 0.828358209
> > 0.839848676
> > > 0.83406387
> > > Legacy 10 1743 193 636 0.900309917
> > 0.732660782
> > > 0.80787949
> > >
> > > 15 HDR10Plus_PB_EAC3JOC
> > > https://mega.nz/file/nehDka6Z#C5_OPbSZkONdOp1jRmc09C9-
> > > viDc3zMj8ZHruHcWKyA
> > >
> > > Method Threshold TP FP FN Precision
> > > Recall F
> > > Cubic 10 15 0 0 1 1 1
> > > Linear 5 13 1 2 0.928571429
> > 0.866666667
> > > 0.896551724
> > > Legacy 5 12 2 3 0.857142857 0.8
> > > 0.827586207
> > >
> > > 21 (HDR HEVC 10-bit BT.2020 24fps) Exodus Sample
> > >
> > https://mega.nz/file/Sfw1hDpK#ErxCOpQDVjcI1gq6ZbX3vIfdtXZompkFe0jq47E
> > > hR2o
> > >
> > > Method Threshold TP FP FN Precision
> > > Recall F
> > > Cubic 10 21 0 0 1 1 1
> > > Linear 4 20 0 1 1 0.952380952
> > > 0.975609756
> > > Legacy 4 19 0 2 1 0.904761905
> > 0.95
> > >
> > > 94 Bieber Grammys
> > > https://mega.nz/#!c9dhAaKA!MG5Yi-
> > > MJNATE2_KqcnNJZCRKtTWvdjJP1NwG8Ggdw3E
> > >
> > > Method Threshold TP FP FN Precision
> > > Recall F
> > > Cubic 15 91 23 3 0.798245614
> 0.968085106
> > > 0.875
> > > Cubic 18 85 9 9 0.904255319
> 0.904255319
> > > 0.904255319
> > > Linear 7 79 49 15 0.6171875
> > 0.840425532
> > > 0.711711712
> > > Linear 8 74 28 20 0.725490196
> > 0.787234043
> > > 0.755102041
> > > Legacy 7 74 40 20 0.649122807
> > 0.787234043
> > > 0.711538462
> > > Legacy 8 71 26 23 0.731958763
> > 0.755319149
> > > 0.743455497
> > >
> > >
> > > Improve scene detection accuracy by comparing frame with both previous
> > > and next frame (creates one frame delay).
> > > Add new mode parameter and new method to compute the frame difference
> > > > using cubic root to increase the weight of small changes and new
> > > mean formula.
> > > This improves accuracy significantly. Slightly improve performance by
> > > not using frame clone.
> > > Add legacy mode for backward compatibility.
> > >
> > > Signed-off-by: raduct <radu.taraibuta@gmail.com>
> > > ---
> > > doc/filters.texi | 16 ++++
> > > libavfilter/scene_sad.c | 151 ++++++++++++++++++++++++++++++++++
> > > libavfilter/scene_sad.h | 6 ++
> > > libavfilter/vf_scdet.c | 156 +++++++++++++++++++++++++-----------
> > > tests/fate/filter-video.mak | 3 +
> > > 5 files changed, 284 insertions(+), 48 deletions(-)
> > >
> > > diff --git a/doc/filters.texi b/doc/filters.texi index
> > > bfa8ccec8b..53814e003b 100644
> > > --- a/doc/filters.texi
> > > +++ b/doc/filters.texi
> > > @@ -21797,6 +21797,22 @@ Default value is @code{10.}.
> > > @item sc_pass, s
> > > Set the flag to pass scene change frames to the next filter. Default
> > value
> > > is @code{0}
> > > You can enable it if you want to get snapshot of scene change frames
> > only.
> > > +
> > > +@item mode
> > > +Set the scene change detection method. Default value is @code{-1}
> > > +Available values are:
> > > +
> > > +@table @samp
> > > +@item -1
> > > +Legacy mode for sum of absolute linear differences. Compare frame
> > > +with
> > > previous only and no delay.
> > > +
> > > +@item 0
> > > +Sum of absolute linear differences. Compare frame with both previous
> > > +and
> > > next which introduces a 1 frame delay.
> > > +
> > > +@item 1
> > > +Sum of mean of cubic root differences. Compare frame with both
> > > +previous
> > > and
> > > next which introduces a 1 frame delay.
> > > +
> > > +@end table
> > > @end table
> > >
> > > @anchor{selectivecolor}
> > > diff --git a/libavfilter/scene_sad.c b/libavfilter/scene_sad.c index
> > > caf911eb5d..9b80d426bc 100644
> > > --- a/libavfilter/scene_sad.c
> > > +++ b/libavfilter/scene_sad.c
> > > @@ -21,6 +21,7 @@
> > > * Scene SAD functions
> > > */
> > >
> > > +#include "libavutil/thread.h"
> > > #include "scene_sad.h"
> > >
> > > void ff_scene_sad16_c(SCENE_SAD_PARAMS)
> > > @@ -71,3 +72,153 @@ ff_scene_sad_fn ff_scene_sad_get_fn(int depth)
> > > return sad;
> > > }
> > >
> > > +static AVMutex cbrt_mutex = AV_MUTEX_INITIALIZER; static uint8_t
> > > +*cbrt_table[16] = { NULL }; static int cbrt_table_ref[16] = { 0 };
> > > +
> > > +int ff_init_cbrt(int bitdepth)
> > > +{
> > > + if (bitdepth < 4 || bitdepth > 16)
> > > + return AVERROR(EINVAL);
> > > +
> > > + ff_mutex_lock(&cbrt_mutex);
> > > +
> > > + uint8_t *table = cbrt_table[bitdepth];
> > > + if (table) {
> > > + cbrt_table_ref[bitdepth]++;
> > > + goto end;
> > > + }
> > > +
> > > + table = av_malloc((1 << bitdepth) * (bitdepth > 8 ? 2 : 1));
> > > + if (!table)
> > > + goto end;
> > > + cbrt_table[bitdepth] = table;
> > > + cbrt_table_ref[bitdepth] = 1;
> > > +
> > > + int size = 1 << bitdepth;
> > > + double factor = pow(size - 1, 2. / 3.);
> > > + if (bitdepth <= 8) {
> > > + for (int i = 0; i < size; i++)
> > > + table[i] = round(factor * pow(i, 1. / 3.));
> > > + } else {
> > > + uint16_t *tablew = (uint16_t*)table;
> > > + for (int i = 0; i < size; i++)
> > > + tablew[i] = round(factor * pow(i, 1. / 3.));
> > > + }
> > > +
> > > +end:
> > > + ff_mutex_unlock(&cbrt_mutex);
> > > + return table != NULL;
> > > +}
> > > +
> > > +void ff_uninit_cbrt(int bitdepth)
> > > +{
> > > + if (bitdepth < 4 || bitdepth > 16)
> > > + return;
> > > + ff_mutex_lock(&cbrt_mutex);
> > > + if (!--cbrt_table_ref[bitdepth]) {
> > > + av_free(cbrt_table[bitdepth]);
> > > + cbrt_table[bitdepth] = NULL;
> > > + }
> > > + ff_mutex_unlock(&cbrt_mutex);
> > > +}
> > > +
> > > +void ff_scene_scrd_c(SCENE_SAD_PARAMS) {
> > > + uint64_t scrdPlus = 0;
> > > + uint64_t scrdMinus = 0;
> > > + int x, y;
> > > +
> > > + uint8_t *table = cbrt_table[8];
> > > + if (!table) {
> > > + *sum = 0;
> > > + return;
> > > + }
> > > +
> > > + for (y = 0; y < height; y++) {
> > > + for (x = 0; x < width; x++)
> > > + if (src1[x] > src2[x])
> > > + scrdMinus += table[src1[x] - src2[x]];
> > > + else
> > > + scrdPlus += table[src2[x] - src1[x]];
> > > + src1 += stride1;
> > > + src2 += stride2;
> > > + }
> > > +
> > > + double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
> > > + *sum = 2.0 * mean * mean;
> > > +}
> > > +
> > > +void ff_scene_scrd2B_c(SCENE_SAD_PARAMS, int bitdepth) {
> > > + uint64_t scrdPlus = 0;
> > > + uint64_t scrdMinus = 0;
> > > + const uint16_t *src1w = (const uint16_t*)src1;
> > > + const uint16_t *src2w = (const uint16_t*)src2;
> > > + int x, y;
> > > +
> > > + uint16_t *table = (uint16_t*)cbrt_table[bitdepth];
> > > + if (!table) {
> > > + *sum = 0;
> > > + return;
> > > + }
> > > +
> > > + stride1 /= 2;
> > > + stride2 /= 2;
> > > +
> > > + for (y = 0; y < height; y++) {
> > > + for (x = 0; x < width; x++)
> > > + if (src1w[x] > src2w[x])
> > > + scrdMinus += table[src1w[x] - src2w[x]];
> > > + else
> > > + scrdPlus += table[src2w[x] - src1w[x]];
> > > + src1w += stride1;
> > > + src2w += stride2;
> > > + }
> > > +
> > > + double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
> > > + *sum = 2.0 * mean * mean;
> > > +}
> > > +
> > > +void ff_scene_scrd9_c(SCENE_SAD_PARAMS)
> > > +{
> > > + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height,
> > > +sum,
> > 9);
> > > +}
> > > +
> > > +void ff_scene_scrd10_c(SCENE_SAD_PARAMS)
> > > +{
> > > + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height,
> > > +sum,
> > > 10);
> > > +}
> > > +
> > > +void ff_scene_scrd12_c(SCENE_SAD_PARAMS)
> > > +{
> > > + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height,
> > > +sum,
> > > 12);
> > > +}
> > > +
> > > +void ff_scene_scrd14_c(SCENE_SAD_PARAMS)
> > > +{
> > > + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height,
> > > +sum,
> > > 14);
> > > +}
> > > +
> > > +void ff_scene_scrd16_c(SCENE_SAD_PARAMS)
> > > +{
> > > + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height,
> > > +sum,
> > > 16);
> > > +}
> > > +
> > > +ff_scene_sad_fn ff_scene_scrd_get_fn(int depth) {
> > > + ff_scene_sad_fn scrd = NULL;
> > > + if (depth == 8)
> > > + scrd = ff_scene_scrd_c;
> > > + else if (depth == 9)
> > > + scrd = ff_scene_scrd9_c;
> > > + else if (depth == 10)
> > > + scrd = ff_scene_scrd10_c;
> > > + else if (depth == 12)
> > > + scrd = ff_scene_scrd12_c;
> > > + else if (depth == 14)
> > > + scrd = ff_scene_scrd14_c;
> > > + else if (depth == 16)
> > > + scrd = ff_scene_scrd16_c;
> > > + return scrd;
> > > +}
> > > diff --git a/libavfilter/scene_sad.h b/libavfilter/scene_sad.h index
> > > 173a051f2b..c294bd90f9 100644
> > > --- a/libavfilter/scene_sad.h
> > > +++ b/libavfilter/scene_sad.h
> > > @@ -41,4 +41,10 @@ ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth);
> > >
> > > ff_scene_sad_fn ff_scene_sad_get_fn(int depth);
> > >
> > > +ff_scene_sad_fn ff_scene_scrd_get_fn(int depth);
> > > +
> > > +int ff_init_cbrt(int bitdepth);
> > > +
> > > +void ff_uninit_cbrt(int bitdepth);
> > > +
> > > #endif /* AVFILTER_SCENE_SAD_H */
> > > diff --git a/libavfilter/vf_scdet.c b/libavfilter/vf_scdet.c index
> > > 15399cfebf..93da5837b3 100644
> > > --- a/libavfilter/vf_scdet.c
> > > +++ b/libavfilter/vf_scdet.c
> > > @@ -31,6 +31,18 @@
> > > #include "scene_sad.h"
> > > #include "video.h"
> > >
> > > +enum SCDETMode {
> > > + MODE_LEGACY = -1,
> > > + MODE_LINEAR = 0,
> > > + MODE_MEAN_CBRT = 1
> > > +};
> > > +
> > > +typedef struct SCDETFrameInfo {
> > > + AVFrame *picref;
> > > + double mafd;
> > > + double diff;
> > > +} SCDETFrameInfo;
> > > +
> > > typedef struct SCDetContext {
> > > const AVClass *class;
> > >
> > > @@ -39,11 +51,12 @@ typedef struct SCDetContext {
> > > int nb_planes;
> > > int bitdepth;
> > > ff_scene_sad_fn sad;
> > > - double prev_mafd;
> > > - double scene_score;
> > > - AVFrame *prev_picref;
> > > + SCDETFrameInfo curr_frame;
> > > + SCDETFrameInfo prev_frame;
> > > +
> > > double threshold;
> > > int sc_pass;
> > > + enum SCDETMode mode;
> > > } SCDetContext;
> > >
> > > #define OFFSET(x) offsetof(SCDetContext, x) @@ -55,6 +68,7 @@ static
> > > const AVOption scdet_options[] = {
> > > { "t", "set scene change detect threshold",
> > > OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 10.}, 0, 100.,
> V|F
> > },
> > > { "sc_pass", "Set the flag to pass scene change frames",
> > > OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.dbl = 0 }, 0, 1,
> V|F
> > },
> > > { "s", "Set the flag to pass scene change frames",
> > > OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.dbl = 0 }, 0, 1,
> V|F
> > },
> > > + { "mode", "scene change detection method",
> > > OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_LEGACY},
> > > MODE_LEGACY,
> > > MODE_MEAN_CBRT, V|F },
> > > {NULL}
> > > };
> > >
> > > @@ -91,7 +105,14 @@ static int config_input(AVFilterLink *inlink)
> > > s->height[plane] = inlink->h >> ((plane == 1 || plane == 2) ?
> > > desc->log2_chroma_h : 0);
> > > }
> > >
> > > - s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
> > > + if (s->mode == MODE_LINEAR || s->mode == MODE_LEGACY)
> > > + s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
> > > + else if (s->mode == MODE_MEAN_CBRT) {
> > > + int ret = ff_init_cbrt(s->bitdepth);
> > > + if (ret < 0)
> > > + return ret;
> > > + s->sad = ff_scene_scrd_get_fn(s->bitdepth);
> > > + }
> > > if (!s->sad)
> > > return AVERROR(EINVAL);
> > >
> > > @@ -101,46 +122,97 @@ static int config_input(AVFilterLink *inlink)
> > > static av_cold void uninit(AVFilterContext *ctx) {
> > > SCDetContext *s = ctx->priv;
> > > -
> > > - av_frame_free(&s->prev_picref);
> > > + if (s->mode == MODE_LEGACY)
> > > + av_frame_free(&s->prev_frame.picref);
> > > + if (s->mode == MODE_MEAN_CBRT)
> > > + ff_uninit_cbrt(s->bitdepth);
> > > }
> > >
> > > -static double get_scene_score(AVFilterContext *ctx, AVFrame *frame)
> > > +static void compute_diff(AVFilterContext *ctx)
> > > {
> > > - double ret = 0;
> > > SCDetContext *s = ctx->priv;
> > > - AVFrame *prev_picref = s->prev_picref;
> > > + AVFrame *prev_picref = s->prev_frame.picref;
> > > + AVFrame *curr_picref = s->curr_frame.picref;
> > >
> > > - if (prev_picref && frame->height == prev_picref->height
> > > - && frame->width == prev_picref->width) {
> > > - uint64_t sad = 0;
> > > - double mafd, diff;
> > > - uint64_t count = 0;
> > > + if (prev_picref && curr_picref
> > > + && curr_picref->height == prev_picref->height
> > > + && curr_picref->width == prev_picref->width) {
> > >
> > > + uint64_t sum = 0;
> > > + uint64_t count = 0;
> > > for (int plane = 0; plane < s->nb_planes; plane++) {
> > > - uint64_t plane_sad;
> > > + uint64_t plane_sum;
> > > s->sad(prev_picref->data[plane],
> > prev_picref->linesize[plane],
> > > - frame->data[plane], frame->linesize[plane],
> > > - s->width[plane], s->height[plane], &plane_sad);
> > > - sad += plane_sad;
> > > + curr_picref->data[plane],
> > curr_picref->linesize[plane],
> > > + s->width[plane], s->height[plane], &plane_sum);
> > > + sum += plane_sum;
> > > count += s->width[plane] * s->height[plane];
> > > }
> > >
> > > - mafd = (double)sad * 100. / count / (1ULL << s->bitdepth);
> > > - diff = fabs(mafd - s->prev_mafd);
> > > - ret = av_clipf(FFMIN(mafd, diff), 0, 100.);
> > > - s->prev_mafd = mafd;
> > > - av_frame_free(&prev_picref);
> > > + s->curr_frame.mafd = (double)sum * 100. / count / (1ULL <<
> > > s->bitdepth);
> > > + if (s->mode == MODE_LEGACY)
> > > + s->curr_frame.diff = fabs(s->curr_frame.mafd -
> > > s->prev_frame.mafd);
> > > + else
> > > + s->curr_frame.diff = s->curr_frame.mafd -
> s->prev_frame.mafd;
> > > + } else {
> > > + s->curr_frame.mafd = 0;
> > > + s->curr_frame.diff = 0;
> > > }
> > > - s->prev_picref = av_frame_clone(frame);
> > > - return ret;
> > > }
> > >
> > > -static int set_meta(SCDetContext *s, AVFrame *frame, const char *key,
> > const
> > > char *value)
> > > +static int set_meta(AVFrame *frame, const char *key, const char
> > > +*value)
> > > {
> > > return av_dict_set(&frame->metadata, key, value, 0); }
> > >
> > > +static int filter_frame(AVFilterContext *ctx, AVFrame *frame) {
> > > + AVFilterLink *inlink = ctx->inputs[0];
> > > + AVFilterLink *outlink = ctx->outputs[0];
> > > + SCDetContext *s = ctx->priv;
> > > +
> > > + s->prev_frame = s->curr_frame;
> > > + s->curr_frame.picref = frame;
> > > +
> > > + if ((s->mode != MODE_LEGACY && s->prev_frame.picref) || (s->mode
> > > + ==
> > > MODE_LEGACY && frame != NULL)) {
> > > + compute_diff(ctx);
> > > +
> > > + if (s->mode == MODE_LEGACY) {
> > > + av_frame_free(&s->prev_frame.picref);
> > > + s->prev_frame = s->curr_frame;
> > > + s->curr_frame.picref =
> av_frame_clone(s->curr_frame.picref);
> > > + } else if (s->prev_frame.diff < -s->curr_frame.diff) {
> > > + s->prev_frame.diff = -s->curr_frame.diff;
> > > + s->prev_frame.mafd = s->curr_frame.mafd;
> > > + }
> > > + double scene_score = av_clipf(s->mode == MODE_LEGACY ?
> > > FFMIN(s->prev_frame.mafd, s->prev_frame.diff) :
> > > FFMAX(s->prev_frame.diff, 0), 0, 100.);
> > > +
> > > + char buf[64];
> > > + snprintf(buf, sizeof(buf), "%0.3f", s->prev_frame.mafd);
> > > + set_meta(s->prev_frame.picref, "lavfi.scd.mafd", buf);
> > > + snprintf(buf, sizeof(buf), "%0.3f", scene_score);
> > > + set_meta(s->prev_frame.picref, "lavfi.scd.score", buf);
> > > +
> > > + if (scene_score >= s->threshold) {
> > > + av_log(s, AV_LOG_INFO, "lavfi.scd.score: %.3f,
> > lavfi.scd.time:
> > > %s\n",
> > > + scene_score, av_ts2timestr(s->prev_frame.picref->pts,
> > > &inlink->time_base));
> > > + set_meta(s->prev_frame.picref, "lavfi.scd.time",
> > > + av_ts2timestr(s->prev_frame.picref->pts,
> > > &inlink->time_base));
> > > + }
> > > +
> > > + if (s->sc_pass) {
> > > + if (scene_score >= s->threshold)
> > > + return ff_filter_frame(outlink, s->prev_frame.picref);
> > > + else
> > > + av_frame_free(&s->prev_frame.picref);
> > > + }
> > > + else
> > > + return ff_filter_frame(outlink, s->prev_frame.picref);
> > > + }
> > > +
> > > + return 0;
> > > +}
> > > +
> > > static int activate(AVFilterContext *ctx) {
> > > int ret;
> > > @@ -148,6 +220,8 @@ static int activate(AVFilterContext *ctx)
> > > AVFilterLink *outlink = ctx->outputs[0];
> > > SCDetContext *s = ctx->priv;
> > > AVFrame *frame;
> > > + int64_t pts;
> > > + int status;
> > >
> > > FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
> > >
> > > @@ -155,31 +229,17 @@ static int activate(AVFilterContext *ctx)
> > > if (ret < 0)
> > > return ret;
> > >
> > > - if (frame) {
> > > - char buf[64];
> > > - s->scene_score = get_scene_score(ctx, frame);
> > > - snprintf(buf, sizeof(buf), "%0.3f", s->prev_mafd);
> > > - set_meta(s, frame, "lavfi.scd.mafd", buf);
> > > - snprintf(buf, sizeof(buf), "%0.3f", s->scene_score);
> > > - set_meta(s, frame, "lavfi.scd.score", buf);
> > > + if (ret > 0)
> > > + return filter_frame(ctx, frame);
> > >
> > > - if (s->scene_score >= s->threshold) {
> > > - av_log(s, AV_LOG_INFO, "lavfi.scd.score: %.3f,
> > lavfi.scd.time:
> > > %s\n",
> > > - s->scene_score, av_ts2timestr(frame->pts,
> > > &inlink->time_base));
> > > - set_meta(s, frame, "lavfi.scd.time",
> > > - av_ts2timestr(frame->pts, &inlink->time_base));
> > > - }
> > > - if (s->sc_pass) {
> > > - if (s->scene_score >= s->threshold)
> > > - return ff_filter_frame(outlink, frame);
> > > - else {
> > > - av_frame_free(&frame);
> > > - }
> > > - } else
> > > - return ff_filter_frame(outlink, frame);
> > > + if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
> > > + if (status == AVERROR_EOF)
> > > + ret = filter_frame(ctx, NULL);
> > > +
> > > + ff_outlink_set_status(outlink, status, pts);
> > > + return ret;
> > > }
> > >
> > > - FF_FILTER_FORWARD_STATUS(inlink, outlink);
> > > FF_FILTER_FORWARD_WANTED(outlink, inlink);
> > >
> > > return FFERROR_NOT_READY;
> > > diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
> > > index ee9f0f5e40..cff48e33d9 100644
> > > --- a/tests/fate/filter-video.mak
> > > +++ b/tests/fate/filter-video.mak
> > > @@ -672,6 +672,9 @@ SCDET_DEPS = LAVFI_INDEV FILE_PROTOCOL
> > > MOVIE_FILTER SCDET_FILTER SCALE_FILTER \ FATE_METADATA_FILTER-$(call
> > > ALLYES, $(SCDET_DEPS)) += fate-filter-metadata-scdet
> > > fate-filter-metadata-scdet: SRC =
> > > $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov
> > > fate-filter-metadata-scdet: CMD = run $(FILTER_METADATA_COMMAND)
> > > "sws_flags=+accurate_rnd+bitexact;movie='$(SRC)',scdet=s=1"
> > > +FATE_METADATA_FILTER-$(call ALLYES, $(SCDET_DEPS)) +=
> > > fate-filter-metadata-scdet1
> > > +fate-filter-metadata-scdet1: SRC =
> > > $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov
> > > +fate-filter-metadata-scdet1: CMD = run $(FILTER_METADATA_COMMAND)
> > >
> "sws_flags=+accurate_rnd+bitexact;movie='$(SRC)',scdet=s=1:t=6.5:mode=1"
> > >
> > > CROPDETECT_DEPS = LAVFI_INDEV FILE_PROTOCOL MOVIE_FILTER
> > MOVIE_FILTER
> > > MESTIMATE_FILTER CROPDETECT_FILTER \
> > > SCALE_FILTER MOV_DEMUXER H264_DECODER
> > > --
> > > 2.43.0.windows.1
> > >
> >
> > So what's next? Is there anything else I should do?
> >
> Anybody?
>
I plan to push it to another fork.
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCH] area changed: scdet filter
2024-05-19 16:05 ` radu.taraibuta
@ 2024-05-28 7:51 ` radu.taraibuta
2024-05-28 13:16 ` Paul B Mahol
0 siblings, 1 reply; 10+ messages in thread
From: radu.taraibuta @ 2024-05-28 7:51 UTC (permalink / raw)
To: ffmpeg-devel
> -----Original Message-----
> From: radu.taraibuta@gmail.com <radu.taraibuta@gmail.com>
> Sent: duminică, 19 mai 2024 19:05
> To: ffmpeg-devel@ffmpeg.org
> Subject: RE: [PATCH] area changed: scdet filter
>
>
> > -----Original Message-----
> > From: radu.taraibuta@gmail.com <radu.taraibuta@gmail.com>
> > Sent: luni, 13 mai 2024 18:52
> > To: ffmpeg-devel@ffmpeg.org
> > Subject: [PATCH] area changed: scdet filter
> >
> > Previous observations:
> >
> > - Inconsistent code style with other filters. (Mostly using
> > AVFilterLink* link instead of AVFilterLink *link).
> > I hope it's fine now.
> >
> > - Unrelated changes, please split trivial unrelated changes into
> > separate patches.
> > Removed trivial changes from this patch.
> >
> > - Can't tables be generated at .init/.config_props time? No point in
> > storing them into binary.
> > Done.
> >
> > - Adding extra delay is not backward compatible change, it should be
> > implemented properly by adding option for users to select mode: next &
> prev
> > frame or just next or prev frame.
> > Added legacy option to the mode parameter.
> >
> > - Could split frame clone change into earlier separate patch.
> > Cannot be done. It's either frame clone or 1 frame delay.
> >
> > - Where are results of improvements with accuracy so it can be
confirmed?
> > Here are my test results with manual labeling of scene changes:
> > 2379 Full length movie
> >
> > Method Threshold TP FP FN Precision
> > Recall F
> > Cubic 7 2357 423 22 0.847841727
0.990752417
> > 0.913742973
> > Cubic 10 2297 200 82 0.919903885
0.965531736
> > 0.94216571
> > Cubic 12 2217 146 162 0.938214135
0.931904161
> > 0.935048503
> > Cubic 15 2049 101 330 0.953023256
0.861286255
> > 0.904835505
> > Linear 2.8 2357 1060 22 0.689786362
> 0.990752417
> > 0.813319531
> > Linear 8 2099 236 280 0.898929336
> 0.882303489
> > 0.890538821
> > Linear 10 1886 173 493 0.91597863
> 0.792770071
> > 0.849932402
> > Legacy 5 2235 1260 144 0.639484979
> 0.939470366
> > 0.760980592
> > Legacy 8 1998 414 381 0.828358209
> 0.839848676
> > 0.83406387
> > Legacy 10 1743 193 636 0.900309917
> 0.732660782
> > 0.80787949
> >
> > 15 HDR10Plus_PB_EAC3JOC
> > https://mega.nz/file/nehDka6Z#C5_OPbSZkONdOp1jRmc09C9-
> > viDc3zMj8ZHruHcWKyA
> >
> > Method Threshold TP FP FN Precision
> > Recall F
> > Cubic 10 15 0 0 1 1 1
> > Linear 5 13 1 2 0.928571429
> 0.866666667
> > 0.896551724
> > Legacy 5 12 2 3 0.857142857 0.8
> > 0.827586207
> >
> > 21 (HDR HEVC 10-bit BT.2020 24fps) Exodus Sample
> >
> https://mega.nz/file/Sfw1hDpK#ErxCOpQDVjcI1gq6ZbX3vIfdtXZompkFe0jq47E
> > hR2o
> >
> > Method Threshold TP FP FN Precision
> > Recall F
> > Cubic 10 21 0 0 1 1 1
> > Linear 4 20 0 1 1 0.952380952
> > 0.975609756
> > Legacy 4 19 0 2 1 0.904761905
> 0.95
> >
> > 94 Bieber Grammys
> > https://mega.nz/#!c9dhAaKA!MG5Yi-
> > MJNATE2_KqcnNJZCRKtTWvdjJP1NwG8Ggdw3E
> >
> > Method Threshold TP FP FN Precision
> > Recall F
> > Cubic 15 91 23 3 0.798245614
0.968085106
> > 0.875
> > Cubic 18 85 9 9 0.904255319
0.904255319
> > 0.904255319
> > Linear 7 79 49 15 0.6171875
> 0.840425532
> > 0.711711712
> > Linear 8 74 28 20 0.725490196
> 0.787234043
> > 0.755102041
> > Legacy 7 74 40 20 0.649122807
> 0.787234043
> > 0.711538462
> > Legacy 8 71 26 23 0.731958763
> 0.755319149
> > 0.743455497
> >
> >
> > Improve scene detection accuracy by comparing frame with both previous
> > and next frame (creates one frame delay).
> > Add new mode parameter and new method to compute the frame difference
> > using cubic root to increase the weight of small changes and new
> > mean formula.
> > This improves accuracy significantly. Slightly improve performance by
> > not using frame clone.
> > Add legacy mode for backward compatibility.
> >
> > Signed-off-by: raduct <radu.taraibuta@gmail.com>
> > ---
> > doc/filters.texi | 16 ++++
> > libavfilter/scene_sad.c | 151 ++++++++++++++++++++++++++++++++++
> > libavfilter/scene_sad.h | 6 ++
> > libavfilter/vf_scdet.c | 156 +++++++++++++++++++++++++-----------
> > tests/fate/filter-video.mak | 3 +
> > 5 files changed, 284 insertions(+), 48 deletions(-)
> >
> > diff --git a/doc/filters.texi b/doc/filters.texi index
> > bfa8ccec8b..53814e003b 100644
> > --- a/doc/filters.texi
> > +++ b/doc/filters.texi
> > @@ -21797,6 +21797,22 @@ Default value is @code{10.}.
> > @item sc_pass, s
> > Set the flag to pass scene change frames to the next filter. Default
> value
> > is @code{0}
> > You can enable it if you want to get snapshot of scene change frames
> only.
> > +
> > +@item mode
> > +Set the scene change detection method. Default value is @code{-1}
> > +Available values are:
> > +
> > +@table @samp
> > +@item -1
> > +Legacy mode for sum of absolute linear differences. Compare frame
> > +with
> > previous only and no delay.
> > +
> > +@item 0
> > +Sum of absolute linear differences. Compare frame with both previous
> > +and
> > next which introduces a 1 frame delay.
> > +
> > +@item 1
> > +Sum of mean of cubic root differences. Compare frame with both
> > +previous
> > and
> > next which introduces a 1 frame delay.
> > +
> > +@end table
> > @end table
> >
> > @anchor{selectivecolor}
> > diff --git a/libavfilter/scene_sad.c b/libavfilter/scene_sad.c index
> > caf911eb5d..9b80d426bc 100644
> > --- a/libavfilter/scene_sad.c
> > +++ b/libavfilter/scene_sad.c
> > @@ -21,6 +21,7 @@
> > * Scene SAD functions
> > */
> >
> > +#include "libavutil/thread.h"
> > #include "scene_sad.h"
> >
> > void ff_scene_sad16_c(SCENE_SAD_PARAMS)
> > @@ -71,3 +72,153 @@ ff_scene_sad_fn ff_scene_sad_get_fn(int depth)
> > return sad;
> > }
> >
> > +static AVMutex cbrt_mutex = AV_MUTEX_INITIALIZER; static uint8_t
> > +*cbrt_table[16] = { NULL }; static int cbrt_table_ref[16] = { 0 };
> > +
> > +int ff_init_cbrt(int bitdepth)
> > +{
> > + if (bitdepth < 4 || bitdepth > 16)
> > + return AVERROR(EINVAL);
> > +
> > + ff_mutex_lock(&cbrt_mutex);
> > +
> > + uint8_t *table = cbrt_table[bitdepth];
> > + if (table) {
> > + cbrt_table_ref[bitdepth]++;
> > + goto end;
> > + }
> > +
> > + table = av_malloc((1 << bitdepth) * (bitdepth > 8 ? 2 : 1));
> > + if (!table)
> > + goto end;
> > + cbrt_table[bitdepth] = table;
> > + cbrt_table_ref[bitdepth] = 1;
> > +
> > + int size = 1 << bitdepth;
> > + double factor = pow(size - 1, 2. / 3.);
> > + if (bitdepth <= 8) {
> > + for (int i = 0; i < size; i++)
> > + table[i] = round(factor * pow(i, 1. / 3.));
> > + } else {
> > + uint16_t *tablew = (uint16_t*)table;
> > + for (int i = 0; i < size; i++)
> > + tablew[i] = round(factor * pow(i, 1. / 3.));
> > + }
> > +
> > +end:
> > + ff_mutex_unlock(&cbrt_mutex);
> > + return table != NULL;
> > +}
> > +
> > +void ff_uninit_cbrt(int bitdepth)
> > +{
> > + if (bitdepth < 4 || bitdepth > 16)
> > + return;
> > + ff_mutex_lock(&cbrt_mutex);
> > + if (!--cbrt_table_ref[bitdepth]) {
> > + av_free(cbrt_table[bitdepth]);
> > + cbrt_table[bitdepth] = NULL;
> > + }
> > + ff_mutex_unlock(&cbrt_mutex);
> > +}
> > +
> > +void ff_scene_scrd_c(SCENE_SAD_PARAMS) {
> > + uint64_t scrdPlus = 0;
> > + uint64_t scrdMinus = 0;
> > + int x, y;
> > +
> > + uint8_t *table = cbrt_table[8];
> > + if (!table) {
> > + *sum = 0;
> > + return;
> > + }
> > +
> > + for (y = 0; y < height; y++) {
> > + for (x = 0; x < width; x++)
> > + if (src1[x] > src2[x])
> > + scrdMinus += table[src1[x] - src2[x]];
> > + else
> > + scrdPlus += table[src2[x] - src1[x]];
> > + src1 += stride1;
> > + src2 += stride2;
> > + }
> > +
> > + double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
> > + *sum = 2.0 * mean * mean;
> > +}
> > +
> > +void ff_scene_scrd2B_c(SCENE_SAD_PARAMS, int bitdepth) {
> > + uint64_t scrdPlus = 0;
> > + uint64_t scrdMinus = 0;
> > + const uint16_t *src1w = (const uint16_t*)src1;
> > + const uint16_t *src2w = (const uint16_t*)src2;
> > + int x, y;
> > +
> > + uint16_t *table = (uint16_t*)cbrt_table[bitdepth];
> > + if (!table) {
> > + *sum = 0;
> > + return;
> > + }
> > +
> > + stride1 /= 2;
> > + stride2 /= 2;
> > +
> > + for (y = 0; y < height; y++) {
> > + for (x = 0; x < width; x++)
> > + if (src1w[x] > src2w[x])
> > + scrdMinus += table[src1w[x] - src2w[x]];
> > + else
> > + scrdPlus += table[src2w[x] - src1w[x]];
> > + src1w += stride1;
> > + src2w += stride2;
> > + }
> > +
> > + double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
> > + *sum = 2.0 * mean * mean;
> > +}
> > +
> > +void ff_scene_scrd9_c(SCENE_SAD_PARAMS)
> > +{
> > + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height,
> > +sum,
> 9);
> > +}
> > +
> > +void ff_scene_scrd10_c(SCENE_SAD_PARAMS)
> > +{
> > + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height,
> > +sum,
> > 10);
> > +}
> > +
> > +void ff_scene_scrd12_c(SCENE_SAD_PARAMS)
> > +{
> > + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height,
> > +sum,
> > 12);
> > +}
> > +
> > +void ff_scene_scrd14_c(SCENE_SAD_PARAMS)
> > +{
> > + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height,
> > +sum,
> > 14);
> > +}
> > +
> > +void ff_scene_scrd16_c(SCENE_SAD_PARAMS)
> > +{
> > + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height,
> > +sum,
> > 16);
> > +}
> > +
> > +ff_scene_sad_fn ff_scene_scrd_get_fn(int depth) {
> > + ff_scene_sad_fn scrd = NULL;
> > + if (depth == 8)
> > + scrd = ff_scene_scrd_c;
> > + else if (depth == 9)
> > + scrd = ff_scene_scrd9_c;
> > + else if (depth == 10)
> > + scrd = ff_scene_scrd10_c;
> > + else if (depth == 12)
> > + scrd = ff_scene_scrd12_c;
> > + else if (depth == 14)
> > + scrd = ff_scene_scrd14_c;
> > + else if (depth == 16)
> > + scrd = ff_scene_scrd16_c;
> > + return scrd;
> > +}
> > diff --git a/libavfilter/scene_sad.h b/libavfilter/scene_sad.h index
> > 173a051f2b..c294bd90f9 100644
> > --- a/libavfilter/scene_sad.h
> > +++ b/libavfilter/scene_sad.h
> > @@ -41,4 +41,10 @@ ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth);
> >
> > ff_scene_sad_fn ff_scene_sad_get_fn(int depth);
> >
> > +ff_scene_sad_fn ff_scene_scrd_get_fn(int depth);
> > +
> > +int ff_init_cbrt(int bitdepth);
> > +
> > +void ff_uninit_cbrt(int bitdepth);
> > +
> > #endif /* AVFILTER_SCENE_SAD_H */
> > diff --git a/libavfilter/vf_scdet.c b/libavfilter/vf_scdet.c index
> > 15399cfebf..93da5837b3 100644
> > --- a/libavfilter/vf_scdet.c
> > +++ b/libavfilter/vf_scdet.c
> > @@ -31,6 +31,18 @@
> > #include "scene_sad.h"
> > #include "video.h"
> >
> > +enum SCDETMode {
> > + MODE_LEGACY = -1,
> > + MODE_LINEAR = 0,
> > + MODE_MEAN_CBRT = 1
> > +};
> > +
> > +typedef struct SCDETFrameInfo {
> > + AVFrame *picref;
> > + double mafd;
> > + double diff;
> > +} SCDETFrameInfo;
> > +
> > typedef struct SCDetContext {
> > const AVClass *class;
> >
> > @@ -39,11 +51,12 @@ typedef struct SCDetContext {
> > int nb_planes;
> > int bitdepth;
> > ff_scene_sad_fn sad;
> > - double prev_mafd;
> > - double scene_score;
> > - AVFrame *prev_picref;
> > + SCDETFrameInfo curr_frame;
> > + SCDETFrameInfo prev_frame;
> > +
> > double threshold;
> > int sc_pass;
> > + enum SCDETMode mode;
> > } SCDetContext;
> >
> > #define OFFSET(x) offsetof(SCDetContext, x) @@ -55,6 +68,7 @@ static
> > const AVOption scdet_options[] = {
> > { "t", "set scene change detect threshold",
> > OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 10.}, 0, 100.,
V|F
> },
> > { "sc_pass", "Set the flag to pass scene change frames",
> > OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.dbl = 0 }, 0, 1,
V|F
> },
> > { "s", "Set the flag to pass scene change frames",
> > OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.dbl = 0 }, 0, 1,
V|F
> },
> > + { "mode", "scene change detection method",
> > OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_LEGACY},
> > MODE_LEGACY,
> > MODE_MEAN_CBRT, V|F },
> > {NULL}
> > };
> >
> > @@ -91,7 +105,14 @@ static int config_input(AVFilterLink *inlink)
> > s->height[plane] = inlink->h >> ((plane == 1 || plane == 2) ?
> > desc->log2_chroma_h : 0);
> > }
> >
> > - s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
> > + if (s->mode == MODE_LINEAR || s->mode == MODE_LEGACY)
> > + s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
> > + else if (s->mode == MODE_MEAN_CBRT) {
> > + int ret = ff_init_cbrt(s->bitdepth);
> > + if (ret < 0)
> > + return ret;
> > + s->sad = ff_scene_scrd_get_fn(s->bitdepth);
> > + }
> > if (!s->sad)
> > return AVERROR(EINVAL);
> >
> > @@ -101,46 +122,97 @@ static int config_input(AVFilterLink *inlink)
> > static av_cold void uninit(AVFilterContext *ctx) {
> > SCDetContext *s = ctx->priv;
> > -
> > - av_frame_free(&s->prev_picref);
> > + if (s->mode == MODE_LEGACY)
> > + av_frame_free(&s->prev_frame.picref);
> > + if (s->mode == MODE_MEAN_CBRT)
> > + ff_uninit_cbrt(s->bitdepth);
> > }
> >
> > -static double get_scene_score(AVFilterContext *ctx, AVFrame *frame)
> > +static void compute_diff(AVFilterContext *ctx)
> > {
> > - double ret = 0;
> > SCDetContext *s = ctx->priv;
> > - AVFrame *prev_picref = s->prev_picref;
> > + AVFrame *prev_picref = s->prev_frame.picref;
> > + AVFrame *curr_picref = s->curr_frame.picref;
> >
> > - if (prev_picref && frame->height == prev_picref->height
> > - && frame->width == prev_picref->width) {
> > - uint64_t sad = 0;
> > - double mafd, diff;
> > - uint64_t count = 0;
> > + if (prev_picref && curr_picref
> > + && curr_picref->height == prev_picref->height
> > + && curr_picref->width == prev_picref->width) {
> >
> > + uint64_t sum = 0;
> > + uint64_t count = 0;
> > for (int plane = 0; plane < s->nb_planes; plane++) {
> > - uint64_t plane_sad;
> > + uint64_t plane_sum;
> > s->sad(prev_picref->data[plane],
> prev_picref->linesize[plane],
> > - frame->data[plane], frame->linesize[plane],
> > - s->width[plane], s->height[plane], &plane_sad);
> > - sad += plane_sad;
> > + curr_picref->data[plane],
> curr_picref->linesize[plane],
> > + s->width[plane], s->height[plane], &plane_sum);
> > + sum += plane_sum;
> > count += s->width[plane] * s->height[plane];
> > }
> >
> > - mafd = (double)sad * 100. / count / (1ULL << s->bitdepth);
> > - diff = fabs(mafd - s->prev_mafd);
> > - ret = av_clipf(FFMIN(mafd, diff), 0, 100.);
> > - s->prev_mafd = mafd;
> > - av_frame_free(&prev_picref);
> > + s->curr_frame.mafd = (double)sum * 100. / count / (1ULL <<
> > s->bitdepth);
> > + if (s->mode == MODE_LEGACY)
> > + s->curr_frame.diff = fabs(s->curr_frame.mafd -
> > s->prev_frame.mafd);
> > + else
> > + s->curr_frame.diff = s->curr_frame.mafd -
s->prev_frame.mafd;
> > + } else {
> > + s->curr_frame.mafd = 0;
> > + s->curr_frame.diff = 0;
> > }
> > - s->prev_picref = av_frame_clone(frame);
> > - return ret;
> > }
> >
> > -static int set_meta(SCDetContext *s, AVFrame *frame, const char *key,
> const
> > char *value)
> > +static int set_meta(AVFrame *frame, const char *key, const char
> > +*value)
> > {
> > return av_dict_set(&frame->metadata, key, value, 0); }
> >
> > +static int filter_frame(AVFilterContext *ctx, AVFrame *frame) {
> > + AVFilterLink *inlink = ctx->inputs[0];
> > + AVFilterLink *outlink = ctx->outputs[0];
> > + SCDetContext *s = ctx->priv;
> > +
> > + s->prev_frame = s->curr_frame;
> > + s->curr_frame.picref = frame;
> > +
> > + if ((s->mode != MODE_LEGACY && s->prev_frame.picref) || (s->mode
> > + ==
> > MODE_LEGACY && frame != NULL)) {
> > + compute_diff(ctx);
> > +
> > + if (s->mode == MODE_LEGACY) {
> > + av_frame_free(&s->prev_frame.picref);
> > + s->prev_frame = s->curr_frame;
> > + s->curr_frame.picref =
av_frame_clone(s->curr_frame.picref);
> > + } else if (s->prev_frame.diff < -s->curr_frame.diff) {
> > + s->prev_frame.diff = -s->curr_frame.diff;
> > + s->prev_frame.mafd = s->curr_frame.mafd;
> > + }
> > + double scene_score = av_clipf(s->mode == MODE_LEGACY ?
> > FFMIN(s->prev_frame.mafd, s->prev_frame.diff) :
> > FFMAX(s->prev_frame.diff, 0), 0, 100.);
> > +
> > + char buf[64];
> > + snprintf(buf, sizeof(buf), "%0.3f", s->prev_frame.mafd);
> > + set_meta(s->prev_frame.picref, "lavfi.scd.mafd", buf);
> > + snprintf(buf, sizeof(buf), "%0.3f", scene_score);
> > + set_meta(s->prev_frame.picref, "lavfi.scd.score", buf);
> > +
> > + if (scene_score >= s->threshold) {
> > + av_log(s, AV_LOG_INFO, "lavfi.scd.score: %.3f,
> lavfi.scd.time:
> > %s\n",
> > + scene_score, av_ts2timestr(s->prev_frame.picref->pts,
> > &inlink->time_base));
> > + set_meta(s->prev_frame.picref, "lavfi.scd.time",
> > + av_ts2timestr(s->prev_frame.picref->pts,
> > &inlink->time_base));
> > + }
> > +
> > + if (s->sc_pass) {
> > + if (scene_score >= s->threshold)
> > + return ff_filter_frame(outlink, s->prev_frame.picref);
> > + else
> > + av_frame_free(&s->prev_frame.picref);
> > + }
> > + else
> > + return ff_filter_frame(outlink, s->prev_frame.picref);
> > + }
> > +
> > + return 0;
> > +}
> > +
> > static int activate(AVFilterContext *ctx) {
> > int ret;
> > @@ -148,6 +220,8 @@ static int activate(AVFilterContext *ctx)
> > AVFilterLink *outlink = ctx->outputs[0];
> > SCDetContext *s = ctx->priv;
> > AVFrame *frame;
> > + int64_t pts;
> > + int status;
> >
> > FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
> >
> > @@ -155,31 +229,17 @@ static int activate(AVFilterContext *ctx)
> > if (ret < 0)
> > return ret;
> >
> > - if (frame) {
> > - char buf[64];
> > - s->scene_score = get_scene_score(ctx, frame);
> > - snprintf(buf, sizeof(buf), "%0.3f", s->prev_mafd);
> > - set_meta(s, frame, "lavfi.scd.mafd", buf);
> > - snprintf(buf, sizeof(buf), "%0.3f", s->scene_score);
> > - set_meta(s, frame, "lavfi.scd.score", buf);
> > + if (ret > 0)
> > + return filter_frame(ctx, frame);
> >
> > - if (s->scene_score >= s->threshold) {
> > - av_log(s, AV_LOG_INFO, "lavfi.scd.score: %.3f,
> lavfi.scd.time:
> > %s\n",
> > - s->scene_score, av_ts2timestr(frame->pts,
> > &inlink->time_base));
> > - set_meta(s, frame, "lavfi.scd.time",
> > - av_ts2timestr(frame->pts, &inlink->time_base));
> > - }
> > - if (s->sc_pass) {
> > - if (s->scene_score >= s->threshold)
> > - return ff_filter_frame(outlink, frame);
> > - else {
> > - av_frame_free(&frame);
> > - }
> > - } else
> > - return ff_filter_frame(outlink, frame);
> > + if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
> > + if (status == AVERROR_EOF)
> > + ret = filter_frame(ctx, NULL);
> > +
> > + ff_outlink_set_status(outlink, status, pts);
> > + return ret;
> > }
> >
> > - FF_FILTER_FORWARD_STATUS(inlink, outlink);
> > FF_FILTER_FORWARD_WANTED(outlink, inlink);
> >
> > return FFERROR_NOT_READY;
> > diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
> > index ee9f0f5e40..cff48e33d9 100644
> > --- a/tests/fate/filter-video.mak
> > +++ b/tests/fate/filter-video.mak
> > @@ -672,6 +672,9 @@ SCDET_DEPS = LAVFI_INDEV FILE_PROTOCOL
> > MOVIE_FILTER SCDET_FILTER SCALE_FILTER \ FATE_METADATA_FILTER-$(call
> > ALLYES, $(SCDET_DEPS)) += fate-filter-metadata-scdet
> > fate-filter-metadata-scdet: SRC =
> > $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov
> > fate-filter-metadata-scdet: CMD = run $(FILTER_METADATA_COMMAND)
> > "sws_flags=+accurate_rnd+bitexact;movie='$(SRC)',scdet=s=1"
> > +FATE_METADATA_FILTER-$(call ALLYES, $(SCDET_DEPS)) +=
> > fate-filter-metadata-scdet1
> > +fate-filter-metadata-scdet1: SRC =
> > $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov
> > +fate-filter-metadata-scdet1: CMD = run $(FILTER_METADATA_COMMAND)
> > "sws_flags=+accurate_rnd+bitexact;movie='$(SRC)',scdet=s=1:t=6.5:mode=1"
> >
> > CROPDETECT_DEPS = LAVFI_INDEV FILE_PROTOCOL MOVIE_FILTER
> MOVIE_FILTER
> > MESTIMATE_FILTER CROPDETECT_FILTER \
> > SCALE_FILTER MOV_DEMUXER H264_DECODER
> > --
> > 2.43.0.windows.1
> >
>
> So what's next? Is there anything else I should do?
>
Anybody?
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCH] area changed: scdet filter
2024-05-13 15:52 radu.taraibuta
@ 2024-05-19 16:05 ` radu.taraibuta
2024-05-28 7:51 ` radu.taraibuta
2024-05-30 21:31 ` Michael Niedermayer
1 sibling, 1 reply; 10+ messages in thread
From: radu.taraibuta @ 2024-05-19 16:05 UTC (permalink / raw)
To: ffmpeg-devel
> -----Original Message-----
> From: radu.taraibuta@gmail.com <radu.taraibuta@gmail.com>
> Sent: luni, 13 mai 2024 18:52
> To: ffmpeg-devel@ffmpeg.org
> Subject: [PATCH] area changed: scdet filter
>
> Previous observations:
>
> - Inconsistent code style with other filters. (Mostly using AVFilterLink*
> link instead of AVFilterLink *link).
> I hope it's fine now.
>
> - Unrelated changes, please split trivial unrelated changes into separate
> patches.
> Removed trivial changes from this patch.
>
> - Can't tables be generated at .init/.config_props time? No point in
> storing them into binary.
> Done.
>
> - Adding extra delay is not backward compatible change, it should be
> implemented properly by adding option for users to select mode: next &
prev
> frame or just next or prev frame.
> Added legacy option to the mode parameter.
>
> - Could split frame clone change into earlier separate patch.
> Cannot be done. It's either frame clone or 1 frame delay.
>
> - Where are results of improvements with accuracy so it can be confirmed?
> Here are my test results with manual labeling of scene changes:
> 2379 Full length movie
>
> Method Threshold TP FP FN Precision
> Recall F
> Cubic 7 2357 423 22 0.847841727 0.990752417
> 0.913742973
> Cubic 10 2297 200 82 0.919903885 0.965531736
> 0.94216571
> Cubic 12 2217 146 162 0.938214135 0.931904161
> 0.935048503
> Cubic 15 2049 101 330 0.953023256 0.861286255
> 0.904835505
> Linear 2.8 2357 1060 22 0.689786362
0.990752417
> 0.813319531
> Linear 8 2099 236 280 0.898929336
0.882303489
> 0.890538821
> Linear 10 1886 173 493 0.91597863
0.792770071
> 0.849932402
> Legacy 5 2235 1260 144 0.639484979
0.939470366
> 0.760980592
> Legacy 8 1998 414 381 0.828358209
0.839848676
> 0.83406387
> Legacy 10 1743 193 636 0.900309917
0.732660782
> 0.80787949
>
> 15 HDR10Plus_PB_EAC3JOC
> https://mega.nz/file/nehDka6Z#C5_OPbSZkONdOp1jRmc09C9-
> viDc3zMj8ZHruHcWKyA
>
> Method Threshold TP FP FN Precision
> Recall F
> Cubic 10 15 0 0 1 1 1
> Linear 5 13 1 2 0.928571429
0.866666667
> 0.896551724
> Legacy 5 12 2 3 0.857142857 0.8
> 0.827586207
>
> 21 (HDR HEVC 10-bit BT.2020 24fps) Exodus Sample
> https://mega.nz/file/Sfw1hDpK#ErxCOpQDVjcI1gq6ZbX3vIfdtXZompkFe0jq47E
> hR2o
>
> Method Threshold TP FP FN Precision
> Recall F
> Cubic 10 21 0 0 1 1 1
> Linear 4 20 0 1 1 0.952380952
> 0.975609756
> Legacy 4 19 0 2 1 0.904761905
0.95
>
> 94 Bieber Grammys
> https://mega.nz/#!c9dhAaKA!MG5Yi-
> MJNATE2_KqcnNJZCRKtTWvdjJP1NwG8Ggdw3E
>
> Method Threshold TP FP FN Precision
> Recall F
> Cubic 15 91 23 3 0.798245614 0.968085106
> 0.875
> Cubic 18 85 9 9 0.904255319 0.904255319
> 0.904255319
> Linear 7 79 49 15 0.6171875
0.840425532
> 0.711711712
> Linear 8 74 28 20 0.725490196
0.787234043
> 0.755102041
> Legacy 7 74 40 20 0.649122807
0.787234043
> 0.711538462
> Legacy 8 71 26 23 0.731958763
0.755319149
> 0.743455497
>
>
> Improve scene detection accuracy by comparing frame with both previous and
> next frame (creates one frame delay).
> Add a new mode parameter and a new method that computes the frame
> difference using the cube root, which increases the weight of small
> changes, together with a new mean formula.
> This improves accuracy significantly. Slightly improve performance by not
> using frame clone.
> Add legacy mode for backward compatibility.
>
> Signed-off-by: raduct <radu.taraibuta@gmail.com>
> ---
> doc/filters.texi | 16 ++++
> libavfilter/scene_sad.c | 151 ++++++++++++++++++++++++++++++++++
> libavfilter/scene_sad.h | 6 ++
> libavfilter/vf_scdet.c | 156 +++++++++++++++++++++++++-----------
> tests/fate/filter-video.mak | 3 +
> 5 files changed, 284 insertions(+), 48 deletions(-)
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index bfa8ccec8b..53814e003b 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -21797,6 +21797,22 @@ Default value is @code{10.}.
> @item sc_pass, s
> Set the flag to pass scene change frames to the next filter. Default
value
> is @code{0}
> You can enable it if you want to get snapshot of scene change frames
only.
> +
> +@item mode
> +Set the scene change detection method. Default value is @code{-1}
> +Available values are:
> +
> +@table @samp
> +@item -1
> +Legacy mode for sum of absolute linear differences. Compare frame with
> previous only and no delay.
> +
> +@item 0
> +Sum of absolute linear differences. Compare frame with both previous and
> next which introduces a 1 frame delay.
> +
> +@item 1
> +Sum of mean of cubic root differences. Compare frame with both previous
> and
> next which introduces a 1 frame delay.
> +
> +@end table
> @end table
>
> @anchor{selectivecolor}
> diff --git a/libavfilter/scene_sad.c b/libavfilter/scene_sad.c
> index caf911eb5d..9b80d426bc 100644
> --- a/libavfilter/scene_sad.c
> +++ b/libavfilter/scene_sad.c
> @@ -21,6 +21,7 @@
> * Scene SAD functions
> */
>
> +#include "libavutil/thread.h"
> #include "scene_sad.h"
>
> void ff_scene_sad16_c(SCENE_SAD_PARAMS)
> @@ -71,3 +72,153 @@ ff_scene_sad_fn ff_scene_sad_get_fn(int depth)
> return sad;
> }
>
> +static AVMutex cbrt_mutex = AV_MUTEX_INITIALIZER;
> +static uint8_t *cbrt_table[16] = { NULL };
> +static int cbrt_table_ref[16] = { 0 };
> +
> +int ff_init_cbrt(int bitdepth)
> +{
> + if (bitdepth < 4 || bitdepth > 16)
> + return AVERROR(EINVAL);
> +
> + ff_mutex_lock(&cbrt_mutex);
> +
> + uint8_t *table = cbrt_table[bitdepth];
> + if (table) {
> + cbrt_table_ref[bitdepth]++;
> + goto end;
> + }
> +
> + table = av_malloc((1 << bitdepth) * (bitdepth > 8 ? 2 : 1));
> + if (!table)
> + goto end;
> + cbrt_table[bitdepth] = table;
> + cbrt_table_ref[bitdepth] = 1;
> +
> + int size = 1 << bitdepth;
> + double factor = pow(size - 1, 2. / 3.);
> + if (bitdepth <= 8) {
> + for (int i = 0; i < size; i++)
> + table[i] = round(factor * pow(i, 1. / 3.));
> + } else {
> + uint16_t *tablew = (uint16_t*)table;
> + for (int i = 0; i < size; i++)
> + tablew[i] = round(factor * pow(i, 1. / 3.));
> + }
> +
> +end:
> + ff_mutex_unlock(&cbrt_mutex);
> + return table != NULL;
> +}
> +
> +void ff_uninit_cbrt(int bitdepth)
> +{
> + if (bitdepth < 4 || bitdepth > 16)
> + return;
> + ff_mutex_lock(&cbrt_mutex);
> + if (!--cbrt_table_ref[bitdepth]) {
> + av_free(cbrt_table[bitdepth]);
> + cbrt_table[bitdepth] = NULL;
> + }
> + ff_mutex_unlock(&cbrt_mutex);
> +}
> +
> +void ff_scene_scrd_c(SCENE_SAD_PARAMS)
> +{
> + uint64_t scrdPlus = 0;
> + uint64_t scrdMinus = 0;
> + int x, y;
> +
> + uint8_t *table = cbrt_table[8];
> + if (!table) {
> + *sum = 0;
> + return;
> + }
> +
> + for (y = 0; y < height; y++) {
> + for (x = 0; x < width; x++)
> + if (src1[x] > src2[x])
> + scrdMinus += table[src1[x] - src2[x]];
> + else
> + scrdPlus += table[src2[x] - src1[x]];
> + src1 += stride1;
> + src2 += stride2;
> + }
> +
> + double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
> + *sum = 2.0 * mean * mean;
> +}
> +
> +void ff_scene_scrd2B_c(SCENE_SAD_PARAMS, int bitdepth)
> +{
> + uint64_t scrdPlus = 0;
> + uint64_t scrdMinus = 0;
> + const uint16_t *src1w = (const uint16_t*)src1;
> + const uint16_t *src2w = (const uint16_t*)src2;
> + int x, y;
> +
> + uint16_t *table = (uint16_t*)cbrt_table[bitdepth];
> + if (!table) {
> + *sum = 0;
> + return;
> + }
> +
> + stride1 /= 2;
> + stride2 /= 2;
> +
> + for (y = 0; y < height; y++) {
> + for (x = 0; x < width; x++)
> + if (src1w[x] > src2w[x])
> + scrdMinus += table[src1w[x] - src2w[x]];
> + else
> + scrdPlus += table[src2w[x] - src1w[x]];
> + src1w += stride1;
> + src2w += stride2;
> + }
> +
> + double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
> + *sum = 2.0 * mean * mean;
> +}
> +
> +void ff_scene_scrd9_c(SCENE_SAD_PARAMS)
> +{
> + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
9);
> +}
> +
> +void ff_scene_scrd10_c(SCENE_SAD_PARAMS)
> +{
> + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
> 10);
> +}
> +
> +void ff_scene_scrd12_c(SCENE_SAD_PARAMS)
> +{
> + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
> 12);
> +}
> +
> +void ff_scene_scrd14_c(SCENE_SAD_PARAMS)
> +{
> + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
> 14);
> +}
> +
> +void ff_scene_scrd16_c(SCENE_SAD_PARAMS)
> +{
> + ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
> 16);
> +}
> +
> +ff_scene_sad_fn ff_scene_scrd_get_fn(int depth)
> +{
> + ff_scene_sad_fn scrd = NULL;
> + if (depth == 8)
> + scrd = ff_scene_scrd_c;
> + else if (depth == 9)
> + scrd = ff_scene_scrd9_c;
> + else if (depth == 10)
> + scrd = ff_scene_scrd10_c;
> + else if (depth == 12)
> + scrd = ff_scene_scrd12_c;
> + else if (depth == 14)
> + scrd = ff_scene_scrd14_c;
> + else if (depth == 16)
> + scrd = ff_scene_scrd16_c;
> + return scrd;
> +}
> diff --git a/libavfilter/scene_sad.h b/libavfilter/scene_sad.h
> index 173a051f2b..c294bd90f9 100644
> --- a/libavfilter/scene_sad.h
> +++ b/libavfilter/scene_sad.h
> @@ -41,4 +41,10 @@ ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth);
>
> ff_scene_sad_fn ff_scene_sad_get_fn(int depth);
>
> +ff_scene_sad_fn ff_scene_scrd_get_fn(int depth);
> +
> +int ff_init_cbrt(int bitdepth);
> +
> +void ff_uninit_cbrt(int bitdepth);
> +
> #endif /* AVFILTER_SCENE_SAD_H */
> diff --git a/libavfilter/vf_scdet.c b/libavfilter/vf_scdet.c
> index 15399cfebf..93da5837b3 100644
> --- a/libavfilter/vf_scdet.c
> +++ b/libavfilter/vf_scdet.c
> @@ -31,6 +31,18 @@
> #include "scene_sad.h"
> #include "video.h"
>
> +enum SCDETMode {
> + MODE_LEGACY = -1,
> + MODE_LINEAR = 0,
> + MODE_MEAN_CBRT = 1
> +};
> +
> +typedef struct SCDETFrameInfo {
> + AVFrame *picref;
> + double mafd;
> + double diff;
> +} SCDETFrameInfo;
> +
> typedef struct SCDetContext {
> const AVClass *class;
>
> @@ -39,11 +51,12 @@ typedef struct SCDetContext {
> int nb_planes;
> int bitdepth;
> ff_scene_sad_fn sad;
> - double prev_mafd;
> - double scene_score;
> - AVFrame *prev_picref;
> + SCDETFrameInfo curr_frame;
> + SCDETFrameInfo prev_frame;
> +
> double threshold;
> int sc_pass;
> + enum SCDETMode mode;
> } SCDetContext;
>
> #define OFFSET(x) offsetof(SCDetContext, x)
> @@ -55,6 +68,7 @@ static const AVOption scdet_options[] = {
> { "t", "set scene change detect threshold",
> OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 10.}, 0, 100., V|F
},
> { "sc_pass", "Set the flag to pass scene change frames",
> OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.dbl = 0 }, 0, 1, V|F
},
> { "s", "Set the flag to pass scene change frames",
> OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.dbl = 0 }, 0, 1, V|F
},
> + { "mode", "scene change detection method",
> OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_LEGACY},
> MODE_LEGACY,
> MODE_MEAN_CBRT, V|F },
> {NULL}
> };
>
> @@ -91,7 +105,14 @@ static int config_input(AVFilterLink *inlink)
> s->height[plane] = inlink->h >> ((plane == 1 || plane == 2) ?
> desc->log2_chroma_h : 0);
> }
>
> - s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
> + if (s->mode == MODE_LINEAR || s->mode == MODE_LEGACY)
> + s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
> + else if (s->mode == MODE_MEAN_CBRT) {
> + int ret = ff_init_cbrt(s->bitdepth);
> + if (ret < 0)
> + return ret;
> + s->sad = ff_scene_scrd_get_fn(s->bitdepth);
> + }
> if (!s->sad)
> return AVERROR(EINVAL);
>
> @@ -101,46 +122,97 @@ static int config_input(AVFilterLink *inlink)
> static av_cold void uninit(AVFilterContext *ctx)
> {
> SCDetContext *s = ctx->priv;
> -
> - av_frame_free(&s->prev_picref);
> + if (s->mode == MODE_LEGACY)
> + av_frame_free(&s->prev_frame.picref);
> + if (s->mode == MODE_MEAN_CBRT)
> + ff_uninit_cbrt(s->bitdepth);
> }
>
> -static double get_scene_score(AVFilterContext *ctx, AVFrame *frame)
> +static void compute_diff(AVFilterContext *ctx)
> {
> - double ret = 0;
> SCDetContext *s = ctx->priv;
> - AVFrame *prev_picref = s->prev_picref;
> + AVFrame *prev_picref = s->prev_frame.picref;
> + AVFrame *curr_picref = s->curr_frame.picref;
>
> - if (prev_picref && frame->height == prev_picref->height
> - && frame->width == prev_picref->width) {
> - uint64_t sad = 0;
> - double mafd, diff;
> - uint64_t count = 0;
> + if (prev_picref && curr_picref
> + && curr_picref->height == prev_picref->height
> + && curr_picref->width == prev_picref->width) {
>
> + uint64_t sum = 0;
> + uint64_t count = 0;
> for (int plane = 0; plane < s->nb_planes; plane++) {
> - uint64_t plane_sad;
> + uint64_t plane_sum;
> s->sad(prev_picref->data[plane],
prev_picref->linesize[plane],
> - frame->data[plane], frame->linesize[plane],
> - s->width[plane], s->height[plane], &plane_sad);
> - sad += plane_sad;
> + curr_picref->data[plane],
curr_picref->linesize[plane],
> + s->width[plane], s->height[plane], &plane_sum);
> + sum += plane_sum;
> count += s->width[plane] * s->height[plane];
> }
>
> - mafd = (double)sad * 100. / count / (1ULL << s->bitdepth);
> - diff = fabs(mafd - s->prev_mafd);
> - ret = av_clipf(FFMIN(mafd, diff), 0, 100.);
> - s->prev_mafd = mafd;
> - av_frame_free(&prev_picref);
> + s->curr_frame.mafd = (double)sum * 100. / count / (1ULL <<
> s->bitdepth);
> + if (s->mode == MODE_LEGACY)
> + s->curr_frame.diff = fabs(s->curr_frame.mafd -
> s->prev_frame.mafd);
> + else
> + s->curr_frame.diff = s->curr_frame.mafd - s->prev_frame.mafd;
> + } else {
> + s->curr_frame.mafd = 0;
> + s->curr_frame.diff = 0;
> }
> - s->prev_picref = av_frame_clone(frame);
> - return ret;
> }
>
> -static int set_meta(SCDetContext *s, AVFrame *frame, const char *key,
const
> char *value)
> +static int set_meta(AVFrame *frame, const char *key, const char *value)
> {
> return av_dict_set(&frame->metadata, key, value, 0);
> }
>
> +static int filter_frame(AVFilterContext *ctx, AVFrame *frame)
> +{
> + AVFilterLink *inlink = ctx->inputs[0];
> + AVFilterLink *outlink = ctx->outputs[0];
> + SCDetContext *s = ctx->priv;
> +
> + s->prev_frame = s->curr_frame;
> + s->curr_frame.picref = frame;
> +
> + if ((s->mode != MODE_LEGACY && s->prev_frame.picref) || (s->mode ==
> MODE_LEGACY && frame != NULL)) {
> + compute_diff(ctx);
> +
> + if (s->mode == MODE_LEGACY) {
> + av_frame_free(&s->prev_frame.picref);
> + s->prev_frame = s->curr_frame;
> + s->curr_frame.picref = av_frame_clone(s->curr_frame.picref);
> + } else if (s->prev_frame.diff < -s->curr_frame.diff) {
> + s->prev_frame.diff = -s->curr_frame.diff;
> + s->prev_frame.mafd = s->curr_frame.mafd;
> + }
> + double scene_score = av_clipf(s->mode == MODE_LEGACY ?
> FFMIN(s->prev_frame.mafd, s->prev_frame.diff) : FFMAX(s->prev_frame.diff,
> 0), 0, 100.);
> +
> + char buf[64];
> + snprintf(buf, sizeof(buf), "%0.3f", s->prev_frame.mafd);
> + set_meta(s->prev_frame.picref, "lavfi.scd.mafd", buf);
> + snprintf(buf, sizeof(buf), "%0.3f", scene_score);
> + set_meta(s->prev_frame.picref, "lavfi.scd.score", buf);
> +
> + if (scene_score >= s->threshold) {
> + av_log(s, AV_LOG_INFO, "lavfi.scd.score: %.3f,
lavfi.scd.time:
> %s\n",
> + scene_score, av_ts2timestr(s->prev_frame.picref->pts,
> &inlink->time_base));
> + set_meta(s->prev_frame.picref, "lavfi.scd.time",
> + av_ts2timestr(s->prev_frame.picref->pts,
> &inlink->time_base));
> + }
> +
> + if (s->sc_pass) {
> + if (scene_score >= s->threshold)
> + return ff_filter_frame(outlink, s->prev_frame.picref);
> + else
> + av_frame_free(&s->prev_frame.picref);
> + }
> + else
> + return ff_filter_frame(outlink, s->prev_frame.picref);
> + }
> +
> + return 0;
> +}
> +
> static int activate(AVFilterContext *ctx)
> {
> int ret;
> @@ -148,6 +220,8 @@ static int activate(AVFilterContext *ctx)
> AVFilterLink *outlink = ctx->outputs[0];
> SCDetContext *s = ctx->priv;
> AVFrame *frame;
> + int64_t pts;
> + int status;
>
> FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
>
> @@ -155,31 +229,17 @@ static int activate(AVFilterContext *ctx)
> if (ret < 0)
> return ret;
>
> - if (frame) {
> - char buf[64];
> - s->scene_score = get_scene_score(ctx, frame);
> - snprintf(buf, sizeof(buf), "%0.3f", s->prev_mafd);
> - set_meta(s, frame, "lavfi.scd.mafd", buf);
> - snprintf(buf, sizeof(buf), "%0.3f", s->scene_score);
> - set_meta(s, frame, "lavfi.scd.score", buf);
> + if (ret > 0)
> + return filter_frame(ctx, frame);
>
> - if (s->scene_score >= s->threshold) {
> - av_log(s, AV_LOG_INFO, "lavfi.scd.score: %.3f,
lavfi.scd.time:
> %s\n",
> - s->scene_score, av_ts2timestr(frame->pts,
> &inlink->time_base));
> - set_meta(s, frame, "lavfi.scd.time",
> - av_ts2timestr(frame->pts, &inlink->time_base));
> - }
> - if (s->sc_pass) {
> - if (s->scene_score >= s->threshold)
> - return ff_filter_frame(outlink, frame);
> - else {
> - av_frame_free(&frame);
> - }
> - } else
> - return ff_filter_frame(outlink, frame);
> + if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
> + if (status == AVERROR_EOF)
> + ret = filter_frame(ctx, NULL);
> +
> + ff_outlink_set_status(outlink, status, pts);
> + return ret;
> }
>
> - FF_FILTER_FORWARD_STATUS(inlink, outlink);
> FF_FILTER_FORWARD_WANTED(outlink, inlink);
>
> return FFERROR_NOT_READY;
> diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
> index ee9f0f5e40..cff48e33d9 100644
> --- a/tests/fate/filter-video.mak
> +++ b/tests/fate/filter-video.mak
> @@ -672,6 +672,9 @@ SCDET_DEPS = LAVFI_INDEV FILE_PROTOCOL
> MOVIE_FILTER
> SCDET_FILTER SCALE_FILTER \
> FATE_METADATA_FILTER-$(call ALLYES, $(SCDET_DEPS)) +=
> fate-filter-metadata-scdet
> fate-filter-metadata-scdet: SRC =
> $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov
> fate-filter-metadata-scdet: CMD = run $(FILTER_METADATA_COMMAND)
> "sws_flags=+accurate_rnd+bitexact;movie='$(SRC)',scdet=s=1"
> +FATE_METADATA_FILTER-$(call ALLYES, $(SCDET_DEPS)) +=
> fate-filter-metadata-scdet1
> +fate-filter-metadata-scdet1: SRC =
> $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov
> +fate-filter-metadata-scdet1: CMD = run $(FILTER_METADATA_COMMAND)
> "sws_flags=+accurate_rnd+bitexact;movie='$(SRC)',scdet=s=1:t=6.5:mode=1"
>
> CROPDETECT_DEPS = LAVFI_INDEV FILE_PROTOCOL MOVIE_FILTER
> MOVIE_FILTER
> MESTIMATE_FILTER CROPDETECT_FILTER \
> SCALE_FILTER MOV_DEMUXER H264_DECODER
> --
> 2.43.0.windows.1
>
So what's next? Is there anything else I should do?
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* [FFmpeg-devel] [PATCH] area changed: scdet filter
@ 2024-05-13 15:52 radu.taraibuta
2024-05-19 16:05 ` radu.taraibuta
2024-05-30 21:31 ` Michael Niedermayer
0 siblings, 2 replies; 10+ messages in thread
From: radu.taraibuta @ 2024-05-13 15:52 UTC (permalink / raw)
To: ffmpeg-devel
Previous observations:
- Inconsistent code style with other filters. (Mostly using AVFilterLink*
link instead of AVFilterLink *link).
I hope it's fine now.
- Unrelated changes, please split trivial unrelated changes into separate
patches.
Removed trivial changes from this patch.
- Can't tables be generated at .init/.config_props time? No point in
storing them into binary.
Done.
- Adding extra delay is not backward compatible change, it should be
implemented properly by adding option for users to select mode: next & prev
frame or just next or prev frame.
Added legacy option to the mode parameter.
- Could split frame clone change into earlier separate patch.
Cannot be done. It's either frame clone or 1 frame delay.
- Where are results of improvements with accuracy so it can be confirmed?
Here are my test results with manual labeling of scene changes:
2379 Full length movie
Method  Threshold  TP    FP    FN   Precision    Recall       F
Cubic   7          2357  423   22   0.847841727  0.990752417  0.913742973
Cubic   10         2297  200   82   0.919903885  0.965531736  0.94216571
Cubic   12         2217  146   162  0.938214135  0.931904161  0.935048503
Cubic   15         2049  101   330  0.953023256  0.861286255  0.904835505
Linear  2.8        2357  1060  22   0.689786362  0.990752417  0.813319531
Linear  8          2099  236   280  0.898929336  0.882303489  0.890538821
Linear  10         1886  173   493  0.91597863   0.792770071  0.849932402
Legacy  5          2235  1260  144  0.639484979  0.939470366  0.760980592
Legacy  8          1998  414   381  0.828358209  0.839848676  0.83406387
Legacy  10         1743  193   636  0.900309917  0.732660782  0.80787949
15 HDR10Plus_PB_EAC3JOC
https://mega.nz/file/nehDka6Z#C5_OPbSZkONdOp1jRmc09C9-viDc3zMj8ZHruHcWKyA
Method  Threshold  TP  FP  FN  Precision    Recall       F
Cubic   10         15  0   0   1            1            1
Linear  5          13  1   2   0.928571429  0.866666667  0.896551724
Legacy  5          12  2   3   0.857142857  0.8          0.827586207
21 (HDR HEVC 10-bit BT.2020 24fps) Exodus Sample
https://mega.nz/file/Sfw1hDpK#ErxCOpQDVjcI1gq6ZbX3vIfdtXZompkFe0jq47EhR2o
Method  Threshold  TP  FP  FN  Precision  Recall       F
Cubic   10         21  0   0   1          1            1
Linear  4          20  0   1   1          0.952380952  0.975609756
Legacy  4          19  0   2   1          0.904761905  0.95
94 Bieber Grammys
https://mega.nz/#!c9dhAaKA!MG5Yi-MJNATE2_KqcnNJZCRKtTWvdjJP1NwG8Ggdw3E
Method  Threshold  TP  FP  FN  Precision    Recall       F
Cubic   15         91  23  3   0.798245614  0.968085106  0.875
Cubic   18         85  9   9   0.904255319  0.904255319  0.904255319
Linear  7          79  49  15  0.6171875    0.840425532  0.711711712
Linear  8          74  28  20  0.725490196  0.787234043  0.755102041
Legacy  7          74  40  20  0.649122807  0.787234043  0.711538462
Legacy  8          71  26  23  0.731958763  0.755319149  0.743455497
Improve scene detection accuracy by comparing frame with both previous and
next frame (creates one frame delay).
Add a new mode parameter and a new method that computes the frame difference
using the cube root, which increases the weight of small changes, together
with a new mean formula.
This improves accuracy significantly. Slightly improve performance by not
using frame clone.
Add legacy mode for backward compatibility.
Signed-off-by: raduct <radu.taraibuta@gmail.com>
---
doc/filters.texi | 16 ++++
libavfilter/scene_sad.c | 151 ++++++++++++++++++++++++++++++++++
libavfilter/scene_sad.h | 6 ++
libavfilter/vf_scdet.c | 156 +++++++++++++++++++++++++-----------
tests/fate/filter-video.mak | 3 +
5 files changed, 284 insertions(+), 48 deletions(-)
diff --git a/doc/filters.texi b/doc/filters.texi
index bfa8ccec8b..53814e003b 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -21797,6 +21797,22 @@ Default value is @code{10.}.
@item sc_pass, s
Set the flag to pass scene change frames to the next filter. Default value
is @code{0}
You can enable it if you want to get snapshot of scene change frames only.
+
+@item mode
+Set the scene change detection method. Default value is @code{-1}
+Available values are:
+
+@table @samp
+@item -1
+Legacy mode for sum of absolute linear differences. Compare frame with
previous only and no delay.
+
+@item 0
+Sum of absolute linear differences. Compare frame with both previous and
next which introduces a 1 frame delay.
+
+@item 1
+Sum of mean of cubic root differences. Compare frame with both previous and
next which introduces a 1 frame delay.
+
+@end table
@end table
@anchor{selectivecolor}
diff --git a/libavfilter/scene_sad.c b/libavfilter/scene_sad.c
index caf911eb5d..9b80d426bc 100644
--- a/libavfilter/scene_sad.c
+++ b/libavfilter/scene_sad.c
@@ -21,6 +21,7 @@
* Scene SAD functions
*/
+#include "libavutil/thread.h"
#include "scene_sad.h"
void ff_scene_sad16_c(SCENE_SAD_PARAMS)
@@ -71,3 +72,153 @@ ff_scene_sad_fn ff_scene_sad_get_fn(int depth)
return sad;
}
+static AVMutex cbrt_mutex = AV_MUTEX_INITIALIZER;
+static uint8_t *cbrt_table[16] = { NULL };
+static int cbrt_table_ref[16] = { 0 };
+
+int ff_init_cbrt(int bitdepth)
+{
+ if (bitdepth < 4 || bitdepth > 16)
+ return AVERROR(EINVAL);
+
+ ff_mutex_lock(&cbrt_mutex);
+
+ uint8_t *table = cbrt_table[bitdepth];
+ if (table) {
+ cbrt_table_ref[bitdepth]++;
+ goto end;
+ }
+
+ table = av_malloc((1 << bitdepth) * (bitdepth > 8 ? 2 : 1));
+ if (!table)
+ goto end;
+ cbrt_table[bitdepth] = table;
+ cbrt_table_ref[bitdepth] = 1;
+
+ int size = 1 << bitdepth;
+ double factor = pow(size - 1, 2. / 3.);
+ if (bitdepth <= 8) {
+ for (int i = 0; i < size; i++)
+ table[i] = round(factor * pow(i, 1. / 3.));
+ } else {
+ uint16_t *tablew = (uint16_t*)table;
+ for (int i = 0; i < size; i++)
+ tablew[i] = round(factor * pow(i, 1. / 3.));
+ }
+
+end:
+ ff_mutex_unlock(&cbrt_mutex);
+ return table != NULL;
+}
+
+void ff_uninit_cbrt(int bitdepth)
+{
+ if (bitdepth < 4 || bitdepth > 16)
+ return;
+ ff_mutex_lock(&cbrt_mutex);
+ if (!--cbrt_table_ref[bitdepth]) {
+ av_free(cbrt_table[bitdepth]);
+ cbrt_table[bitdepth] = NULL;
+ }
+ ff_mutex_unlock(&cbrt_mutex);
+}
+
+void ff_scene_scrd_c(SCENE_SAD_PARAMS)
+{
+ uint64_t scrdPlus = 0;
+ uint64_t scrdMinus = 0;
+ int x, y;
+
+ uint8_t *table = cbrt_table[8];
+ if (!table) {
+ *sum = 0;
+ return;
+ }
+
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ if (src1[x] > src2[x])
+ scrdMinus += table[src1[x] - src2[x]];
+ else
+ scrdPlus += table[src2[x] - src1[x]];
+ src1 += stride1;
+ src2 += stride2;
+ }
+
+ double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
+ *sum = 2.0 * mean * mean;
+}
+
+void ff_scene_scrd2B_c(SCENE_SAD_PARAMS, int bitdepth)
+{
+ uint64_t scrdPlus = 0;
+ uint64_t scrdMinus = 0;
+ const uint16_t *src1w = (const uint16_t*)src1;
+ const uint16_t *src2w = (const uint16_t*)src2;
+ int x, y;
+
+ uint16_t *table = (uint16_t*)cbrt_table[bitdepth];
+ if (!table) {
+ *sum = 0;
+ return;
+ }
+
+ stride1 /= 2;
+ stride2 /= 2;
+
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ if (src1w[x] > src2w[x])
+ scrdMinus += table[src1w[x] - src2w[x]];
+ else
+ scrdPlus += table[src2w[x] - src1w[x]];
+ src1w += stride1;
+ src2w += stride2;
+ }
+
+ double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
+ *sum = 2.0 * mean * mean;
+}
+
+void ff_scene_scrd9_c(SCENE_SAD_PARAMS)
+{
+ ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum, 9);
+}
+
+void ff_scene_scrd10_c(SCENE_SAD_PARAMS)
+{
+ ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
10);
+}
+
+void ff_scene_scrd12_c(SCENE_SAD_PARAMS)
+{
+ ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
12);
+}
+
+void ff_scene_scrd14_c(SCENE_SAD_PARAMS)
+{
+ ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
14);
+}
+
+void ff_scene_scrd16_c(SCENE_SAD_PARAMS)
+{
+ ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
16);
+}
+
+ff_scene_sad_fn ff_scene_scrd_get_fn(int depth)
+{
+ ff_scene_sad_fn scrd = NULL;
+ if (depth == 8)
+ scrd = ff_scene_scrd_c;
+ else if (depth == 9)
+ scrd = ff_scene_scrd9_c;
+ else if (depth == 10)
+ scrd = ff_scene_scrd10_c;
+ else if (depth == 12)
+ scrd = ff_scene_scrd12_c;
+ else if (depth == 14)
+ scrd = ff_scene_scrd14_c;
+ else if (depth == 16)
+ scrd = ff_scene_scrd16_c;
+ return scrd;
+}
diff --git a/libavfilter/scene_sad.h b/libavfilter/scene_sad.h
index 173a051f2b..c294bd90f9 100644
--- a/libavfilter/scene_sad.h
+++ b/libavfilter/scene_sad.h
@@ -41,4 +41,10 @@ ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth);
ff_scene_sad_fn ff_scene_sad_get_fn(int depth);
+ff_scene_sad_fn ff_scene_scrd_get_fn(int depth);
+
+int ff_init_cbrt(int bitdepth);
+
+void ff_uninit_cbrt(int bitdepth);
+
#endif /* AVFILTER_SCENE_SAD_H */
diff --git a/libavfilter/vf_scdet.c b/libavfilter/vf_scdet.c
index 15399cfebf..93da5837b3 100644
--- a/libavfilter/vf_scdet.c
+++ b/libavfilter/vf_scdet.c
@@ -31,6 +31,18 @@
#include "scene_sad.h"
#include "video.h"
+enum SCDETMode {
+ MODE_LEGACY = -1,
+ MODE_LINEAR = 0,
+ MODE_MEAN_CBRT = 1
+};
+
+typedef struct SCDETFrameInfo {
+ AVFrame *picref;
+ double mafd;
+ double diff;
+} SCDETFrameInfo;
+
typedef struct SCDetContext {
const AVClass *class;
@@ -39,11 +51,12 @@ typedef struct SCDetContext {
int nb_planes;
int bitdepth;
ff_scene_sad_fn sad;
- double prev_mafd;
- double scene_score;
- AVFrame *prev_picref;
+ SCDETFrameInfo curr_frame;
+ SCDETFrameInfo prev_frame;
+
double threshold;
int sc_pass;
+ enum SCDETMode mode;
} SCDetContext;
#define OFFSET(x) offsetof(SCDetContext, x)
@@ -55,6 +68,7 @@ static const AVOption scdet_options[] = {
{ "t", "set scene change detect threshold",
OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 10.}, 0, 100., V|F },
{ "sc_pass", "Set the flag to pass scene change frames",
OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.dbl = 0 }, 0, 1, V|F },
{ "s", "Set the flag to pass scene change frames",
OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.dbl = 0 }, 0, 1, V|F },
+ { "mode", "scene change detection method",
OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_LEGACY}, MODE_LEGACY,
MODE_MEAN_CBRT, V|F },
{NULL}
};
@@ -91,7 +105,14 @@ static int config_input(AVFilterLink *inlink)
s->height[plane] = inlink->h >> ((plane == 1 || plane == 2) ?
desc->log2_chroma_h : 0);
}
- s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
+ if (s->mode == MODE_LINEAR || s->mode == MODE_LEGACY)
+ s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
+ else if (s->mode == MODE_MEAN_CBRT) {
+ int ret = ff_init_cbrt(s->bitdepth);
+ if (ret < 0)
+ return ret;
+ s->sad = ff_scene_scrd_get_fn(s->bitdepth);
+ }
if (!s->sad)
return AVERROR(EINVAL);
@@ -101,46 +122,97 @@ static int config_input(AVFilterLink *inlink)
static av_cold void uninit(AVFilterContext *ctx)
{
SCDetContext *s = ctx->priv;
-
- av_frame_free(&s->prev_picref);
+ if (s->mode == MODE_LEGACY)
+ av_frame_free(&s->prev_frame.picref);
+ if (s->mode == MODE_MEAN_CBRT)
+ ff_uninit_cbrt(s->bitdepth);
}
-static double get_scene_score(AVFilterContext *ctx, AVFrame *frame)
+static void compute_diff(AVFilterContext *ctx)
{
- double ret = 0;
SCDetContext *s = ctx->priv;
- AVFrame *prev_picref = s->prev_picref;
+ AVFrame *prev_picref = s->prev_frame.picref;
+ AVFrame *curr_picref = s->curr_frame.picref;
- if (prev_picref && frame->height == prev_picref->height
- && frame->width == prev_picref->width) {
- uint64_t sad = 0;
- double mafd, diff;
- uint64_t count = 0;
+ if (prev_picref && curr_picref
+ && curr_picref->height == prev_picref->height
+ && curr_picref->width == prev_picref->width) {
+ uint64_t sum = 0;
+ uint64_t count = 0;
for (int plane = 0; plane < s->nb_planes; plane++) {
- uint64_t plane_sad;
+ uint64_t plane_sum;
s->sad(prev_picref->data[plane], prev_picref->linesize[plane],
- frame->data[plane], frame->linesize[plane],
- s->width[plane], s->height[plane], &plane_sad);
- sad += plane_sad;
+ curr_picref->data[plane], curr_picref->linesize[plane],
+ s->width[plane], s->height[plane], &plane_sum);
+ sum += plane_sum;
count += s->width[plane] * s->height[plane];
}
- mafd = (double)sad * 100. / count / (1ULL << s->bitdepth);
- diff = fabs(mafd - s->prev_mafd);
- ret = av_clipf(FFMIN(mafd, diff), 0, 100.);
- s->prev_mafd = mafd;
- av_frame_free(&prev_picref);
+ s->curr_frame.mafd = (double)sum * 100. / count / (1ULL <<
s->bitdepth);
+ if (s->mode == MODE_LEGACY)
+ s->curr_frame.diff = fabs(s->curr_frame.mafd -
s->prev_frame.mafd);
+ else
+ s->curr_frame.diff = s->curr_frame.mafd - s->prev_frame.mafd;
+ } else {
+ s->curr_frame.mafd = 0;
+ s->curr_frame.diff = 0;
}
- s->prev_picref = av_frame_clone(frame);
- return ret;
}
-static int set_meta(SCDetContext *s, AVFrame *frame, const char *key, const
char *value)
+static int set_meta(AVFrame *frame, const char *key, const char *value)
{
return av_dict_set(&frame->metadata, key, value, 0);
}
+static int filter_frame(AVFilterContext *ctx, AVFrame *frame)
+{
+ AVFilterLink *inlink = ctx->inputs[0];
+ AVFilterLink *outlink = ctx->outputs[0];
+ SCDetContext *s = ctx->priv;
+
+ s->prev_frame = s->curr_frame;
+ s->curr_frame.picref = frame;
+
+ if ((s->mode != MODE_LEGACY && s->prev_frame.picref) || (s->mode ==
MODE_LEGACY && frame != NULL)) {
+ compute_diff(ctx);
+
+ if (s->mode == MODE_LEGACY) {
+ av_frame_free(&s->prev_frame.picref);
+ s->prev_frame = s->curr_frame;
+ s->curr_frame.picref = av_frame_clone(s->curr_frame.picref);
+ } else if (s->prev_frame.diff < -s->curr_frame.diff) {
+ s->prev_frame.diff = -s->curr_frame.diff;
+ s->prev_frame.mafd = s->curr_frame.mafd;
+ }
+ double scene_score = av_clipf(s->mode == MODE_LEGACY ?
FFMIN(s->prev_frame.mafd, s->prev_frame.diff) : FFMAX(s->prev_frame.diff,
0), 0, 100.);
+
+ char buf[64];
+ snprintf(buf, sizeof(buf), "%0.3f", s->prev_frame.mafd);
+ set_meta(s->prev_frame.picref, "lavfi.scd.mafd", buf);
+ snprintf(buf, sizeof(buf), "%0.3f", scene_score);
+ set_meta(s->prev_frame.picref, "lavfi.scd.score", buf);
+
+ if (scene_score >= s->threshold) {
+ av_log(s, AV_LOG_INFO, "lavfi.scd.score: %.3f, lavfi.scd.time:
%s\n",
+ scene_score, av_ts2timestr(s->prev_frame.picref->pts,
&inlink->time_base));
+ set_meta(s->prev_frame.picref, "lavfi.scd.time",
+ av_ts2timestr(s->prev_frame.picref->pts,
&inlink->time_base));
+ }
+
+ if (s->sc_pass) {
+ if (scene_score >= s->threshold)
+ return ff_filter_frame(outlink, s->prev_frame.picref);
+ else
+ av_frame_free(&s->prev_frame.picref);
+ }
+ else
+ return ff_filter_frame(outlink, s->prev_frame.picref);
+ }
+
+ return 0;
+}
+
static int activate(AVFilterContext *ctx)
{
int ret;
@@ -148,6 +220,8 @@ static int activate(AVFilterContext *ctx)
AVFilterLink *outlink = ctx->outputs[0];
SCDetContext *s = ctx->priv;
AVFrame *frame;
+ int64_t pts;
+ int status;
FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
@@ -155,31 +229,17 @@ static int activate(AVFilterContext *ctx)
if (ret < 0)
return ret;
- if (frame) {
- char buf[64];
- s->scene_score = get_scene_score(ctx, frame);
- snprintf(buf, sizeof(buf), "%0.3f", s->prev_mafd);
- set_meta(s, frame, "lavfi.scd.mafd", buf);
- snprintf(buf, sizeof(buf), "%0.3f", s->scene_score);
- set_meta(s, frame, "lavfi.scd.score", buf);
+ if (ret > 0)
+ return filter_frame(ctx, frame);
- if (s->scene_score >= s->threshold) {
- av_log(s, AV_LOG_INFO, "lavfi.scd.score: %.3f, lavfi.scd.time:
%s\n",
- s->scene_score, av_ts2timestr(frame->pts,
&inlink->time_base));
- set_meta(s, frame, "lavfi.scd.time",
- av_ts2timestr(frame->pts, &inlink->time_base));
- }
- if (s->sc_pass) {
- if (s->scene_score >= s->threshold)
- return ff_filter_frame(outlink, frame);
- else {
- av_frame_free(&frame);
- }
- } else
- return ff_filter_frame(outlink, frame);
+ if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
+ if (status == AVERROR_EOF)
+ ret = filter_frame(ctx, NULL);
+
+ ff_outlink_set_status(outlink, status, pts);
+ return ret;
}
- FF_FILTER_FORWARD_STATUS(inlink, outlink);
FF_FILTER_FORWARD_WANTED(outlink, inlink);
return FFERROR_NOT_READY;
diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
index ee9f0f5e40..cff48e33d9 100644
--- a/tests/fate/filter-video.mak
+++ b/tests/fate/filter-video.mak
@@ -672,6 +672,9 @@ SCDET_DEPS = LAVFI_INDEV FILE_PROTOCOL MOVIE_FILTER
SCDET_FILTER SCALE_FILTER \
FATE_METADATA_FILTER-$(call ALLYES, $(SCDET_DEPS)) +=
fate-filter-metadata-scdet
fate-filter-metadata-scdet: SRC =
$(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov
fate-filter-metadata-scdet: CMD = run $(FILTER_METADATA_COMMAND)
"sws_flags=+accurate_rnd+bitexact;movie='$(SRC)',scdet=s=1"
+FATE_METADATA_FILTER-$(call ALLYES, $(SCDET_DEPS)) +=
fate-filter-metadata-scdet1
+fate-filter-metadata-scdet1: SRC =
$(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov
+fate-filter-metadata-scdet1: CMD = run $(FILTER_METADATA_COMMAND)
"sws_flags=+accurate_rnd+bitexact;movie='$(SRC)',scdet=s=1:t=6.5:mode=1"
CROPDETECT_DEPS = LAVFI_INDEV FILE_PROTOCOL MOVIE_FILTER MOVIE_FILTER
MESTIMATE_FILTER CROPDETECT_FILTER \
SCALE_FILTER MOV_DEMUXER H264_DECODER
--
2.43.0.windows.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* [FFmpeg-devel] [PATCH] area changed: scdet filter
@ 2024-05-12 11:04 raduct
0 siblings, 0 replies; 10+ messages in thread
From: raduct @ 2024-05-12 11:04 UTC (permalink / raw)
To: ffmpeg-devel
Improve scene detection accuracy by comparing frame with both previous and
next frame (creates one frame delay).
Add new mode parameter and new method to compute the frame difference using
cubic root to increase the weight of small changes and new mean formula.
This improves accuracy significantly.
Slightly improve performance by not using frame clone.
Signed-off-by: raduct <radu.taraibuta@gmail.com>
---
doc/filters.texi | 13 +++
libavfilter/scene_sad.c | 167 +++++++++++++++++++++++++++++++++++-
libavfilter/scene_sad.h | 2 +
libavfilter/vf_scdet.c | 150 ++++++++++++++++++++------------
tests/fate/filter-video.mak | 3 +
5 files changed, 281 insertions(+), 54 deletions(-)
diff --git a/doc/filters.texi b/doc/filters.texi
index bfa8ccec8b..de83a5e322 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -21797,6 +21797,19 @@ Default value is @code{10.}.
@item sc_pass, s
Set the flag to pass scene change frames to the next filter. Default value
is @code{0}
You can enable it if you want to get snapshot of scene change frames only.
+
+@item mode
+Set the scene change detection method. Default value is @code{0}
+Available values are:
+
+@table @samp
+@item 0
+Regular sum of absolute linear differences.
+
+@item 1
+Sum of mean of cubic root differences.
+
+@end table
@end table
@anchor{selectivecolor}
diff --git a/libavfilter/scene_sad.c b/libavfilter/scene_sad.c
index caf911eb5d..5280e356cc 100644
--- a/libavfilter/scene_sad.c
+++ b/libavfilter/scene_sad.c
@@ -65,9 +65,174 @@ ff_scene_sad_fn ff_scene_sad_get_fn(int depth)
if (!sad) {
if (depth == 8)
sad = ff_scene_sad_c;
- if (depth == 16)
+ else if (depth == 16)
sad = ff_scene_sad16_c;
}
return sad;
}
+/*
+* Lookup table for 40.25*pow(i,1/3) - a.k.a cubic root extended to 0 - 255
interval
+* Increase the weight of small differences compared to linear
+*/
+static const uint8_t cbrtTable[256] = {
+0, 40, 51, 58, 64, 69, 73, 77, 81, 84, 87, 90, 92, 95, 97,
99,
+101, 103, 105, 107, 109, 111, 113, 114, 116, 118, 119, 121, 122, 124, 125,
126,
+128, 129, 130, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144,
145,
+146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 158, 159,
160,
+161, 162, 163, 163, 164, 165, 166, 167, 167, 168, 169, 170, 170, 171, 172,
173,
+173, 174, 175, 176, 176, 177, 178, 178, 179, 180, 180, 181, 182, 182, 183,
184,
+184, 185, 186, 186, 187, 187, 188, 189, 189, 190, 190, 191, 192, 192, 193,
193,
+194, 195, 195, 196, 196, 197, 197, 198, 199, 199, 200, 200, 201, 201, 202,
202,
+203, 203, 204, 204, 205, 205, 206, 206, 207, 207, 208, 208, 209, 209, 210,
210,
+211, 211, 212, 212, 213, 213, 214, 214, 215, 215, 216, 216, 217, 217, 218,
218,
+219, 219, 219, 220, 220, 221, 221, 222, 222, 223, 223, 223, 224, 224, 225,
225,
+226, 226, 226, 227, 227, 228, 228, 229, 229, 229, 230, 230, 231, 231, 231,
232,
+232, 233, 233, 233, 234, 234, 235, 235, 235, 236, 236, 237, 237, 237, 238,
238,
+238, 239, 239, 240, 240, 240, 241, 241, 242, 242, 242, 243, 243, 243, 244,
244,
+244, 245, 245, 246, 246, 246, 247, 247, 247, 248, 248, 248, 249, 249, 249,
250,
+250, 250, 251, 251, 252, 252, 252, 253, 253, 253, 254, 254, 254, 255, 255,
255 };
+
+/*
+* Lookup table for 101.52*pow(i,1/3) - a.k.a cubic root extended to 0 -
1023 interval
+* Increase the weight of small differences compared to linear
+*/
+static const uint16_t cbrtTable10[1024] = {
+ 0, 102, 128, 146, 161, 174, 184, 194, 203, 211, 219, 226, 232, 239, 245,
250, 256, 261, 266, 271, 276, 280, 284, 289, 293, 297, 301, 305, 308, 312,
315, 319,
+322, 326, 329, 332, 335, 338, 341, 344, 347, 350, 353, 356, 358, 361, 364,
366, 369, 371, 374, 376, 379, 381, 384, 386, 388, 391, 393, 395, 397, 400,
402, 404,
+406, 408, 410, 412, 414, 416, 418, 420, 422, 424, 426, 428, 430, 432, 434,
436, 437, 439, 441, 443, 445, 446, 448, 450, 452, 453, 455, 457, 458, 460,
462, 463,
+465, 466, 468, 470, 471, 473, 474, 476, 477, 479, 480, 482, 483, 485, 486,
488, 489, 491, 492, 494, 495, 497, 498, 499, 501, 502, 504, 505, 506, 508,
509, 510,
+512, 513, 514, 516, 517, 518, 520, 521, 522, 523, 525, 526, 527, 528, 530,
531, 532, 533, 535, 536, 537, 538, 539, 541, 542, 543, 544, 545, 547, 548,
549, 550,
+551, 552, 553, 555, 556, 557, 558, 559, 560, 561, 562, 563, 565, 566, 567,
568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 580, 581, 582, 583,
584, 585,
+586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600,
601, 602, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614,
615, 616,
+617, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 626, 627, 628, 629,
630, 631, 632, 633, 634, 634, 635, 636, 637, 638, 639, 640, 640, 641, 642,
643, 644,
+645, 645, 646, 647, 648, 649, 650, 650, 651, 652, 653, 654, 655, 655, 656,
657, 658, 659, 659, 660, 661, 662, 663, 663, 664, 665, 666, 667, 667, 668,
669, 670,
+670, 671, 672, 673, 674, 674, 675, 676, 677, 677, 678, 679, 680, 680, 681,
682, 683, 683, 684, 685, 686, 686, 687, 688, 689, 689, 690, 691, 691, 692,
693, 694,
+694, 695, 696, 697, 697, 698, 699, 699, 700, 701, 702, 702, 703, 704, 704,
705, 706, 706, 707, 708, 709, 709, 710, 711, 711, 712, 713, 713, 714, 715,
715, 716,
+717, 717, 718, 719, 720, 720, 721, 722, 722, 723, 724, 724, 725, 726, 726,
727, 728, 728, 729, 729, 730, 731, 731, 732, 733, 733, 734, 735, 735, 736,
737, 737,
+738, 739, 739, 740, 740, 741, 742, 742, 743, 744, 744, 745, 746, 746, 747,
747, 748, 749, 749, 750, 750, 751, 752, 752, 753, 754, 754, 755, 755, 756,
757, 757,
+758, 758, 759, 760, 760, 761, 761, 762, 763, 763, 764, 764, 765, 766, 766,
767, 767, 768, 769, 769, 770, 770, 771, 772, 772, 773, 773, 774, 774, 775,
776, 776,
+777, 777, 778, 779, 779, 780, 780, 781, 781, 782, 783, 783, 784, 784, 785,
785, 786, 787, 787, 788, 788, 789, 789, 790, 790, 791, 792, 792, 793, 793,
794, 794,
+795, 795, 796, 797, 797, 798, 798, 799, 799, 800, 800, 801, 801, 802, 803,
803, 804, 804, 805, 805, 806, 806, 807, 807, 808, 808, 809, 810, 810, 811,
811, 812,
+812, 813, 813, 814, 814, 815, 815, 816, 816, 817, 817, 818, 818, 819, 819,
820, 821, 821, 822, 822, 823, 823, 824, 824, 825, 825, 826, 826, 827, 827,
828, 828,
+829, 829, 830, 830, 831, 831, 832, 832, 833, 833, 834, 834, 835, 835, 836,
836, 837, 837, 838, 838, 839, 839, 840, 840, 841, 841, 842, 842, 843, 843,
844, 844,
+845, 845, 846, 846, 847, 847, 848, 848, 849, 849, 850, 850, 851, 851, 851,
852, 852, 853, 853, 854, 854, 855, 855, 856, 856, 857, 857, 858, 858, 859,
859, 860,
+860, 861, 861, 861, 862, 862, 863, 863, 864, 864, 865, 865, 866, 866, 867,
867, 868, 868, 868, 869, 869, 870, 870, 871, 871, 872, 872, 873, 873, 874,
874, 874,
+875, 875, 876, 876, 877, 877, 878, 878, 879, 879, 879, 880, 880, 881, 881,
882, 882, 883, 883, 883, 884, 884, 885, 885, 886, 886, 887, 887, 887, 888,
888, 889,
+889, 890, 890, 891, 891, 891, 892, 892, 893, 893, 894, 894, 894, 895, 895,
896, 896, 897, 897, 898, 898, 898, 899, 899, 900, 900, 901, 901, 901, 902,
902, 903,
+903, 904, 904, 904, 905, 905, 906, 906, 907, 907, 907, 908, 908, 909, 909,
909, 910, 910, 911, 911, 912, 912, 912, 913, 913, 914, 914, 915, 915, 915,
916, 916,
+917, 917, 917, 918, 918, 919, 919, 919, 920, 920, 921, 921, 922, 922, 922,
923, 923, 924, 924, 924, 925, 925, 926, 926, 926, 927, 927, 928, 928, 928,
929, 929,
+930, 930, 930, 931, 931, 932, 932, 933, 933, 933, 934, 934, 935, 935, 935,
936, 936, 937, 937, 937, 938, 938, 938, 939, 939, 940, 940, 940, 941, 941,
942, 942,
+942, 943, 943, 944, 944, 944, 945, 945, 946, 946, 946, 947, 947, 948, 948,
948, 949, 949, 949, 950, 950, 951, 951, 951, 952, 952, 953, 953, 953, 954,
954, 954,
+955, 955, 956, 956, 956, 957, 957, 958, 958, 958, 959, 959, 959, 960, 960,
961, 961, 961, 962, 962, 962, 963, 963, 964, 964, 964, 965, 965, 965, 966,
966, 967,
+967, 967, 968, 968, 968, 969, 969, 970, 970, 970, 971, 971, 971, 972, 972,
972, 973, 973, 974, 974, 974, 975, 975, 975, 976, 976, 977, 977, 977, 978,
978, 978,
+979, 979, 979, 980, 980, 981, 981, 981, 982, 982, 982, 983, 983, 983, 984,
984, 985, 985, 985, 986, 986, 986, 987, 987, 987, 988, 988, 988, 989, 989,
990, 990,
+990, 991, 991, 991, 992, 992, 992, 993, 993, 993, 994, 994, 994, 995, 995,
996, 996, 996, 997, 997, 997, 998, 998, 998, 999, 999, 999, 1000, 1000,
1000, 1001, 1001,
+1001, 1002, 1002, 1003, 1003, 1003, 1004, 1004, 1004, 1005, 1005, 1005,
1006, 1006, 1006, 1007, 1007, 1007, 1008, 1008, 1008, 1009, 1009, 1009,
1010, 1010, 1010, 1011, 1011, 1011, 1012, 1012,
+1012, 1013, 1013, 1014, 1014, 1014, 1015, 1015, 1015, 1016, 1016, 1016,
1017, 1017, 1017, 1018, 1018, 1018, 1019, 1019, 1019, 1020, 1020, 1020,
1021, 1021, 1021, 1022, 1022, 1022, 1023, 1023 };
+
+void ff_scene_scrd_c(SCENE_SAD_PARAMS)
+{
+ uint64_t scrdPlus = 0;
+ uint64_t scrdMinus = 0;
+ int x, y;
+
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ if (src1[x] > src2[x])
+ scrdMinus += cbrtTable[src1[x] - src2[x]];
+ else
+ scrdPlus += cbrtTable[src2[x] - src1[x]];
+ src1 += stride1;
+ src2 += stride2;
+ }
+
+ double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
+ *sum = 2.0 * mean * mean;
+}
+
+void ff_scene_scrd2B_c(SCENE_SAD_PARAMS, int bitdepth)
+{
+ uint64_t scrdPlus = 0;
+ uint64_t scrdMinus = 0;
+ const uint16_t* src1w = (const uint16_t*)src1;
+ const uint16_t* src2w = (const uint16_t*)src2;
+ int x, y;
+ int shift = FFABS(bitdepth - 10);
+
+ stride1 /= 2;
+ stride2 /= 2;
+
+ if (bitdepth > 10) {
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ if (src1w[x] > src2w[x])
+ scrdMinus += cbrtTable10[(src1w[x] - src2w[x]) >>
shift];
+ else
+ scrdPlus += cbrtTable10[(src2w[x] - src1w[x]) >>
shift];
+ src1w += stride1;
+ src2w += stride2;
+ }
+ scrdMinus <<= shift;
+ scrdPlus <<= shift;
+ }
+ else {
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ if (src1w[x] > src2w[x])
+ scrdMinus += cbrtTable10[(src1w[x] - src2w[x]) <<
shift];
+ else
+ scrdPlus += cbrtTable10[(src2w[x] - src1w[x]) <<
shift];
+ src1w += stride1;
+ src2w += stride2;
+ }
+ scrdMinus >>= shift;
+ scrdPlus >>= shift;
+ }
+
+ double mean = (sqrt(scrdPlus) + sqrt(scrdMinus)) / 2.0;
+ *sum = 2.0 * mean * mean;
+}
+
+void ff_scene_scrd9_c(SCENE_SAD_PARAMS)
+{
+ ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum, 9);
+}
+
+void ff_scene_scrd10_c(SCENE_SAD_PARAMS)
+{
+ ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
10);
+}
+
+void ff_scene_scrd12_c(SCENE_SAD_PARAMS)
+{
+ ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
12);
+}
+
+void ff_scene_scrd14_c(SCENE_SAD_PARAMS)
+{
+ ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
14);
+}
+
+void ff_scene_scrd16_c(SCENE_SAD_PARAMS)
+{
+ ff_scene_scrd2B_c(src1, stride1, src2, stride2, width, height, sum,
16);
+}
+
+ff_scene_sad_fn ff_scene_scrd_get_fn(int depth)
+{
+ ff_scene_sad_fn scrd = NULL;
+ if (depth == 8)
+ scrd = ff_scene_scrd_c;
+ else if (depth == 9)
+ scrd = ff_scene_scrd9_c;
+ else if (depth == 10)
+ scrd = ff_scene_scrd10_c;
+ else if (depth == 12)
+ scrd = ff_scene_scrd12_c;
+ else if (depth == 14)
+ scrd = ff_scene_scrd14_c;
+ else if (depth == 16)
+ scrd = ff_scene_scrd16_c;
+ return scrd;
+}
diff --git a/libavfilter/scene_sad.h b/libavfilter/scene_sad.h
index 173a051f2b..af9b06201c 100644
--- a/libavfilter/scene_sad.h
+++ b/libavfilter/scene_sad.h
@@ -41,4 +41,6 @@ ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth);
ff_scene_sad_fn ff_scene_sad_get_fn(int depth);
+ff_scene_sad_fn ff_scene_scrd_get_fn(int depth);
+
#endif /* AVFILTER_SCENE_SAD_H */
diff --git a/libavfilter/vf_scdet.c b/libavfilter/vf_scdet.c
index 15399cfebf..6162e4615b 100644
--- a/libavfilter/vf_scdet.c
+++ b/libavfilter/vf_scdet.c
@@ -31,6 +31,17 @@
#include "scene_sad.h"
#include "video.h"
+enum SCDETMode {
+ MODE_DIFF = 0,
+ MODE_MEAN_CBRT = 1
+};
+
+typedef struct SCDETFrameInfo {
+ AVFrame* picref;
+ double mafd;
+ double diff;
+} SCDETFrameInfo;
+
typedef struct SCDetContext {
const AVClass *class;
@@ -39,11 +50,12 @@ typedef struct SCDetContext {
int nb_planes;
int bitdepth;
ff_scene_sad_fn sad;
- double prev_mafd;
- double scene_score;
- AVFrame *prev_picref;
+ SCDETFrameInfo curr_frame;
+ SCDETFrameInfo prev_frame;
+
double threshold;
int sc_pass;
+ enum SCDETMode mode;
} SCDetContext;
#define OFFSET(x) offsetof(SCDetContext, x)
@@ -55,6 +67,7 @@ static const AVOption scdet_options[] = {
{ "t", "set scene change detect threshold",
OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 10.}, 0, 100., V|F },
{ "sc_pass", "Set the flag to pass scene change frames",
OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.dbl = 0 }, 0, 1, V|F },
{ "s", "Set the flag to pass scene change frames",
OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.dbl = 0 }, 0, 1, V|F },
+ { "mode", "scene change detection method",
OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_DIFF}, MODE_DIFF,
MODE_MEAN_CBRT, V|F },
{NULL}
};
@@ -85,13 +98,16 @@ static int config_input(AVFilterLink *inlink)
s->bitdepth = desc->comp[0].depth;
s->nb_planes = is_yuv ? 1 : av_pix_fmt_count_planes(inlink->format);
- for (int plane = 0; plane < 4; plane++) {
+ for (int plane = 0; plane < s->nb_planes; plane++) {
ptrdiff_t line_size = av_image_get_linesize(inlink->format,
inlink->w, plane);
s->width[plane] = line_size >> (s->bitdepth > 8);
- s->height[plane] = inlink->h >> ((plane == 1 || plane == 2) ?
desc->log2_chroma_h : 0);
+ s->height[plane] = plane == 1 || plane == 2 ?
AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h) : inlink->h;
}
- s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
+ if (s->mode == MODE_DIFF)
+ s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
+ else if (s->mode == MODE_MEAN_CBRT)
+ s->sad = ff_scene_scrd_get_fn(s->bitdepth);
if (!s->sad)
return AVERROR(EINVAL);
@@ -101,46 +117,86 @@ static int config_input(AVFilterLink *inlink)
static av_cold void uninit(AVFilterContext *ctx)
{
SCDetContext *s = ctx->priv;
-
- av_frame_free(&s->prev_picref);
}
-static double get_scene_score(AVFilterContext *ctx, AVFrame *frame)
+static void compute_diff(AVFilterContext *ctx)
{
- double ret = 0;
SCDetContext *s = ctx->priv;
- AVFrame *prev_picref = s->prev_picref;
+ AVFrame *prev_picref = s->prev_frame.picref;
+ AVFrame *curr_picref = s->curr_frame.picref;
- if (prev_picref && frame->height == prev_picref->height
- && frame->width == prev_picref->width) {
- uint64_t sad = 0;
- double mafd, diff;
- uint64_t count = 0;
+ if (prev_picref && curr_picref
+ && curr_picref->height == prev_picref->height
+ && curr_picref->width == prev_picref->width) {
+ uint64_t sum = 0;
+ uint64_t count = 0;
for (int plane = 0; plane < s->nb_planes; plane++) {
- uint64_t plane_sad;
+ uint64_t plane_sum;
s->sad(prev_picref->data[plane], prev_picref->linesize[plane],
- frame->data[plane], frame->linesize[plane],
- s->width[plane], s->height[plane], &plane_sad);
- sad += plane_sad;
+ curr_picref->data[plane], curr_picref->linesize[plane],
+ s->width[plane], s->height[plane], &plane_sum);
+ sum += plane_sum;
count += s->width[plane] * s->height[plane];
}
- mafd = (double)sad * 100. / count / (1ULL << s->bitdepth);
- diff = fabs(mafd - s->prev_mafd);
- ret = av_clipf(FFMIN(mafd, diff), 0, 100.);
- s->prev_mafd = mafd;
- av_frame_free(&prev_picref);
+ s->curr_frame.mafd = (double)sum * 100. / count / (1ULL <<
s->bitdepth);
+ s->curr_frame.diff = s->curr_frame.mafd - s->prev_frame.mafd;
+ } else {
+ s->curr_frame.mafd = 0;
+ s->curr_frame.diff = 0;
}
- s->prev_picref = av_frame_clone(frame);
- return ret;
}
-static int set_meta(SCDetContext *s, AVFrame *frame, const char *key, const
char *value)
+static int set_meta(AVFrame *frame, const char *key, const char *value)
{
return av_dict_set(&frame->metadata, key, value, 0);
}
+static int filter_frame(AVFilterContext* ctx, AVFrame* frame)
+{
+ AVFilterLink* inlink = ctx->inputs[0];
+ AVFilterLink* outlink = ctx->outputs[0];
+ SCDetContext* s = ctx->priv;
+
+ s->prev_frame = s->curr_frame;
+ s->curr_frame.picref = frame;
+
+ if (s->prev_frame.picref) {
+ compute_diff(ctx);
+
+ if (s->prev_frame.diff < -s->curr_frame.diff) {
+ s->prev_frame.diff = -s->curr_frame.diff;
+ s->prev_frame.mafd = s->curr_frame.mafd;
+ }
+ double scene_score = av_clipf(FFMAX(s->prev_frame.diff, 0), 0,
100.);
+
+ char buf[64];
+ snprintf(buf, sizeof(buf), "%0.3f", s->prev_frame.mafd);
+ set_meta(s->prev_frame.picref, "lavfi.scd.mafd", buf);
+ snprintf(buf, sizeof(buf), "%0.3f", scene_score);
+ set_meta(s->prev_frame.picref, "lavfi.scd.score", buf);
+
+ if (scene_score >= s->threshold) {
+ av_log(s, AV_LOG_INFO, "lavfi.scd.score: %.3f, lavfi.scd.time:
%s\n",
+ scene_score, av_ts2timestr(s->prev_frame.picref->pts,
&inlink->time_base));
+ set_meta(s->prev_frame.picref, "lavfi.scd.time",
+ av_ts2timestr(s->prev_frame.picref->pts,
&inlink->time_base));
+ }
+
+ if (s->sc_pass) {
+ if (scene_score >= s->threshold)
+ return ff_filter_frame(outlink, s->prev_frame.picref);
+ else
+ av_frame_free(&s->prev_frame.picref);
+ }
+ else
+ return ff_filter_frame(outlink, s->prev_frame.picref);
+ }
+
+ return 0;
+}
+
static int activate(AVFilterContext *ctx)
{
int ret;
@@ -148,6 +204,8 @@ static int activate(AVFilterContext *ctx)
AVFilterLink *outlink = ctx->outputs[0];
SCDetContext *s = ctx->priv;
AVFrame *frame;
+ int64_t pts;
+ int status;
FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
@@ -155,31 +213,17 @@ static int activate(AVFilterContext *ctx)
if (ret < 0)
return ret;
- if (frame) {
- char buf[64];
- s->scene_score = get_scene_score(ctx, frame);
- snprintf(buf, sizeof(buf), "%0.3f", s->prev_mafd);
- set_meta(s, frame, "lavfi.scd.mafd", buf);
- snprintf(buf, sizeof(buf), "%0.3f", s->scene_score);
- set_meta(s, frame, "lavfi.scd.score", buf);
+ if (ret > 0)
+ return filter_frame(ctx, frame);
- if (s->scene_score >= s->threshold) {
- av_log(s, AV_LOG_INFO, "lavfi.scd.score: %.3f, lavfi.scd.time:
%s\n",
- s->scene_score, av_ts2timestr(frame->pts,
&inlink->time_base));
- set_meta(s, frame, "lavfi.scd.time",
- av_ts2timestr(frame->pts, &inlink->time_base));
- }
- if (s->sc_pass) {
- if (s->scene_score >= s->threshold)
- return ff_filter_frame(outlink, frame);
- else {
- av_frame_free(&frame);
- }
- } else
- return ff_filter_frame(outlink, frame);
+ if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
+ if (status == AVERROR_EOF)
+ ret = filter_frame(ctx, NULL);
+
+ ff_outlink_set_status(outlink, status, pts);
+ return ret;
}
- FF_FILTER_FORWARD_STATUS(inlink, outlink);
FF_FILTER_FORWARD_WANTED(outlink, inlink);
return FFERROR_NOT_READY;
@@ -190,12 +234,12 @@ static const AVFilterPad scdet_inputs[] = {
.name = "default",
.type = AVMEDIA_TYPE_VIDEO,
.config_props = config_input,
- },
+ }
};
const AVFilter ff_vf_scdet = {
.name = "scdet",
- .description = NULL_IF_CONFIG_SMALL("Detect video scene change"),
+ .description = NULL_IF_CONFIG_SMALL("Detect video scene change."),
.priv_size = sizeof(SCDetContext),
.priv_class = &scdet_class,
.uninit = uninit,
@@ -203,5 +247,5 @@ const AVFilter ff_vf_scdet = {
FILTER_INPUTS(scdet_inputs),
FILTER_OUTPUTS(ff_video_default_filterpad),
FILTER_PIXFMTS_ARRAY(pix_fmts),
- .activate = activate,
+ .activate = activate
};
diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
index ee9f0f5e40..cff48e33d9 100644
--- a/tests/fate/filter-video.mak
+++ b/tests/fate/filter-video.mak
@@ -672,6 +672,9 @@ SCDET_DEPS = LAVFI_INDEV FILE_PROTOCOL MOVIE_FILTER SCDET_FILTER SCALE_FILTER \
FATE_METADATA_FILTER-$(call ALLYES, $(SCDET_DEPS)) += fate-filter-metadata-scdet
fate-filter-metadata-scdet: SRC = $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov
fate-filter-metadata-scdet: CMD = run $(FILTER_METADATA_COMMAND) "sws_flags=+accurate_rnd+bitexact;movie='$(SRC)',scdet=s=1"
+FATE_METADATA_FILTER-$(call ALLYES, $(SCDET_DEPS)) += fate-filter-metadata-scdet1
+fate-filter-metadata-scdet1: SRC = $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov
+fate-filter-metadata-scdet1: CMD = run $(FILTER_METADATA_COMMAND) "sws_flags=+accurate_rnd+bitexact;movie='$(SRC)',scdet=s=1:t=6.5:mode=1"
CROPDETECT_DEPS = LAVFI_INDEV FILE_PROTOCOL MOVIE_FILTER MOVIE_FILTER MESTIMATE_FILTER CROPDETECT_FILTER \
SCALE_FILTER MOV_DEMUXER H264_DECODER
--
2.43.0.windows.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2024-06-03 22:42 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-12 11:05 [FFmpeg-devel] [PATCH] area changed: scdet filter radu.taraibuta
2024-05-12 11:34 ` Paul B Mahol
-- strict thread matches above, loose matches on Subject: below --
2024-05-13 15:52 radu.taraibuta
2024-05-19 16:05 ` radu.taraibuta
2024-05-28 7:51 ` radu.taraibuta
2024-05-28 13:16 ` Paul B Mahol
2024-05-30 21:31 ` Michael Niedermayer
2024-06-02 20:17 ` radu.taraibuta
2024-06-03 22:42 ` Michael Niedermayer
2024-05-12 11:04 raduct
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git