From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTP id A6E294325C for ; Thu, 26 May 2022 04:43:41 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 71A9E68B509; Thu, 26 May 2022 07:43:39 +0300 (EEST) Received: from mail-lf1-f50.google.com (mail-lf1-f50.google.com [209.85.167.50]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 74C1F68B4AC for ; Thu, 26 May 2022 07:43:33 +0300 (EEST) Received: by mail-lf1-f50.google.com with SMTP id bu29so802934lfb.0 for ; Wed, 25 May 2022 21:43:33 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=from:to:cc:subject:date:message-id:mime-version :content-transfer-encoding; bh=3oUNlnFeG3XPhTg2C4OXM03j8MnoTZGm8e8rylToD2U=; b=aTdtWwpkF6yf41ZsEDFkBwKWcBPFb5uIBxV4F7VPKPt5V87iz+LxW9w98x0jdN6hfh g1NLkc5ReNmOiFZz4NIcDn9fyU9F28JcpNcJGRIzwoUaqBHlCT7yYLliCy5W0NCL7sco VYt/9TL/gbsvVkrk2egYyALxR5MPQEjZ6kjLWfLB0/p8TXXM2EF0DKpeqWOhAZfCEA7R P5BkYSvQUWIR3TmQl0rVVrSJJ6pxZavYqxV2uYeXQy5h/cMbfbxb1LKhHHzVv8N5gN+K q6yTGZsMCZhludhhJfWNy+UI6JR2qjhHyLwXiKVfLVkHXxQFfJZSNbR9vkVjiVRN9aMY 2a2w== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:mime-version :content-transfer-encoding; bh=3oUNlnFeG3XPhTg2C4OXM03j8MnoTZGm8e8rylToD2U=; b=o46z0PhgHak8mKnTBXEaxwagZn9vezYxch+92wsk43RYqtO3eBPA5ilg8lUgvACyCa 6yhWlSeAad2NU9pi6FhugH91NbPmXTKRdKHMdzNO5nIqZd10NnLp5iScOLZ8GGnFdxFs JEXcuNQeh5uHU0NuLMGZUpPPcnUPP4MEizK4jqxhHpvMHUz+Fzt1DWrF9I8IZb390Heo fumbbHIb4TD1BJ4Dx5/UYRFT30WQnWnvnZSZqiCpk5+bLY6Ytso1fo94+wuzGrV4AcJF hCIwcvnaSyt0su8CRBLDGsgEok21cYfXXsOw9lti/T1gKd6lIxOprkyEYUfjU66SYWEt m5pw== X-Gm-Message-State: AOAM531pRjN+rotLEmvFC7l9vkjXHaipcmE+FV3HmBo2p/plKpuIiJbS qmre7iKTS1/xU3RhS7zIt3FwPyvDzHA= X-Google-Smtp-Source: 
ABdhPJwepW6WeGWLCSAbVDI9VUJ59hDRyzz4Fu5t2NYkHzQ//31/pgD3j9saSxy25Qc257zhHuvanw== X-Received: by 2002:a05:6512:2614:b0:445:777d:3530 with SMTP id bt20-20020a056512261400b00445777d3530mr25628502lfb.647.1653540212126; Wed, 25 May 2022 21:43:32 -0700 (PDT) Received: from NotANoble.wifi.nsu.ru ([84.237.55.6]) by smtp.gmail.com with ESMTPSA id m26-20020a19711a000000b0047255d210dcsm127667lfc.11.2022.05.25.21.43.30 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 25 May 2022 21:43:31 -0700 (PDT) From: FacelessLake X-Google-Original-From: FacelessLake To: ffmpeg-devel@ffmpeg.org Date: Thu, 26 May 2022 11:42:25 +0700 Message-Id: <20220526044225.10466-1-sinonim147@gmail.com> X-Mailer: git-send-email 2.36.1 MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH] avcodec/vp9: ipred_hd_16x16_16 avx2 implementation X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Semen Belozerov Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Archived-At: List-Archive: List-Post: From: Semen Belozerov --- libavcodec/x86/vp9dsp_init_16bpp.c | 2 + libavcodec/x86/vp9intrapred_16bpp.asm | 54 +++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/libavcodec/x86/vp9dsp_init_16bpp.c b/libavcodec/x86/vp9dsp_init_16bpp.c index b17826326f..e5afea1512 100644 --- a/libavcodec/x86/vp9dsp_init_16bpp.c +++ b/libavcodec/x86/vp9dsp_init_16bpp.c @@ -55,6 +55,7 @@ decl_ipred_fn(dl, 32, 16, avx2); decl_ipred_fn(dr, 16, 16, avx2); decl_ipred_fn(dr, 32, 16, avx2); decl_ipred_fn(vl, 16, 16, avx2); +decl_ipred_fn(hd, 16, 16, avx2); #define decl_ipred_dir_funcs(type) \ decl_ipred_fns(type, 16, sse2, sse2); \ @@ -141,6 +142,7 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp) 
init_ipred_func(dl, DIAG_DOWN_LEFT, 32, 16, avx2); init_ipred_func(dr, DIAG_DOWN_RIGHT, 16, 16, avx2); init_ipred_func(vl, VERT_LEFT, 16, 16, avx2); + init_ipred_func(hd, HOR_DOWN, 16, 16, avx2); #if ARCH_X86_64 init_ipred_func(dr, DIAG_DOWN_RIGHT, 32, 16, avx2); #endif diff --git a/libavcodec/x86/vp9intrapred_16bpp.asm b/libavcodec/x86/vp9intrapred_16bpp.asm index 0dad91ac5c..808056a809 100644 --- a/libavcodec/x86/vp9intrapred_16bpp.asm +++ b/libavcodec/x86/vp9intrapred_16bpp.asm @@ -1273,6 +1273,60 @@ cglobal vp9_ipred_vl_16x16_16, 4, 5, 7, dst, stride, l, a mova [dst4q+stride3q*4], m1 ; 15 IJKLMNOPPPPPPPPP RET +cglobal vp9_ipred_hd_16x16_16, 4, 5, 7, dst, stride, l, a + movu m0, [aq-2] ; *abcdefghijklmno + mova m1, [lq] ; klmnopqrstuvwxyz + vperm2i128 m2, m1, m0, q0201 ; stuvwxyz*abcdefg + vpalignr m3, m2, m1, 2 ; lmnopqrstuvwxyz* + vpalignr m4, m2, m1, 4 ; mnopqrstuvwxyz*a + LOWPASS 4, 3, 1 ; LMNOPQRSTUVWXYZ# + pavgw m3, m1 ; klmnopqrstuvwxyz + mova m1, [aq] ; abcdefghijklmnop + movu m2, [aq+2] ; bcdefghijklmnop. + LOWPASS 2, 1, 0 ; ABCDEFGHIJKLMNO. 
+ vpunpcklwd m0, m3, m4 ; kLlMmNnOsTtUuVvW + vpunpckhwd m1, m3, m4 ; oPpQqRrSwXxYyZz# + vperm2i128 m3, m1, m0, q0002 ; kLlMmNnOoPpQqRrS + vperm2i128 m4, m0, m1, q0301 ; sTtUuVvWwXxYyZz# + vperm2i128 m0, m4, m2, q0201 ; wXxYyZz#ABCDEFGH + vperm2i128 m1, m3, m4, q0201 ; oPpQqRrSsTtUuVvW + DEFINE_ARGS dst, stride, stride3, stride5, dst5 + lea stride3q, [strideq*3] ; stride3 = 3*stride + lea stride5q, [stride3q+strideq*2] ; stride5 = 5*stride + lea dst5q, [dstq+stride5q] ; dst5 = dst + 5*stride + + mova [dst5q+stride5q*2], m3 ; 15 kLlMmNnOoPpQqRrS + mova [dst5q+stride3q*2], m1 ; 11 oPpQqRrSsTtUuVvW + mova [dst5q+strideq*2], m4 ; 7 sTtUuVvWwXxYyZz# + mova [dstq+stride3q*1], m0 ; 3 wXxYyZz#ABCDEFGH + vpalignr m5, m4, m1, 4 + mova [dstq+stride5q*2], m5 ; 10 pQqRrSsTtUuVvWwX + vpalignr m5, m0, m4, 4 + vpalignr m6, m2, m0, 4 + mova [dstq+stride3q*2], m5 ; 6 tUuVvWwXxYyZz#AB + mova [dstq+strideq*2], m6 ; 2 xYyZz#ABCDEFGHIJ + vpalignr m5, m4, m1, 8 + mova [dst5q+strideq*4], m5 ; 9 qRrSsTtUuVvWwXxY + vpalignr m5, m0, m4, 8 + vpalignr m6, m2, m0, 8 + mova [dstq+stride5q*1], m5 ; 5 uVvWwXxYyZz#ABCD + mova [dstq+strideq*1], m6 ; 1 yZz#ABCDEFGHIJKL + vpalignr m5, m1, m3, 12 + vpalignr m6, m4, m1, 12 + mova [dstq+stride3q*4], m5 ; 12 nOoPpQqRrSsTtUuV + mova [dst5q+stride3q], m6 ; 8 rSsTtUuVvWwXxYyZ + vpalignr m5, m0, m4, 12 + vpalignr m6, m2, m0, 12 + mova [dstq+strideq*4], m5 ; 4 vWwXxYyZz#ABCDEF + mova [dstq+strideq*0], m6 ; 0 z#ABCDEFGHIJKLMN + sub dst5q, strideq ; dst5 = dst + 4*stride, so +stride5*2 below is row 14 + vpalignr m5, m1, m3, 4 + mova [dst5q+stride5q*2], m5 ; 14 lMmNnOoPpQqRrSsT + sub dst5q, strideq ; dst5 = dst + 3*stride, so +stride5*2 below is row 13 + vpalignr m5, m1, m3, 8 + mova [dst5q+stride5q*2], m5 ; 13 mNnOoPpQqRrSsTtU + RET + %if ARCH_X86_64 cglobal vp9_ipred_dr_32x32_16, 4, 7, 10, dst, stride, l, a mova m0, [lq+mmsize*0+0] ; l[0-15] -- 2.36.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".