mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Optimize ABGRToI420 for AVX2
libyuv_test --gunit_filter=*ABGRToI420_Opt --libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=1000 --libyuv_flags=-1 --libyuv_cpu_info=-1
Was SSSE3 ABGRToI420_Opt (324 ms)
Now AVX2 ABGRToI420_Opt (253 ms)

Bug: b/155989084
Change-Id: I4f3831e29b379be758f9d3fcb244be088bb1ca3c
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2229606
Reviewed-by: Miguel Casas <mcasas@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
ce5b333853
commit
c5e45dcae5
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1758
|
Version: 1759
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -39,10 +39,14 @@ LIBYUV_API extern const struct YuvConstants kYvu2020Constants; // BT.2020
|
|||||||
#define kYuvJPEGConstantsVU kYvuJPEGConstants
|
#define kYuvJPEGConstantsVU kYvuJPEGConstants
|
||||||
#define kYuvH709ConstantsVU kYvuH709Constants
|
#define kYuvH709ConstantsVU kYvuH709Constants
|
||||||
#define kYuv2020ConstantsVU kYvu2020Constants
|
#define kYuv2020ConstantsVU kYvu2020Constants
|
||||||
#define NV12ToABGRMatrix(a, b, c, d, e, f, g, h, i) NV21ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i)
|
#define NV12ToABGRMatrix(a, b, c, d, e, f, g, h, i) \
|
||||||
#define NV21ToABGRMatrix(a, b, c, d, e, f, g, h, i) NV12ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i)
|
NV21ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i)
|
||||||
#define NV12ToRAWMatrix(a, b, c, d, e, f, g, h, i) NV21ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
|
#define NV21ToABGRMatrix(a, b, c, d, e, f, g, h, i) \
|
||||||
#define NV21ToRAWMatrix(a, b, c, d, e, f, g, h, i) NV12ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
|
NV12ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i)
|
||||||
|
#define NV12ToRAWMatrix(a, b, c, d, e, f, g, h, i) \
|
||||||
|
NV21ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
|
||||||
|
#define NV21ToRAWMatrix(a, b, c, d, e, f, g, h, i) \
|
||||||
|
NV12ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
|
||||||
|
|
||||||
// Alias.
|
// Alias.
|
||||||
#define ARGBToARGB ARGBCopy
|
#define ARGBToARGB ARGBCopy
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1758
|
#define LIBYUV_VERSION 1759
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|||||||
@ -608,11 +608,9 @@ int NV21ToI420(const uint8_t* src_y,
|
|||||||
int dst_stride_v,
|
int dst_stride_v,
|
||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
return NV12ToI420(src_y, src_stride_y,
|
return NV12ToI420(src_y, src_stride_y, src_vu, src_stride_vu, dst_y,
|
||||||
src_vu, src_stride_vu,
|
dst_stride_y, dst_v, dst_stride_v, dst_u, dst_stride_u,
|
||||||
dst_y, dst_stride_y,
|
width, height);
|
||||||
dst_v, dst_stride_v,
|
|
||||||
dst_u, dst_stride_u, width, height);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert YUY2 to I420.
|
// Convert YUY2 to I420.
|
||||||
@ -1170,6 +1168,16 @@ int ABGRToI420(const uint8_t* src_abgr,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_ABGRTOYROW_AVX2) && defined(HAS_ABGRTOUVROW_AVX2)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
|
ABGRToUVRow = ABGRToUVRow_Any_AVX2;
|
||||||
|
ABGRToYRow = ABGRToYRow_Any_AVX2;
|
||||||
|
if (IS_ALIGNED(width, 32)) {
|
||||||
|
ABGRToUVRow = ABGRToUVRow_AVX2;
|
||||||
|
ABGRToYRow = ABGRToYRow_AVX2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if defined(HAS_ABGRTOYROW_NEON)
|
#if defined(HAS_ABGRTOYROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ABGRToYRow = ABGRToYRow_Any_NEON;
|
ABGRToYRow = ABGRToYRow_Any_NEON;
|
||||||
|
|||||||
@ -349,9 +349,7 @@ void I400ToARGBRow_NEON(const uint8_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"vmov.u8 d23, #255 \n"
|
"vmov.u8 d23, #255 \n"
|
||||||
"1: \n"
|
"1: \n" READYUV400 YUVTORGB
|
||||||
READYUV400
|
|
||||||
YUVTORGB
|
|
||||||
"subs %2, %2, #8 \n"
|
"subs %2, %2, #8 \n"
|
||||||
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
|
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
|
|||||||
35
source/test.sh
Executable file
35
source/test.sh
Executable file
@ -0,0 +1,35 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -x
|
||||||
|
|
||||||
|
function runbenchmark1 {
|
||||||
|
perf record /google/src/cloud/fbarchard/clean/google3/blaze-bin/third_party/libyuv/libyuv_test --gunit_filter=*$1 --libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=1000 --libyuv_flags=-1 --libyuv_cpu_info=-1
|
||||||
|
perf report | grep AVX
|
||||||
|
}
|
||||||
|
|
||||||
|
runbenchmark1 ABGRToI420
|
||||||
|
runbenchmark1 Android420ToI420
|
||||||
|
runbenchmark1 ARGBToI420
|
||||||
|
runbenchmark1 Convert16To8Plane
|
||||||
|
runbenchmark1 ConvertToARGB
|
||||||
|
runbenchmark1 ConvertToI420
|
||||||
|
runbenchmark1 CopyPlane
|
||||||
|
runbenchmark1 H010ToAB30
|
||||||
|
runbenchmark1 H010ToAR30
|
||||||
|
runbenchmark1 HalfFloatPlane
|
||||||
|
runbenchmark1 I010ToAB30
|
||||||
|
runbenchmark1 I010ToAR30
|
||||||
|
runbenchmark1 I420Copy
|
||||||
|
runbenchmark1 I420Psnr
|
||||||
|
runbenchmark1 I420Scale
|
||||||
|
runbenchmark1 I420Ssim
|
||||||
|
runbenchmark1 I420ToARGB
|
||||||
|
runbenchmark1 I420ToNV12
|
||||||
|
runbenchmark1 I420ToUYVY
|
||||||
|
runbenchmark1 I422ToI420
|
||||||
|
runbenchmark1 InitCpuFlags
|
||||||
|
runbenchmark1 J420ToARGB
|
||||||
|
runbenchmark1 NV12ToARGB
|
||||||
|
runbenchmark1 NV12ToI420
|
||||||
|
runbenchmark1 NV12ToI420Rotate
|
||||||
|
runbenchmark1 SetCpuFlags
|
||||||
|
runbenchmark1 YUY2ToI420
|
||||||
Loading…
x
Reference in New Issue
Block a user