mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
AVX2 port of H010ToAR30 (adds I210ToAR30Row_AVX2)
Was: SSSE3 H010ToAR30_Opt (635 ms). Now: AVX2 H010ToAR30_Opt (448 ms). Bug: libyuv:751. Test: LibYUVConvertTest.H010ToAR30_Opt. Change-Id: I17b1a0e3268c4a9836e09683dd3377fb1ce60932. Reviewed-on: https://chromium-review.googlesource.com/889906. Commit-Queue: Frank Barchard <fbarchard@chromium.org>. Reviewed-by: Miguel Casas <mcasas@chromium.org>.
This commit is contained in:
parent
c95fd57993
commit
ed96b7b2c7
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1693
|
Version: 1694
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -272,6 +272,7 @@ extern "C" {
|
|||||||
#define HAS_CONVERT16TO8ROW_AVX2
|
#define HAS_CONVERT16TO8ROW_AVX2
|
||||||
#define HAS_CONVERT8TO16ROW_AVX2
|
#define HAS_CONVERT8TO16ROW_AVX2
|
||||||
#define HAS_I210TOARGBROW_AVX2
|
#define HAS_I210TOARGBROW_AVX2
|
||||||
|
#define HAS_I210TOAR30ROW_AVX2
|
||||||
#define HAS_I422TOAR30ROW_AVX2
|
#define HAS_I422TOAR30ROW_AVX2
|
||||||
#define HAS_MERGEUVROW_16_AVX2
|
#define HAS_MERGEUVROW_16_AVX2
|
||||||
#define HAS_MULTIPLYROW_16_AVX2
|
#define HAS_MULTIPLYROW_16_AVX2
|
||||||
@ -1918,6 +1919,12 @@ void I210ToARGBRow_AVX2(const uint16_t* src_y,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width);
|
int width);
|
||||||
|
void I210ToAR30Row_AVX2(const uint16_t* src_y,
|
||||||
|
const uint16_t* src_u,
|
||||||
|
const uint16_t* src_v,
|
||||||
|
uint8_t* dst_ar30,
|
||||||
|
const struct YuvConstants* yuvconstants,
|
||||||
|
int width);
|
||||||
void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
|
void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
|
||||||
const uint8_t* u_buf,
|
const uint8_t* u_buf,
|
||||||
const uint8_t* v_buf,
|
const uint8_t* v_buf,
|
||||||
@ -2092,6 +2099,12 @@ void I210ToARGBRow_Any_AVX2(const uint16_t* src_y,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width);
|
int width);
|
||||||
|
void I210ToAR30Row_Any_AVX2(const uint16_t* src_y,
|
||||||
|
const uint16_t* src_u,
|
||||||
|
const uint16_t* src_v,
|
||||||
|
uint8_t* dst_ar30,
|
||||||
|
const struct YuvConstants* yuvconstants,
|
||||||
|
int width);
|
||||||
void I422AlphaToARGBRow_Any_SSSE3(const uint8_t* y_buf,
|
void I422AlphaToARGBRow_Any_SSSE3(const uint8_t* y_buf,
|
||||||
const uint8_t* u_buf,
|
const uint8_t* u_buf,
|
||||||
const uint8_t* v_buf,
|
const uint8_t* v_buf,
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1693
|
#define LIBYUV_VERSION 1694
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|||||||
@ -230,6 +230,9 @@ ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
|
|||||||
#ifdef HAS_I210TOARGBROW_AVX2
|
#ifdef HAS_I210TOARGBROW_AVX2
|
||||||
ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
|
ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef HAS_I210TOAR30ROW_AVX2
|
||||||
|
ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
|
||||||
|
#endif
|
||||||
#undef ANY31CT
|
#undef ANY31CT
|
||||||
|
|
||||||
// Any 2 planes to 1.
|
// Any 2 planes to 1.
|
||||||
|
|||||||
@ -2501,6 +2501,46 @@ void OMITFP I210ToARGBRow_AVX2(const uint16_t* y_buf,
|
|||||||
}
|
}
|
||||||
#endif // HAS_I210TOARGBROW_AVX2
|
#endif // HAS_I210TOARGBROW_AVX2
|
||||||
|
|
||||||
|
#if defined(HAS_I210TOAR30ROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes).
//
// y_buf, u_buf, v_buf: 16-bit source rows.
// dst_ar30:            destination row.
// yuvconstants:        conversion constants consumed by the YUVTORGB_* macros.
// width:               pixel count. The loop subtracts 16 per iteration and
//                      repeats while the result is still positive, so work is
//                      always done in whole groups of 16 pixels.
//                      NOTE(review): widths that are not a multiple of 16 are
//                      presumably routed through the _Any_AVX2 wrapper - confirm.
void OMITFP I210ToAR30Row_AVX2(const uint16_t* y_buf,
                               const uint16_t* u_buf,
                               const uint16_t* v_buf,
                               uint8_t* dst_ar30,
                               const struct YuvConstants* yuvconstants,
                               int width) {
  asm volatile (
    YUVTORGB_SETUP_AVX2(yuvconstants)
      // Turn v_buf into an offset from u_buf (presumably so the read macro can
      // address V relative to U with a single advancing pointer - confirm
      // against READYUV210_AVX2).
      "sub         %[u_buf],%[v_buf]             \n"

      // AR30 constants kept live across the loop in ymm5/ymm6/ymm7.
      "vpcmpeqb    %%ymm5,%%ymm5,%%ymm5          \n"  // all ones
      "vpsrlw      $14,%%ymm5,%%ymm5             \n"  // 3 in each 16-bit lane
      "vpsllw      $4,%%ymm5,%%ymm5              \n"  // 2 alpha bits
      "vpxor       %%ymm6,%%ymm6,%%ymm6          \n"  // 0 for min
      "vpcmpeqb    %%ymm7,%%ymm7,%%ymm7          \n"  // all ones
      "vpsrlw      $6,%%ymm7,%%ymm7              \n"  // 1023 for max

    LABELALIGN
      "1:                                        \n"
    READYUV210_AVX2
    YUVTORGB16_AVX2(yuvconstants)
    STOREAR30_AVX2
      "sub         $0x10,%[width]                \n"  // 16 pixels done
      "jg          1b                            \n"

      "vzeroupper                                \n"
  : [y_buf]"+r"(y_buf),    // %[y_buf]
    [u_buf]"+r"(u_buf),    // %[u_buf]
    [v_buf]"+r"(v_buf),    // %[v_buf]
    [dst_ar30]"+r"(dst_ar30),  // %[dst_ar30]
    [width]"+rm"(width)    // %[width]
  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  // ymm6 and ymm7 are written above (min/max constants), so they must be
  // declared as clobbered alongside ymm0-ymm5; otherwise the compiler may keep
  // live values in them across this asm statement.
  : "memory", "cc", YUVTORGB_REGS_AVX2
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );
}
#endif  // HAS_I210TOAR30ROW_AVX2
|
||||||
|
|
||||||
#if defined(HAS_I422ALPHATOARGBROW_AVX2)
|
#if defined(HAS_I422ALPHATOARGBROW_AVX2)
|
||||||
// 16 pixels
|
// 16 pixels
|
||||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB.
|
// 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB.
|
||||||
|
|||||||
@ -2083,6 +2083,7 @@ TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) {
|
|||||||
|
|
||||||
TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1, 2)
|
TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1, 2)
|
||||||
TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1, 2)
|
TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1, 2)
|
||||||
|
TESTPLANAR16TOB(I010, 2, 2, AR30, 4, 4, 1, 2)
|
||||||
TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1, 2)
|
TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1, 2)
|
||||||
TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1, 2)
|
TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1, 2)
|
||||||
TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2)
|
TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user