fix yvu constants for avx2 yuv to rgb

The YVU matrices used for YUV to RGB conversion had an incorrect final entry
in the upper half of two coefficient rows (UB and VR were swapped), affecting
YUV to BGRA, YUV to ABGR and YUV to RAW.
Fix the matrices and re-enable the AVX2 functions.

R=harryjin@google.com
BUG=libyuv:522

Review URL: https://codereview.chromium.org/1411763004 .
This commit is contained in:
Frank Barchard 2015-11-10 10:45:44 -08:00
parent 72a9e282ec
commit 6100f50f13
5 changed files with 89 additions and 17 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1535
Version: 1536
License: BSD
License File: LICENSE

View File

@ -200,13 +200,11 @@ extern "C" {
#if !(defined(_DEBUG) && defined(__i386__))
// TODO(fbarchard): fix build error on android_full_debug=1
// https://code.google.com/p/libyuv/issues/detail?id=517
// AVX2 versions fail on gcc. b/522
// #define HAS_I422ALPHATOARGBROW_AVX2
#define HAS_I422ALPHATOARGBROW_AVX2
#endif
// AVX2 versions fail on gcc. b/522
// #define HAS_I422TOARGBROW_AVX2
// #define HAS_I422TORGB24ROW_AVX2
// #define HAS_I422TORGBAROW_AVX2
#define HAS_I422TOARGBROW_AVX2
#define HAS_I422TORGB24ROW_AVX2
#define HAS_I422TORGBAROW_AVX2
#define HAS_INTERPOLATEROW_AVX2
#define HAS_J422TOARGBROW_AVX2
#define HAS_MERGEUVROW_AVX2
@ -241,9 +239,9 @@ extern "C" {
#define HAS_ARGBTOARGB4444ROW_AVX2
#define HAS_ARGBTORGB565ROW_AVX2
#define HAS_I411TOARGBROW_AVX2
// #define HAS_I422TOARGB1555ROW_AVX2
// #define HAS_I422TOARGB4444ROW_AVX2
// #define HAS_I422TORGB565ROW_AVX2
#define HAS_I422TOARGB1555ROW_AVX2
#define HAS_I422TOARGB4444ROW_AVX2
#define HAS_I422TORGB565ROW_AVX2
#define HAS_I444TOARGBROW_AVX2
#define HAS_J400TOARGBROW_AVX2
#define HAS_NV12TORGB565ROW_AVX2

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1535
#define LIBYUV_VERSION 1536
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

View File

@ -1077,11 +1077,11 @@ const YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
};
const YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
{ VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, UB, 0 },
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
{ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
{ 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, VR },
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
{ BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
{ BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
{ BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
@ -1164,11 +1164,11 @@ const YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
};
const YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
{ VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, UB, 0 },
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
{ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
{ 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, VR },
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
{ BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
{ BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
{ BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
@ -1252,11 +1252,11 @@ const YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
};
const YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
{ VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, UB, 0 },
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
{ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
{ 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, VR },
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
{ BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
{ BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
{ BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },

View File

@ -1792,4 +1792,78 @@ TESTPLANARTOE(I422, 2, 1, UYVY, 2, ARGB, 4)
// TESTPLANARTOE(I420, 2, 2, ARGB, 4, I400, 1)
// TESTPLANARTOE(J420, 2, 2, ARGB, 4, J400, 1)
// Generates one "planar with alpha -> B -> C" consistency test.
//
// The test fills Y, U, V and A planes with pseudo-random bytes, converts
// FMT_PLANAR to FMT_B (repeated benchmark_iterations_ times), and then checks
// that converting FMT_PLANAR directly to FMT_C produces exactly the same bytes
// as converting the FMT_B result to FMT_C.
//
//   FMT_PLANAR            - source format name; forms FMT_PLANAR##To##FMT_x.
//   SUBSAMP_X, SUBSAMP_Y  - chroma subsampling passed to SUBSAMPLE().
//   FMT_B, BPP_B          - intermediate format name and its bytes per pixel.
//   W1280                 - requested width; values <= 0 are replaced by 1.
//   N                     - suffix appended to the generated test name.
//   NEG                   - sign token placed in front of kHeight for the
//                           two planar conversions.
//   OFF                   - byte offset added to every buffer pointer.
//   FMT_C, BPP_C          - final format name and its bytes per pixel.
//   ATTEN                 - last argument forwarded to the planar conversions.
#define TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,        \
                        W1280, N, NEG, OFF, FMT_C, BPP_C, ATTEN)               \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) {                \
  const int kWidth = ((W1280) > 0) ? (W1280) : 1;                              \
  const int kHeight = benchmark_height_;                                       \
  const int kStrideB = kWidth * BPP_B;                                         \
  const int kSizeUV =                                                          \
      SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y);            \
  align_buffer_64(src_y, kWidth * kHeight + OFF);                              \
  align_buffer_64(src_u, kSizeUV + OFF);                                       \
  align_buffer_64(src_v, kSizeUV + OFF);                                       \
  align_buffer_64(src_a, kWidth * kHeight + OFF);                              \
  align_buffer_64(dst_argb_b, kStrideB * kHeight + OFF);                       \
  for (int i = 0; i < kWidth * kHeight; ++i) {                                 \
    src_y[i + OFF] = (fastrand() & 0xff);                                      \
    src_a[i + OFF] = (fastrand() & 0xff);                                      \
  }                                                                            \
  for (int i = 0; i < kSizeUV; ++i) {                                          \
    src_u[i + OFF] = (fastrand() & 0xff);                                      \
    src_v[i + OFF] = (fastrand() & 0xff);                                      \
  }                                                                            \
  memset(dst_argb_b + OFF, 1, kStrideB * kHeight);                             \
  for (int i = 0; i < benchmark_iterations_; ++i) {                            \
    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth,                                 \
                          src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),           \
                          src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),           \
                          src_a + OFF, kWidth,                                 \
                          dst_argb_b + OFF, kStrideB,                          \
                          kWidth, NEG kHeight, ATTEN);                         \
  }                                                                            \
  /* Convert to a 3rd format in 1 step and 2 steps and compare */              \
  const int kStrideC = kWidth * BPP_C;                                         \
  align_buffer_64(dst_argb_c, kStrideC * kHeight + OFF);                       \
  align_buffer_64(dst_argb_bc, kStrideC * kHeight + OFF);                      \
  /* Distinct fill values so an untouched output cannot compare equal. */      \
  memset(dst_argb_c + OFF, 2, kStrideC * kHeight);                             \
  memset(dst_argb_bc + OFF, 3, kStrideC * kHeight);                            \
  FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth,                                   \
                        src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),             \
                        src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),             \
                        src_a + OFF, kWidth,                                   \
                        dst_argb_c + OFF, kStrideC,                            \
                        kWidth, NEG kHeight, ATTEN);                           \
  /* Convert B to C */                                                         \
  FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB,                                 \
                   dst_argb_bc + OFF, kStrideC,                                \
                   kWidth, kHeight);                                           \
  for (int i = 0; i < kStrideC * kHeight; ++i) {                               \
    EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]);                      \
  }                                                                            \
  /* Release every buffer allocated above, including the alpha plane. */       \
  free_aligned_buffer_64(src_y);                                               \
  free_aligned_buffer_64(src_u);                                               \
  free_aligned_buffer_64(src_v);                                               \
  free_aligned_buffer_64(src_a);                                               \
  free_aligned_buffer_64(dst_argb_b);                                          \
  free_aligned_buffer_64(dst_argb_c);                                          \
  free_aligned_buffer_64(dst_argb_bc);                                         \
}
// Expands to five TESTQPLANARTOEI tests for one format triple:
//   _Any       - width is benchmark_width_ - 4.
//   _Unaligned - buffer offset (OFF) of 1.
//   _Invert    - '-' passed as NEG, so the planar conversions get -kHeight.
//   _Opt       - full benchmark_width_, offset 0, '+' sign, ATTEN 0.
//   _Premult   - same as _Opt but with ATTEN set to 1.
#define TESTQPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
FMT_C, BPP_C) \
TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
benchmark_width_ - 4, _Any, +, 0, FMT_C, BPP_C, 0) \
TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
benchmark_width_, _Unaligned, +, 1, FMT_C, BPP_C, 0) \
TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
benchmark_width_, _Invert, -, 0, FMT_C, BPP_C, 0) \
TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
benchmark_width_, _Opt, +, 0, FMT_C, BPP_C, 0) \
TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
benchmark_width_, _Premult, +, 0, FMT_C, BPP_C, 1)
// I420Alpha with ARGB and ABGR, each used once as the intermediate format and
// once as the final format (4 bytes per pixel for both).
TESTQPLANARTOE(I420Alpha, 2, 2, ARGB, 4, ABGR, 4)
TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 4, ARGB, 4)
} // namespace libyuv