From 6100f50f136829b9769baf79306b31e0665ec16a Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Tue, 10 Nov 2015 10:45:44 -0800 Subject: [PATCH] fix yvu constants for avx2 yuv to rgb the yvu matrix for yuv to rgb had an incorrect entry, affecting yuv to bgra, yuv to abgr and yuv to raw. fix the matrix and reenable avx2 functions. R=harryjin@google.com BUG=libyuv:522 Review URL: https://codereview.chromium.org/1411763004 . --- README.chromium | 2 +- include/libyuv/row.h | 16 ++++----- include/libyuv/version.h | 2 +- source/row_common.cc | 12 +++---- unit_test/convert_test.cc | 74 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 89 insertions(+), 17 deletions(-) diff --git a/README.chromium b/README.chromium index 7a5c0d57f..b2bfd5b59 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1535 +Version: 1536 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 888021043..5de839df1 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -200,13 +200,11 @@ extern "C" { #if !(defined(_DEBUG) && defined(__i386__)) // TODO(fbarchard): fix build error on android_full_debug=1 // https://code.google.com/p/libyuv/issues/detail?id=517 -// AVX2 versions fail on gcc. b/522 -// #define HAS_I422ALPHATOARGBROW_AVX2 +#define HAS_I422ALPHATOARGBROW_AVX2 #endif -// AVX2 versions fail on gcc. 
b/522 -// #define HAS_I422TOARGBROW_AVX2 -// #define HAS_I422TORGB24ROW_AVX2 -// #define HAS_I422TORGBAROW_AVX2 +#define HAS_I422TOARGBROW_AVX2 +#define HAS_I422TORGB24ROW_AVX2 +#define HAS_I422TORGBAROW_AVX2 #define HAS_INTERPOLATEROW_AVX2 #define HAS_J422TOARGBROW_AVX2 #define HAS_MERGEUVROW_AVX2 @@ -241,9 +239,9 @@ extern "C" { #define HAS_ARGBTOARGB4444ROW_AVX2 #define HAS_ARGBTORGB565ROW_AVX2 #define HAS_I411TOARGBROW_AVX2 -// #define HAS_I422TOARGB1555ROW_AVX2 -// #define HAS_I422TOARGB4444ROW_AVX2 -// #define HAS_I422TORGB565ROW_AVX2 +#define HAS_I422TOARGB1555ROW_AVX2 +#define HAS_I422TOARGB4444ROW_AVX2 +#define HAS_I422TORGB565ROW_AVX2 #define HAS_I444TOARGBROW_AVX2 #define HAS_J400TOARGBROW_AVX2 #define HAS_NV12TORGB565ROW_AVX2 diff --git a/include/libyuv/version.h b/include/libyuv/version.h index a30b7c8a7..26a9130c9 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1535 +#define LIBYUV_VERSION 1536 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_common.cc b/source/row_common.cc index 4cc7591c3..cb806b911 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1077,11 +1077,11 @@ const YuvConstants SIMD_ALIGNED(kYuvI601Constants) = { }; const YuvConstants SIMD_ALIGNED(kYvuI601Constants) = { { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, - VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, UB, 0 }, + VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 }, { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG }, { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, - 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, VR }, + 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB }, { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR }, { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, 
BG, BG, BG }, { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB }, @@ -1164,11 +1164,11 @@ const YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = { }; const YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = { { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, - VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, UB, 0 }, + VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 }, { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG }, { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, - 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, VR }, + 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB }, { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR }, { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG }, { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB }, @@ -1252,11 +1252,11 @@ const YuvConstants SIMD_ALIGNED(kYuvH709Constants) = { }; const YuvConstants SIMD_ALIGNED(kYvuH709Constants) = { { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, - VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, UB, 0 }, + VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 }, { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG }, { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, - 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, VR }, + 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB }, { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR }, { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG }, { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB }, diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index ffb277883..2988ad740 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -1792,4 +1792,78 @@ TESTPLANARTOE(I422, 2, 1, UYVY, 2, ARGB, 4) // TESTPLANARTOE(I420, 2, 
2, ARGB, 4, I400, 1) // TESTPLANARTOE(J420, 2, 2, ARGB, 4, J400, 1)
+// Converts FMT_PLANAR (Y, U, V, alpha planes) to FMT_C directly and via FMT_B,
+// then expects both outputs to match byte for byte.
+#define TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,       \
+                        W1280, N, NEG, OFF, FMT_C, BPP_C, ATTEN)              \
+TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) {               \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1;                             \
+  const int kHeight = benchmark_height_;                                      \
+  const int kStrideB = kWidth * BPP_B;                                        \
+  const int kSizeUV =                                                         \
+    SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y);             \
+  align_buffer_64(src_y, kWidth * kHeight + OFF);                             \
+  align_buffer_64(src_u, kSizeUV + OFF);                                      \
+  align_buffer_64(src_v, kSizeUV + OFF);                                      \
+  align_buffer_64(src_a, kWidth * kHeight + OFF);                             \
+  align_buffer_64(dst_argb_b, kStrideB * kHeight + OFF);                      \
+  for (int i = 0; i < kWidth * kHeight; ++i) {                                \
+    src_y[i + OFF] = (fastrand() & 0xff);                                     \
+    src_a[i + OFF] = (fastrand() & 0xff);                                     \
+  }                                                                           \
+  for (int i = 0; i < kSizeUV; ++i) {                                         \
+    src_u[i + OFF] = (fastrand() & 0xff);                                     \
+    src_v[i + OFF] = (fastrand() & 0xff);                                     \
+  }                                                                           \
+  memset(dst_argb_b + OFF, 1, kStrideB * kHeight);                            \
+  for (int i = 0; i < benchmark_iterations_; ++i) {                           \
+    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth,                                \
+                          src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),          \
+                          src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),          \
+                          src_a + OFF, kWidth, dst_argb_b + OFF, kStrideB,    \
+                          kWidth, NEG kHeight, ATTEN);                        \
+  }                                                                           \
+  /* Convert to a 3rd format in 1 step and 2 steps and compare */             \
+  const int kStrideC = kWidth * BPP_C;                                        \
+  align_buffer_64(dst_argb_c, kStrideC * kHeight + OFF);                      \
+  align_buffer_64(dst_argb_bc, kStrideC * kHeight + OFF);                     \
+  memset(dst_argb_c + OFF, 2, kStrideC * kHeight);                            \
+  memset(dst_argb_bc + OFF, 3, kStrideC * kHeight);                           \
+  FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth,                                  \
+                        src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),            \
+                        src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),            \
+                        src_a + OFF, kWidth, dst_argb_c + OFF, kStrideC,      \
+                        kWidth, NEG kHeight, ATTEN);                          \
+  /* Convert B to C */                                                        \
+  FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB,                                \
+                   dst_argb_bc + OFF, kStrideC, kWidth, kHeight);             \
+  for (int i = 0; i < kStrideC * kHeight; ++i) {                              \
+    EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]);                     \
+  }                                                                           \
+  free_aligned_buffer_64(src_y);                                              \
+  free_aligned_buffer_64(src_u);                                              \
+  free_aligned_buffer_64(src_v);                                              \
+  free_aligned_buffer_64(src_a);                                              \
+  free_aligned_buffer_64(dst_argb_b);                                         \
+  free_aligned_buffer_64(dst_argb_c);                                         \
+  free_aligned_buffer_64(dst_argb_bc);                                        \
+}
+
+// Instantiates the _Any, _Unaligned, _Invert, _Opt and _Premult variants.
+#define TESTQPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,        \
+                       FMT_C, BPP_C)                                          \
+  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,             \
+                  benchmark_width_ - 4, _Any, +, 0, FMT_C, BPP_C, 0)          \
+  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,             \
+                  benchmark_width_, _Unaligned, +, 1, FMT_C, BPP_C, 0)        \
+  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,             \
+                  benchmark_width_, _Invert, -, 0, FMT_C, BPP_C, 0)           \
+  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,             \
+                  benchmark_width_, _Opt, +, 0, FMT_C, BPP_C, 0)              \
+  TESTQPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,             \
+                  benchmark_width_, _Premult, +, 0, FMT_C, BPP_C, 1)
+
+TESTQPLANARTOE(I420Alpha, 2, 2, ARGB, 4, ABGR, 4)
+TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 4, ARGB, 4)
+
 } // namespace libyuv