From 9487b9d6d873ef5ba593a7356b29f334394521e2 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Wed, 1 Jul 2015 17:50:48 -0700 Subject: [PATCH] any allow for avx2 32 pixels at a time of argb R=harryjin@google.com BUG=libyuv:461 Review URL: https://webrtc-codereview.appspot.com/54779004. --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/row_any.cc | 25 +++++++++++++------------ unit_test/color_test.cc | 14 +++++++------- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/README.chromium b/README.chromium index 4d25adf17..4a123b6bb 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1442 +Version: 1443 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index e3bc15012..3d63eebe9 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1442 +#define LIBYUV_VERSION 1443 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_any.cc b/source/row_any.cc index 482a98e6d..279b889c5 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -224,16 +224,16 @@ ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7) // Any 1 to 1. #define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 2]); \ - memset(temp, 0, 64); /* for YUY2 and msan */ \ + SIMD_ALIGNED(uint8 temp[128 * 2]); \ + memset(temp, 0, 128); /* for YUY2 and msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_ptr, n); \ } \ memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ - ANY_SIMD(temp, temp + 64, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ + ANY_SIMD(temp, temp + 128, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ } #ifdef HAS_COPYROW_AVX @@ -593,28 +593,29 @@ ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) #undef ANY12 // Any 1 to 2 with source stride (2 rows of source). Outputs UV planes. +// 128 byte row allows for 32 avx ARGB pixels. #define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \ void NAMEANY(const uint8* src_ptr, int src_stride_ptr, \ uint8* dst_u, uint8* dst_v, int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 4]); \ - memset(temp, 0, 64 * 2); /* for msan */ \ + SIMD_ALIGNED(uint8 temp[128 * 4]); \ + memset(temp, 0, 128 * 2); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, n); \ } \ memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ - memcpy(temp + 64, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \ + memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \ SS(r, UVSHIFT) * BPP); \ if ((width & 1) && BPP == 4) { /* repeat last 4 bytes for subsampler */ \ memcpy(temp + SS(r, UVSHIFT) * BPP, \ temp + SS(r, UVSHIFT) * BPP - BPP, 4); \ - memcpy(temp + 64 + SS(r, UVSHIFT) * BPP, \ - temp + 64 + SS(r, UVSHIFT) * BPP - BPP, 4); \ + memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \ + temp + 128 + SS(r, UVSHIFT) * BPP - BPP, 4); \ } \ - ANY_SIMD(temp, 64, temp + 128, temp + 192, MASK + 1); \ - memcpy(dst_u + (n >> 1), temp + 128, SS(r, 1)); \ - memcpy(dst_v + (n >> 1), temp + 192, SS(r, 1)); \ + ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \ + memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \ + memcpy(dst_v + (n >> 1), temp + 384, SS(r, 1)); \ } #ifdef HAS_ARGBTOUVROW_AVX2 diff --git a/unit_test/color_test.cc b/unit_test/color_test.cc index da3f34a58..412c3dd09 100644 --- a/unit_test/color_test.cc +++ b/unit_test/color_test.cc @@ -34,7 +34,7 @@ namespace libyuv { #define ERROR_FULL 5 #endif -#define TESTCS(TESTNAME, YUVTOARGB, ARGBTOYUV, HS1, HS, HN, DIFF, CDIFF) \ +#define TESTCS(TESTNAME, YUVTOARGB, ARGBTOYUV, HS1, HS, HN, DIFF) \ TEST_F(libyuvTest, TESTNAME) { \ const int kPixels = benchmark_width_ * benchmark_height_; \ const int kHalfPixels = ((benchmark_width_ + 1) / 2) * \ @@ -93,7 +93,7 @@ TEST_F(libyuvTest, TESTNAME) { \ temp_v, (benchmark_width_ + 1) / 2, \ benchmark_width_, benchmark_height_); \ \ - MaskCpuFlags(disable_cpu_flags_); \ + MaskCpuFlags(disable_cpu_flags_); \ YUVTOARGB(temp_y, benchmark_width_, \ temp_u, (benchmark_width_ + 1) / 2, \ temp_v, (benchmark_width_ + 1) / 2, \ @@ -110,7 +110,7 @@ TEST_F(libyuvTest, TESTNAME) { \ } \ /* Test C and SIMD match. */ \ for (int i = 0; i < kPixels * 4; ++i) { \ - EXPECT_NEAR(dst_pixels_c[i], dst_pixels_opt[i], CDIFF); \ + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ } \ /* Test SIMD is close to original. */ \ for (int i = 0; i < kPixels * 4; ++i) { \ @@ -129,10 +129,10 @@ TEST_F(libyuvTest, TESTNAME) { \ free_aligned_buffer_64(dst_pixels_c); \ } \ -TESTCS(TestI420, I420ToARGB, ARGBToI420, 1, 2, benchmark_width_, ERROR_FULL, 0) -TESTCS(TestI422, I422ToARGB, ARGBToI422, 0, 1, 0, ERROR_FULL, 0) -TESTCS(TestJ420, J420ToARGB, ARGBToJ420, 1, 2, benchmark_width_, 3, 0) -TESTCS(TestJ422, J422ToARGB, ARGBToJ422, 0, 1, 0, 3, 0) +TESTCS(TestI420, I420ToARGB, ARGBToI420, 1, 2, benchmark_width_, ERROR_FULL) +TESTCS(TestI422, I422ToARGB, ARGBToI422, 0, 1, 0, ERROR_FULL) +TESTCS(TestJ420, J420ToARGB, ARGBToJ420, 1, 2, benchmark_width_, 3) +TESTCS(TestJ422, J422ToARGB, ARGBToJ422, 0, 1, 0, 3) static void YUVToRGB(int y, int u, int v, int* r, int* g, int* b) { const int kWidth = 16;