From afd1d6b4ec467aef31f4605bc7c6be2b130036fc Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Fri, 20 Sep 2013 01:00:54 +0000 Subject: [PATCH] Fix 2 bugs with Luma scale BUG=267 TEST=luma unittest improved R=ryanpetrie@google.com Review URL: https://webrtc-codereview.appspot.com/2260005 git-svn-id: http://libyuv.googlecode.com/svn/trunk@794 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/row.h | 3 +- include/libyuv/version.h | 2 +- source/planar_functions.cc | 2 +- source/row_win.cc | 2 +- unit_test/planar_test.cc | 120 ++++++++++++++++++++++++------------- 6 files changed, 84 insertions(+), 47 deletions(-) diff --git a/README.chromium b/README.chromium index 92fb87c4b..6c9bd9260 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 793 +Version: 794 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 9a2768f7e..46983bf2f 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -146,7 +146,8 @@ extern "C" { // TODO(fbarchard): Port to gcc. #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) // Effects: -#define HAS_ARGBLUMACOLORTABLEROW_SSSE3 +// TODO(fbarchard): Optimize and enable +// #define HAS_ARGBLUMACOLORTABLEROW_SSSE3 // Caveat: Visual C 2012 required for AVX2. #if _MSC_VER >= 1700 diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 3ce716bec..721d69058 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 793 +#define LIBYUV_VERSION 794 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 0fef75159..cd95f758a 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -2090,7 +2090,7 @@ int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, const uint8* luma, int width) = ARGBLumaColorTableRow_C; #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) { + if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) { ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3; } #endif diff --git a/source/row_win.cc b/source/row_win.cc index 677dd1c2e..c03dd6b84 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -6893,8 +6893,8 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, const uint8* luma, int width) { SIMD_ALIGNED(uint8* lut4[4]); - ARGBToYJx4_SSSE3(src_argb, luma, lut4); for (int i = 0; i < width - 3; i += 4) { + ARGBToYJx4_SSSE3(src_argb, luma, lut4); // Luminance in rows, color values in columns. const uint8* luma0 = lut4[0]; dst_argb[0] = luma0[src_argb[0]]; diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index e67c87fb6..28d156fa6 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -1658,7 +1658,8 @@ TEST_F(libyuvTest, ARGBBlur_Opt) { TEST_F(libyuvTest, TestARGBPolynomial) { SIMD_ALIGNED(uint8 orig_pixels[1280][4]); - SIMD_ALIGNED(uint8 dst_pixels[1280][4]); + SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); + SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = { @@ -1683,30 +1684,39 @@ TEST_F(libyuvTest, TestARGBPolynomial) { orig_pixels[2][1] = 0u; orig_pixels[2][2] = 255u; orig_pixels[2][3] = 255u; + // Test white + orig_pixels[3][0] = 255u; + orig_pixels[3][1] = 255u; + orig_pixels[3][2] = 255u; + orig_pixels[3][3] = 255u; // Test color - orig_pixels[3][0] = 16u; - orig_pixels[3][1] = 64u; - orig_pixels[3][2] = 192u; - orig_pixels[3][3] = 224u; + orig_pixels[4][0] = 16u; + orig_pixels[4][1] = 64u; + orig_pixels[4][2] = 192u; + orig_pixels[4][3] = 224u; // Do 16 to test asm version. - ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, + ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, &kWarmifyPolynomial[0], 16, 1); - EXPECT_EQ(235u, dst_pixels[0][0]); - EXPECT_EQ(0u, dst_pixels[0][1]); - EXPECT_EQ(0u, dst_pixels[0][2]); - EXPECT_EQ(128u, dst_pixels[0][3]); - EXPECT_EQ(0u, dst_pixels[1][0]); - EXPECT_EQ(233u, dst_pixels[1][1]); - EXPECT_EQ(0u, dst_pixels[1][2]); - EXPECT_EQ(0u, dst_pixels[1][3]); - EXPECT_EQ(0u, dst_pixels[2][0]); - EXPECT_EQ(0u, dst_pixels[2][1]); - EXPECT_EQ(241u, dst_pixels[2][2]); - EXPECT_EQ(255u, dst_pixels[2][3]); - EXPECT_EQ(10u, dst_pixels[3][0]); - EXPECT_EQ(59u, dst_pixels[3][1]); - EXPECT_EQ(188u, dst_pixels[3][2]); - EXPECT_EQ(224u, dst_pixels[3][3]); + EXPECT_EQ(235u, dst_pixels_opt[0][0]); + EXPECT_EQ(0u, dst_pixels_opt[0][1]); + EXPECT_EQ(0u, dst_pixels_opt[0][2]); + EXPECT_EQ(128u, dst_pixels_opt[0][3]); + EXPECT_EQ(0u, dst_pixels_opt[1][0]); + EXPECT_EQ(233u, dst_pixels_opt[1][1]); + EXPECT_EQ(0u, dst_pixels_opt[1][2]); + EXPECT_EQ(0u, dst_pixels_opt[1][3]); + EXPECT_EQ(0u, dst_pixels_opt[2][0]); + EXPECT_EQ(0u, dst_pixels_opt[2][1]); + EXPECT_EQ(241u, dst_pixels_opt[2][2]); + EXPECT_EQ(255u, dst_pixels_opt[2][3]); + EXPECT_EQ(235u, dst_pixels_opt[3][0]); + EXPECT_EQ(233u, dst_pixels_opt[3][1]); + EXPECT_EQ(241u, dst_pixels_opt[3][2]); + EXPECT_EQ(255u, dst_pixels_opt[3][3]); + EXPECT_EQ(10u, dst_pixels_opt[4][0]); + EXPECT_EQ(59u, dst_pixels_opt[4][1]); + EXPECT_EQ(188u, dst_pixels_opt[4][2]); + EXPECT_EQ(224u, dst_pixels_opt[4][3]); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; @@ -1714,15 +1724,29 @@ TEST_F(libyuvTest, TestARGBPolynomial) { orig_pixels[i][2] = i / 3; orig_pixels[i][3] = i; } + + MaskCpuFlags(0); + ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, + &kWarmifyPolynomial[0], 1280, 1); + MaskCpuFlags(-1); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { - ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, + ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, &kWarmifyPolynomial[0], 1280, 1); } + + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); + EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); + EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); + EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); + } } TEST_F(libyuvTest, TestARGBLumaColorTable) { SIMD_ALIGNED(uint8 orig_pixels[1280][4]); - SIMD_ALIGNED(uint8 dst_pixels[1280][4]); + SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]); + SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]); memset(orig_pixels, 0, sizeof(orig_pixels)); SIMD_ALIGNED(uint8 kLumaColorTable[32768]); @@ -1752,24 +1776,24 @@ TEST_F(libyuvTest, TestARGBLumaColorTable) { orig_pixels[3][2] = 192u; orig_pixels[3][3] = 224u; // Do 16 to test asm version. - ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, + ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, &kLumaColorTable[0], 16, 1); - EXPECT_EQ(253u, dst_pixels[0][0]); - EXPECT_EQ(0u, dst_pixels[0][1]); - EXPECT_EQ(0u, dst_pixels[0][2]); - EXPECT_EQ(128u, dst_pixels[0][3]); - EXPECT_EQ(0u, dst_pixels[1][0]); - EXPECT_EQ(253u, dst_pixels[1][1]); - EXPECT_EQ(0u, dst_pixels[1][2]); - EXPECT_EQ(0u, dst_pixels[1][3]); - EXPECT_EQ(0u, dst_pixels[2][0]); - EXPECT_EQ(0u, dst_pixels[2][1]); - EXPECT_EQ(253u, dst_pixels[2][2]); - EXPECT_EQ(255u, dst_pixels[2][3]); - EXPECT_EQ(48u, dst_pixels[3][0]); - EXPECT_EQ(192u, dst_pixels[3][1]); - EXPECT_EQ(64u, dst_pixels[3][2]); - EXPECT_EQ(224u, dst_pixels[3][3]); + EXPECT_EQ(253u, dst_pixels_opt[0][0]); + EXPECT_EQ(0u, dst_pixels_opt[0][1]); + EXPECT_EQ(0u, dst_pixels_opt[0][2]); + EXPECT_EQ(128u, dst_pixels_opt[0][3]); + EXPECT_EQ(0u, dst_pixels_opt[1][0]); + EXPECT_EQ(253u, dst_pixels_opt[1][1]); + EXPECT_EQ(0u, dst_pixels_opt[1][2]); + EXPECT_EQ(0u, dst_pixels_opt[1][3]); + EXPECT_EQ(0u, dst_pixels_opt[2][0]); + EXPECT_EQ(0u, dst_pixels_opt[2][1]); + EXPECT_EQ(253u, dst_pixels_opt[2][2]); + EXPECT_EQ(255u, dst_pixels_opt[2][3]); + EXPECT_EQ(48u, dst_pixels_opt[3][0]); + EXPECT_EQ(192u, dst_pixels_opt[3][1]); + EXPECT_EQ(64u, dst_pixels_opt[3][2]); + EXPECT_EQ(224u, dst_pixels_opt[3][3]); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; @@ -1777,10 +1801,22 @@ TEST_F(libyuvTest, TestARGBLumaColorTable) { orig_pixels[i][2] = i / 3; orig_pixels[i][3] = i; } + + MaskCpuFlags(0); + ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0, + &kLumaColorTable[0], 1280, 1); + MaskCpuFlags(-1); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { - ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, + ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, &kLumaColorTable[0], 1280, 1); } + for (int i = 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); + EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); + EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); + EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); + } }