diff --git a/README.chromium b/README.chromium index 5afb157a8..9f34c0425 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 578 +Version: 579 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index ae1ba00db..85cf923d2 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -1318,6 +1318,8 @@ void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); +void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, + int width); void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb, int width); @@ -1326,6 +1328,10 @@ extern uint32 fixed_invtbl8[256]; void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); +void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, + int width); +void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, + int width); void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 5849fc527..9e158dc47 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 578 +#define LIBYUV_VERSION 579 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 075c937f0..357b081a5 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -1034,9 +1034,12 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, #endif #if defined(HAS_ARGBATTENUATEROW_AVX2) bool clear = false; - if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) { - bool clear = true; - ARGBAttenuateRow = ARGBAttenuateRow_AVX2; + if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { + clear = true; + ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_AVX2; + } } #endif #if defined(HAS_ARGBATTENUATEROW_NEON) @@ -1077,19 +1080,25 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, src_stride_argb = -src_stride_argb; } void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb, - int width) = ARGBUnattenuateRow_C; + int width) = ARGBUnattenuateRow_C; #if defined(HAS_ARGBUNATTENUATEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) && + if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2; + ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2; + if (IS_ALIGNED(width, 4)) { + ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2; + } } #endif #if defined(HAS_ARGBUNATTENUATEROW_AVX2) bool clear = false; - if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) { - bool clear = true; - ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2; + if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { + clear = true; + ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2; + } } #endif // TODO(fbarchard): Neon version. @@ -1099,6 +1108,13 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, src_argb += src_stride_argb; dst_argb += dst_stride_argb; } + +#if defined(HAS_ARGBUNATTENUATEROW_AVX2) + if (clear) { + __asm vzeroupper; + } +#endif + return 0; } diff --git a/source/row_any.cc b/source/row_any.cc index ffbc8f399..6b434f384 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -250,6 +250,18 @@ YANY(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, ARGBAttenuateRow_C, YANY(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, ARGBAttenuateRow_C, 4, 4, 3) #endif +#ifdef HAS_ARGBUNATTENUATEROW_SSE2 +YANY(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, ARGBUnattenuateRow_C, + 4, 4, 3) +#endif +#ifdef HAS_ARGBATTENUATEROW_AVX2 +YANY(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, ARGBAttenuateRow_C, + 4, 4, 7) +#endif +#ifdef HAS_ARGBUNATTENUATEROW_AVX2 +YANY(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, ARGBUnattenuateRow_C, + 4, 4, 7) +#endif #ifdef HAS_ARGBATTENUATEROW_NEON YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C, 4, 4, 7) diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 8089f9cb9..e3bbacccc 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -59,9 +59,9 @@ TEST_F(libyuvTest, TestAttenuate) { EXPECT_EQ(255u, unatten_pixels[0][1]); EXPECT_EQ(254u, unatten_pixels[0][2]); EXPECT_EQ(128u, unatten_pixels[0][3]); - EXPECT_EQ(16u, unatten_pixels[1][0]); - EXPECT_EQ(64u, unatten_pixels[1][1]); - EXPECT_EQ(192u, unatten_pixels[1][2]); + EXPECT_EQ(0u, unatten_pixels[1][0]); + EXPECT_EQ(0u, unatten_pixels[1][1]); + EXPECT_EQ(0u, unatten_pixels[1][2]); EXPECT_EQ(0u, unatten_pixels[1][3]); EXPECT_EQ(16u, unatten_pixels[2][0]); EXPECT_EQ(64u, unatten_pixels[2][1]);