From ec1f854f8685d7754af5c731294c82e349225e22 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Wed, 12 Nov 2014 01:45:27 +0000 Subject: [PATCH] Use broadcast to duplicate constants from 16 bytes to 32 bytes to save data space. BUG=none TESTED=intelsde R=brucedawson@google.com Review URL: https://webrtc-codereview.appspot.com/32029004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1161 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/row_win.cc | 15 ++++++--------- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/README.chromium b/README.chromium index 36b1d8ad9..9393cd6a1 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1160 +Version: 1161 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index ad93f49bc..80b81afa0 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1160 +#define LIBYUV_VERSION 1161 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_win.cc b/source/row_win.cc index efeff7d3e..6c3e43ef5 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -3804,11 +3804,9 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { #ifdef HAS_ARGBATTENUATEROW_AVX2 // Shuffle table duplicating alpha. -static const ulvec8 kShuffleAlpha_AVX2 = { +static const uvec8 kShuffleAlpha_AVX2 = { 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, - 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u, - 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, - 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u, + 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u }; __declspec(naked) __declspec(align(16)) void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) { @@ -3817,7 +3815,7 @@ void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) { mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax - vmovdqa ymm4, kShuffleAlpha_AVX2 + vbroadcastf128 ymm4,kShuffleAlpha_AVX2 vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000 vpslld ymm5, ymm5, 24 @@ -3899,8 +3897,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, #ifdef HAS_ARGBUNATTENUATEROW_AVX2 // Shuffle table duplicating alpha. static const ulvec8 kUnattenShuffleAlpha_AVX2 = { - 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15, - 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15, + 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u }; // TODO(fbarchard): Enable USE_GATHER for future hardware if faster. // USE_GATHER is not on by default, due to being a slow instruction. @@ -3913,7 +3910,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax - vmovdqa ymm4, kUnattenShuffleAlpha_AVX2 + vbroadcastf128 ymm4, kUnattenShuffleAlpha_AVX2 align 4 convertloop: @@ -3949,7 +3946,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax - vmovdqa ymm5, kUnattenShuffleAlpha_AVX2 + vbroadcastf128 ymm5, kUnattenShuffleAlpha_AVX2 push esi push edi