Fix SSE2 version of ScalePlaneUp2_16_Bilinear

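- Fix ScaleRowUp2_Bilinear_16_SSE2: the second pshufd shuffled xmm4 a second
  time instead of xmm5, the register that is stored to the row below.
- Define HAS_SCALEROWUP2_BILINEAR_16_SSE2 now that the row function is fixed.
- Correct the function name to ScaleRowUp2_Bilinear_16_Any_SSE2 and test
  kCpuHasSSE2 instead of kCpuHasSSSE3: this row function uses only SSE2
  instructions.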

Bug: libyuv:882
Change-Id: Ib1c7ac5b09997cb5b32bc54109d8c566af762433
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3800842
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
Yuan Tong 2022-08-02 17:43:54 +08:00 committed by Frank Barchard
parent b028453ba6
commit 98ec7c28d5
4 changed files with 6 additions and 7 deletions

View File

@ -83,8 +83,7 @@ extern "C" {
#define HAS_SCALEROWUP2_LINEAR_12_SSSE3
#define HAS_SCALEROWUP2_BILINEAR_12_SSSE3
#define HAS_SCALEROWUP2_LINEAR_16_SSE2
// TODO(libyuv:882): Fix SSE2 version of BILINEAR
//#define HAS_SCALEROWUP2_BILINEAR_16_SSE2
#define HAS_SCALEROWUP2_BILINEAR_16_SSE2
#define HAS_SCALEUVROWUP2_LINEAR_SSSE3
#define HAS_SCALEUVROWUP2_BILINEAR_SSSE3
#define HAS_SCALEUVROWUP2_LINEAR_16_SSE41
@ -693,7 +692,7 @@ void ScaleRowUp2_Bilinear_12_Any_SSSE3(const uint16_t* src_ptr,
void ScaleRowUp2_Linear_16_Any_SSE2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_Any_SSSE3(const uint16_t* src_ptr,
void ScaleRowUp2_Bilinear_16_Any_SSE2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,

View File

@ -1654,8 +1654,8 @@ void ScalePlaneUp2_16_Bilinear(int src_width,
assert(src_height == ((dst_height + 1) / 2));
#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSSE3;
if (TestCpuFlag(kCpuHasSSE2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSE2;
}
#endif

View File

@ -813,7 +813,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3,
#endif
#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSSE3,
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSE2,
ScaleRowUp2_Bilinear_16_SSE2,
ScaleRowUp2_Bilinear_16_C,
7,

View File

@ -1242,7 +1242,7 @@ void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
"pshufd $0b11011000,%%xmm4,%%xmm4 \n"
"movdqu %%xmm4,(%1) \n" // store above
"packssdw %%xmm2,%%xmm5 \n"
"pshufd $0b11011000,%%xmm4,%%xmm4 \n"
"pshufd $0b11011000,%%xmm5,%%xmm5 \n"
"movdqu %%xmm5,(%1,%4,2) \n" // store below
"lea 0x8(%0),%0 \n"