Make constant 0x0101 using vpcmpeqb+vpabsb

Was
      vpcmpeqb    %%ymm4,%%ymm4,%%ymm4
      vpsrlw      $0xf,%%ymm4,%%ymm4
      vpackuswb   %%ymm4,%%ymm4,%%ymm4
Now
      vpcmpeqb    %%ymm4,%%ymm4,%%ymm4
      vpabsb      %%ymm4,%%ymm4

Bug: 381138208
Change-Id: Ib70c24ac636fff95a10c7f06ed8f0a3bc7514906
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6312925
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Justin Green <greenjustin@google.com>
This commit is contained in:
Frank Barchard 2025-03-10 11:51:56 -07:00 committed by libyuv LUCI CQ
parent c060118bea
commit 918329caee
3 changed files with 23 additions and 26 deletions

View File

@@ -3418,19 +3418,23 @@ int RAWToJ420(const uint8_t* src_raw,
}
}
#endif
#if defined(HAS_RAWTOYJROW_LSX)
#if defined(HAS_RAWTOYJROW_LSX) && defined(HAS_RAWTOUVJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RAWToUVJRow = RAWToUVJRow_Any_LSX;
RAWToYJRow = RAWToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RAWToYJRow = RAWToYJRow_LSX;
RAWToUVJRow = RAWToUVJRow_LSX;
}
}
#endif
#if defined(HAS_RAWTOYJROW_LASX)
#if defined(HAS_RAWTOYJROW_LASX) && defined(HAS_RAWTOUVJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RAWToUVJRow = RAWToUVJRow_Any_LASX;
RAWToYJRow = RAWToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RAWToYJRow = RAWToYJRow_LASX;
RAWToUVJRow = RAWToUVJRow_LASX;
}
}
#endif

View File

@@ -9396,9 +9396,8 @@ void HalfMergeUVRow_SSSE3(const uint8_t* src_u,
uint8_t* dst_uv,
int width) {
asm volatile(
"pcmpeqb %%xmm4,%%xmm4 \n"
"psrlw $0xf,%%xmm4 \n"
"packuswb %%xmm4,%%xmm4 \n"
"pcmpeqb %%xmm4,%%xmm4 \n" // 0x0101
"pabsb %%xmm4,%%xmm4 \n"
"pxor %%xmm5,%%xmm5 \n"
LABELALIGN
@@ -9443,8 +9442,7 @@ void HalfMergeUVRow_AVX2(const uint8_t* src_u,
int width) {
asm volatile(
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
"vpabsb %%ymm4,%%ymm4 \n"
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN

View File

@@ -126,9 +126,9 @@ void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr,
int dst_width) {
(void)src_stride;
asm volatile(
"pcmpeqb %%xmm4,%%xmm4 \n"
"psrlw $0xf,%%xmm4 \n"
"packuswb %%xmm4,%%xmm4 \n"
"pcmpeqb %%xmm4,%%xmm4 \n" // 0x0101
"pabsb %%xmm4,%%xmm4 \n"
"pxor %%xmm5,%%xmm5 \n"
LABELALIGN
@@ -157,9 +157,8 @@ void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width) {
asm volatile(
"pcmpeqb %%xmm4,%%xmm4 \n"
"psrlw $0xf,%%xmm4 \n"
"packuswb %%xmm4,%%xmm4 \n"
"pcmpeqb %%xmm4,%%xmm4 \n" // 0x0101
"pabsb %%xmm4,%%xmm4 \n"
"pxor %%xmm5,%%xmm5 \n"
LABELALIGN
@@ -225,8 +224,7 @@ void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr,
(void)src_stride;
asm volatile(
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
"vpabsb %%ymm4,%%ymm4 \n"
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
@@ -258,8 +256,7 @@ void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr,
int dst_width) {
asm volatile(
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
"vpabsb %%ymm4,%%ymm4 \n"
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
@@ -332,10 +329,9 @@ void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr,
intptr_t stridex3;
asm volatile(
"pcmpeqb %%xmm4,%%xmm4 \n"
"psrlw $0xf,%%xmm4 \n"
"movdqa %%xmm4,%%xmm5 \n"
"packuswb %%xmm4,%%xmm4 \n"
"psllw $0x3,%%xmm5 \n"
"pabsw %%xmm4,%%xmm5 \n"
"pabsb %%xmm4,%%xmm4 \n" // 0x0101
"psllw $0x3,%%xmm5 \n" // 0x0008
"lea 0x00(%4,%4,2),%3 \n"
LABELALIGN
@@ -420,9 +416,9 @@ void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr,
int dst_width) {
asm volatile(
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"
"vpsllw $0x3,%%ymm4,%%ymm5 \n"
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
"vpabsw %%ymm4,%%ymm5 \n"
"vpabsb %%ymm4,%%ymm4 \n" // 0x0101
"vpsllw $0x3,%%ymm5,%%ymm5 \n" // 0x0008
LABELALIGN
"1: \n"
@@ -2385,8 +2381,7 @@ void ScaleUVRowDown2Box_AVX2(const uint8_t* src_ptr,
int dst_width) {
asm volatile(
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" // 01010101
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
"vpabsb %%ymm4,%%ymm4 \n"
"vpxor %%ymm5,%%ymm5,%%ymm5 \n" // zero
"vbroadcastf128 %4,%%ymm1 \n" // split shuffler
"vbroadcastf128 %5,%%ymm3 \n" // merge shuffler