Make constant 0x0101 using vpcmpeqb+vpabsb (2 instructions instead of 3)

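Was (3 instructions):
      vpcmpeqb    %%ymm4,%%ymm4,%%ymm4    // all bits set: every byte is 0xff (-1)
      vpsrlw      $0xf,%%ymm4,%%ymm4      // every 16-bit word becomes 0x0001
      vpackuswb   %%ymm4,%%ymm4,%%ymm4    // pack words to bytes: every byte is 0x01
Now (2 instructions):
      vpcmpeqb    %%ymm4,%%ymm4,%%ymm4    // all bits set: every byte is 0xff (-1)
      vpabsb      %%ymm4,%%ymm4           // abs(-1) = 1 per byte: every word is 0x0101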

Bug: 381138208
Change-Id: Ib70c24ac636fff95a10c7f06ed8f0a3bc7514906
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6312925
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Justin Green <greenjustin@google.com>
This commit is contained in:
Frank Barchard 2025-03-10 11:51:56 -07:00 committed by libyuv LUCI CQ
parent c060118bea
commit 918329caee
3 changed files with 23 additions and 26 deletions

View File

@ -3418,19 +3418,23 @@ int RAWToJ420(const uint8_t* src_raw,
} }
} }
#endif #endif
#if defined(HAS_RAWTOYJROW_LSX) #if defined(HAS_RAWTOYJROW_LSX) && defined(HAS_RAWTOUVJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) { if (TestCpuFlag(kCpuHasLSX)) {
RAWToUVJRow = RAWToUVJRow_Any_LSX;
RAWToYJRow = RAWToYJRow_Any_LSX; RAWToYJRow = RAWToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
RAWToYJRow = RAWToYJRow_LSX; RAWToYJRow = RAWToYJRow_LSX;
RAWToUVJRow = RAWToUVJRow_LSX;
} }
} }
#endif #endif
#if defined(HAS_RAWTOYJROW_LASX) #if defined(HAS_RAWTOYJROW_LASX) && defined(HAS_RAWTOUVJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) { if (TestCpuFlag(kCpuHasLASX)) {
RAWToUVJRow = RAWToUVJRow_Any_LASX;
RAWToYJRow = RAWToYJRow_Any_LASX; RAWToYJRow = RAWToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) { if (IS_ALIGNED(width, 32)) {
RAWToYJRow = RAWToYJRow_LASX; RAWToYJRow = RAWToYJRow_LASX;
RAWToUVJRow = RAWToUVJRow_LASX;
} }
} }
#endif #endif

View File

@ -9396,9 +9396,8 @@ void HalfMergeUVRow_SSSE3(const uint8_t* src_u,
uint8_t* dst_uv, uint8_t* dst_uv,
int width) { int width) {
asm volatile( asm volatile(
"pcmpeqb %%xmm4,%%xmm4 \n" "pcmpeqb %%xmm4,%%xmm4 \n" // 0x0101
"psrlw $0xf,%%xmm4 \n" "pabsb %%xmm4,%%xmm4 \n"
"packuswb %%xmm4,%%xmm4 \n"
"pxor %%xmm5,%%xmm5 \n" "pxor %%xmm5,%%xmm5 \n"
LABELALIGN LABELALIGN
@ -9443,8 +9442,7 @@ void HalfMergeUVRow_AVX2(const uint8_t* src_u,
int width) { int width) {
asm volatile( asm volatile(
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
"vpsrlw $0xf,%%ymm4,%%ymm4 \n" "vpabsb %%ymm4,%%ymm4 \n"
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
"vpxor %%ymm5,%%ymm5,%%ymm5 \n" "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN

View File

@ -126,9 +126,9 @@ void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr,
int dst_width) { int dst_width) {
(void)src_stride; (void)src_stride;
asm volatile( asm volatile(
"pcmpeqb %%xmm4,%%xmm4 \n" "pcmpeqb %%xmm4,%%xmm4 \n" // 0x0101
"psrlw $0xf,%%xmm4 \n" "pabsb %%xmm4,%%xmm4 \n"
"packuswb %%xmm4,%%xmm4 \n"
"pxor %%xmm5,%%xmm5 \n" "pxor %%xmm5,%%xmm5 \n"
LABELALIGN LABELALIGN
@ -157,9 +157,8 @@ void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int dst_width) { int dst_width) {
asm volatile( asm volatile(
"pcmpeqb %%xmm4,%%xmm4 \n" "pcmpeqb %%xmm4,%%xmm4 \n" // 0x0101
"psrlw $0xf,%%xmm4 \n" "pabsb %%xmm4,%%xmm4 \n"
"packuswb %%xmm4,%%xmm4 \n"
"pxor %%xmm5,%%xmm5 \n" "pxor %%xmm5,%%xmm5 \n"
LABELALIGN LABELALIGN
@ -225,8 +224,7 @@ void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr,
(void)src_stride; (void)src_stride;
asm volatile( asm volatile(
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
"vpsrlw $0xf,%%ymm4,%%ymm4 \n" "vpabsb %%ymm4,%%ymm4 \n"
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
"vpxor %%ymm5,%%ymm5,%%ymm5 \n" "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
@ -258,8 +256,7 @@ void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr,
int dst_width) { int dst_width) {
asm volatile( asm volatile(
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
"vpsrlw $0xf,%%ymm4,%%ymm4 \n" "vpabsb %%ymm4,%%ymm4 \n"
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
"vpxor %%ymm5,%%ymm5,%%ymm5 \n" "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
@ -332,10 +329,9 @@ void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr,
intptr_t stridex3; intptr_t stridex3;
asm volatile( asm volatile(
"pcmpeqb %%xmm4,%%xmm4 \n" "pcmpeqb %%xmm4,%%xmm4 \n"
"psrlw $0xf,%%xmm4 \n" "pabsw %%xmm4,%%xmm5 \n"
"movdqa %%xmm4,%%xmm5 \n" "pabsb %%xmm4,%%xmm4 \n" // 0x0101
"packuswb %%xmm4,%%xmm4 \n" "psllw $0x3,%%xmm5 \n" // 0x0008
"psllw $0x3,%%xmm5 \n"
"lea 0x00(%4,%4,2),%3 \n" "lea 0x00(%4,%4,2),%3 \n"
LABELALIGN LABELALIGN
@ -420,9 +416,9 @@ void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr,
int dst_width) { int dst_width) {
asm volatile( asm volatile(
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
"vpsrlw $0xf,%%ymm4,%%ymm4 \n" "vpabsw %%ymm4,%%ymm5 \n"
"vpsllw $0x3,%%ymm4,%%ymm5 \n" "vpabsb %%ymm4,%%ymm4 \n" // 0x0101
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" "vpsllw $0x3,%%ymm5,%%ymm5 \n" // 0x0008
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -2385,8 +2381,7 @@ void ScaleUVRowDown2Box_AVX2(const uint8_t* src_ptr,
int dst_width) { int dst_width) {
asm volatile( asm volatile(
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" // 01010101 "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" // 01010101
"vpsrlw $0xf,%%ymm4,%%ymm4 \n" "vpabsb %%ymm4,%%ymm4 \n"
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
"vpxor %%ymm5,%%ymm5,%%ymm5 \n" // zero "vpxor %%ymm5,%%ymm5,%%ymm5 \n" // zero
"vbroadcastf128 %4,%%ymm1 \n" // split shuffler "vbroadcastf128 %4,%%ymm1 \n" // split shuffler
"vbroadcastf128 %5,%%ymm3 \n" // merge shuffler "vbroadcastf128 %5,%%ymm3 \n" // merge shuffler