mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-04-30 19:09:18 +08:00
Remove LIBYUV_SSSE3_ONLY and ARGBSHUFFLEROW_SSE2
LIBYUV_SSSE3_ONLY was defined when the build requires SSSE3; for functions that have both SSE2 and SSSE3 implementations, the SSE2 implementation is then never used. Remove the SSE2 implementation of ARGBShuffleRow (ARGBShuffleRow_SSE2 and the HAS_ARGBSHUFFLEROW_SSE2 define) and rely on the SSSE3 version. Bug: libyuv: 769 Test: ~/intelsde/sde -p4 -- out/Release/libyuv_unittest --gtest_filter=LibYUVConvertTest.ARGBToABGR_Opt Change-Id: I7443f4d8ee3c6f47edd2cf1d5a1eb0f8d7a1eeeb Reviewed-on: https://chromium-review.googlesource.com/846541 Reviewed-by: Weiyong Yao <braveyao@chromium.org> Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
768f103b8b
commit
140fc0a261
@ -40,11 +40,6 @@ extern "C" {
|
||||
// define LIBYUV_DISABLE_X86
|
||||
#endif
|
||||
#endif
|
||||
// True if compiling for SSSE3 as a requirement.
|
||||
#if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3))
|
||||
#define LIBYUV_SSSE3_ONLY
|
||||
#endif
|
||||
|
||||
#if defined(__native_client__)
|
||||
#define LIBYUV_DISABLE_NEON
|
||||
#endif
|
||||
@ -85,7 +80,6 @@ extern "C" {
|
||||
#define HAS_ARGB4444TOARGBROW_SSE2
|
||||
#define HAS_ARGBEXTRACTALPHAROW_SSE2
|
||||
#define HAS_ARGBSETROW_X86
|
||||
#define HAS_ARGBSHUFFLEROW_SSE2
|
||||
#define HAS_ARGBSHUFFLEROW_SSSE3
|
||||
#define HAS_ARGBTOARGB1555ROW_SSE2
|
||||
#define HAS_ARGBTOARGB4444ROW_SSE2
|
||||
@ -1536,10 +1530,6 @@ void ARGBShuffleRow_C(const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
const uint8* shuffler,
|
||||
int width);
|
||||
// SSE2 row function: writes width pixels to dst_argb, with the bytes of each
// 4-byte source pixel reordered according to the byte indices in shuffler.
void ARGBShuffleRow_SSE2(const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
const uint8* shuffler,
|
||||
int width);
|
||||
void ARGBShuffleRow_SSSE3(const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
const uint8* shuffler,
|
||||
@ -1556,10 +1546,6 @@ void ARGBShuffleRow_MSA(const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
const uint8* shuffler,
|
||||
int width);
|
||||
// "Any" variant of the SSE2 shuffle row function; same parameters.
// NOTE(review): presumably accepts widths that are not a multiple of 4 -
// confirm against the ANY11P wrapper that generates it.
void ARGBShuffleRow_Any_SSE2(const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
const uint8* shuffler,
|
||||
int width);
|
||||
void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
const uint8* shuffler,
|
||||
|
||||
@ -2647,14 +2647,6 @@ int ARGBShuffle(const uint8* src_bgra,
|
||||
height = 1;
|
||||
src_stride_bgra = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_ARGBSHUFFLEROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
ARGBShuffleRow = ARGBShuffleRow_SSE2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
|
||||
|
||||
@ -681,9 +681,6 @@ ANY11P(ARGBToRGB565DitherRow_Any_MSA,
|
||||
2,
|
||||
7)
|
||||
#endif
|
||||
// Generates ARGBShuffleRow_Any_SSE2 as a wrapper around ARGBShuffleRow_SSE2.
// NOTE(review): ANY11P is defined outside this view; the arguments 4, 4, 3 are
// presumably source bytes per pixel, destination bytes per pixel and the width
// mask (4-pixel granularity) - confirm against the macro definition.
#ifdef HAS_ARGBSHUFFLEROW_SSE2
|
||||
ANY11P(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2, const uint8*, 4, 4, 3)
|
||||
#endif
|
||||
#ifdef HAS_ARGBSHUFFLEROW_SSSE3
|
||||
ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8*, 4, 4, 7)
|
||||
#endif
|
||||
|
||||
@ -5687,128 +5687,6 @@ void ARGBShuffleRow_AVX2(const uint8* src_argb,
|
||||
}
|
||||
#endif // HAS_ARGBSHUFFLEROW_AVX2
|
||||
|
||||
#ifdef HAS_ARGBSHUFFLEROW_SSE2
|
||||
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
|
||||
// SSE2 (GCC-style inline asm) row function that reorders the 4 bytes of each
// source pixel according to the byte indices in shuffler.
//   src_argb - source pixels, 4 bytes per pixel.
//   dst_argb - destination pixels, 4 bytes per pixel.
//   shuffler - its first 4 bytes are read as one 32-bit value to select a
//              fast path; on the generic path each of those bytes is used as
//              an offset into the current source pixel.
//   width    - pixel count; fast paths subtract 4 per iteration, the generic
//              path subtracts 1.
// NOTE(review): the fast paths always load and store 16 bytes, so width is
// presumably expected to be a multiple of 4 when they are taken - confirm
// against the callers.
void ARGBShuffleRow_SSE2(const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
const uint8* shuffler,
|
||||
int width) {
|
||||
// Scratch register (%2): holds the shuffler dword, then per-byte indices/values.
uintptr_t pixel_temp;
|
||||
asm volatile (
|
||||
// xmm5 = 0; used below as the zero source when widening bytes to words.
"pxor %%xmm5,%%xmm5 \n"
|
||||
// Load the first 4 shuffler bytes as a dword and dispatch on known patterns.
"mov " MEMACCESS(4) ",%k2 \n"
|
||||
"cmp $0x3000102,%k2 \n"
|
||||
"je 3012f \n"
|
||||
"cmp $0x10203,%k2 \n"
|
||||
"je 123f \n"
|
||||
"cmp $0x30201,%k2 \n"
|
||||
"je 321f \n"
|
||||
"cmp $0x2010003,%k2 \n"
|
||||
"je 2103f \n"
|
||||

||||
// Generic path (no pattern matched): one pixel per iteration. For each of the
// 4 output bytes, read the index from shuffler, fetch that source byte, store it.
LABELALIGN
|
||||
"1: \n"
|
||||
"movzb " MEMACCESS(4) ",%2 \n"
|
||||
MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
|
||||
"mov %b2," MEMACCESS(1) " \n"
|
||||
"movzb " MEMACCESS2(0x1,4) ",%2 \n"
|
||||
MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
|
||||
"mov %b2," MEMACCESS2(0x1,1) " \n"
|
||||
"movzb " MEMACCESS2(0x2,4) ",%2 \n"
|
||||
MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
|
||||
"mov %b2," MEMACCESS2(0x2,1) " \n"
|
||||
"movzb " MEMACCESS2(0x3,4) ",%2 \n"
|
||||
MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
|
||||
"mov %b2," MEMACCESS2(0x3,1) " \n"
|
||||
// Advance both pointers by one pixel (4 bytes) and count down.
"lea " MEMLEA(0x4,0) ",%0 \n"
|
||||
"lea " MEMLEA(0x4,1) ",%1 \n"
|
||||
"sub $0x1,%3 \n"
|
||||
"jg 1b \n"
|
||||
"jmp 99f \n"
|
||||

||||
// Fast path for shuffler dword 0x00010203: 16 bytes (4 pixels) per iteration.
// Bytes are widened to words against zero, the 4 words of each pixel are
// permuted with pshufhw/pshuflw (immediate 0x1b), then packed back to bytes.
LABELALIGN
|
||||
"123: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"lea " MEMLEA(0x10,0) ",%0 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"punpcklbw %%xmm5,%%xmm0 \n"
|
||||
"punpckhbw %%xmm5,%%xmm1 \n"
|
||||
"pshufhw $0x1b,%%xmm0,%%xmm0 \n"
|
||||
"pshuflw $0x1b,%%xmm0,%%xmm0 \n"
|
||||
"pshufhw $0x1b,%%xmm1,%%xmm1 \n"
|
||||
"pshuflw $0x1b,%%xmm1,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"sub $0x4,%3 \n"
|
||||
"jg 123b \n"
|
||||
"jmp 99f \n"
|
||||

||||
// Fast path for shuffler dword 0x00030201: same scheme, word permute 0x39.
LABELALIGN
|
||||
"321: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"lea " MEMLEA(0x10,0) ",%0 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"punpcklbw %%xmm5,%%xmm0 \n"
|
||||
"punpckhbw %%xmm5,%%xmm1 \n"
|
||||
"pshufhw $0x39,%%xmm0,%%xmm0 \n"
|
||||
"pshuflw $0x39,%%xmm0,%%xmm0 \n"
|
||||
"pshufhw $0x39,%%xmm1,%%xmm1 \n"
|
||||
"pshuflw $0x39,%%xmm1,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"sub $0x4,%3 \n"
|
||||
"jg 321b \n"
|
||||
"jmp 99f \n"
|
||||

||||
// Fast path for shuffler dword 0x02010003: same scheme, word permute 0x93.
LABELALIGN
|
||||
"2103: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"lea " MEMLEA(0x10,0) ",%0 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"punpcklbw %%xmm5,%%xmm0 \n"
|
||||
"punpckhbw %%xmm5,%%xmm1 \n"
|
||||
"pshufhw $0x93,%%xmm0,%%xmm0 \n"
|
||||
"pshuflw $0x93,%%xmm0,%%xmm0 \n"
|
||||
"pshufhw $0x93,%%xmm1,%%xmm1 \n"
|
||||
"pshuflw $0x93,%%xmm1,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"sub $0x4,%3 \n"
|
||||
"jg 2103b \n"
|
||||
"jmp 99f \n"
|
||||

||||
// Fast path for shuffler dword 0x03000102: same scheme, word permute 0xc6.
// Last section, so it falls through to the exit label instead of jumping.
LABELALIGN
|
||||
"3012: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"lea " MEMLEA(0x10,0) ",%0 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"punpcklbw %%xmm5,%%xmm0 \n"
|
||||
"punpckhbw %%xmm5,%%xmm1 \n"
|
||||
"pshufhw $0xc6,%%xmm0,%%xmm0 \n"
|
||||
"pshuflw $0xc6,%%xmm0,%%xmm0 \n"
|
||||
"pshufhw $0xc6,%%xmm1,%%xmm1 \n"
|
||||
"pshuflw $0xc6,%%xmm1,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"sub $0x4,%3 \n"
|
||||
"jg 3012b \n"
|
||||

||||
// Common exit for all paths.
"99: \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"=&d"(pixel_temp), // %2
|
||||
"+r"(width) // %3
|
||||
: "r"(shuffler) // %4
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm5"
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBSHUFFLEROW_SSE2
|
||||
|
||||
#ifdef HAS_I422TOYUY2ROW_SSE2
|
||||
void I422ToYUY2Row_SSE2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
|
||||
@ -5761,123 +5761,6 @@ __declspec(naked) void ARGBShuffleRow_AVX2(const uint8* src_argb,
|
||||
}
|
||||
#endif // HAS_ARGBSHUFFLEROW_AVX2
|
||||
|
||||
// SSE2 (Visual C inline asm) row function that reorders the 4 bytes of each
// source pixel according to the byte indices in shuffler.
//   src_argb - source pixels, 4 bytes per pixel (eax).
//   dst_argb - destination pixels, 4 bytes per pixel (edx).
//   shuffler - its first 4 bytes are read as one 32-bit value to select a
//              fast path; on the generic path each of those bytes is used as
//              an offset into the current source pixel (esi).
//   width    - pixel count (ecx); fast paths subtract 4 per iteration, the
//              generic path subtracts 1.
// NOTE(review): the fast paths always load and store 16 bytes, so width is
// presumably expected to be a multiple of 4 when they are taken - confirm
// against the callers.
__declspec(naked) void ARGBShuffleRow_SSE2(const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
const uint8* shuffler,
|
||||
int width) {
|
||||
__asm {
|
||||
// Two registers are saved here, hence the "+ 8" in the argument offsets below.
push ebx
|
||||
push esi
|
||||
mov eax, [esp + 8 + 4] // src_argb
|
||||
mov edx, [esp + 8 + 8] // dst_argb
|
||||
mov esi, [esp + 8 + 12] // shuffler
|
||||
mov ecx, [esp + 8 + 16] // width
|
||||
// xmm5 = 0; used below as the zero source when widening bytes to words.
pxor xmm5, xmm5
|
||||

||||
// Dispatch on the first 4 shuffler bytes read as one dword.
mov ebx, [esi] // shuffler
|
||||
cmp ebx, 0x03000102
|
||||
je shuf_3012
|
||||
cmp ebx, 0x00010203
|
||||
je shuf_0123
|
||||
cmp ebx, 0x00030201
|
||||
je shuf_0321
|
||||
cmp ebx, 0x02010003
|
||||
je shuf_2103
|
||||

||||
// Generic path (no pattern matched): one pixel per iteration. For each of the
// 4 output bytes, read the index from shuffler, fetch that source byte, store it.
// TODO(fbarchard): Use one source pointer and 3 offsets.
|
||||
shuf_any1:
|
||||
movzx ebx, byte ptr [esi]
|
||||
movzx ebx, byte ptr [eax + ebx]
|
||||
mov [edx], bl
|
||||
movzx ebx, byte ptr [esi + 1]
|
||||
movzx ebx, byte ptr [eax + ebx]
|
||||
mov [edx + 1], bl
|
||||
movzx ebx, byte ptr [esi + 2]
|
||||
movzx ebx, byte ptr [eax + ebx]
|
||||
mov [edx + 2], bl
|
||||
movzx ebx, byte ptr [esi + 3]
|
||||
movzx ebx, byte ptr [eax + ebx]
|
||||
mov [edx + 3], bl
|
||||
// Advance both pointers by one pixel (4 bytes) and count down.
lea eax, [eax + 4]
|
||||
lea edx, [edx + 4]
|
||||
sub ecx, 1
|
||||
jg shuf_any1
|
||||
jmp shuf99
|
||||

||||
// Fast paths below: 16 bytes (4 pixels) per iteration. Bytes are widened to
// words against zero, the 4 words of each pixel are permuted with
// pshufhw/pshuflw, then packed back to bytes.
shuf_0123:
|
||||
movdqu xmm0, [eax]
|
||||
lea eax, [eax + 16]
|
||||
movdqa xmm1, xmm0
|
||||
punpcklbw xmm0, xmm5
|
||||
punpckhbw xmm1, xmm5
|
||||
pshufhw xmm0, xmm0, 01Bh // 1B = 00011011 = 0x0123 = BGRAToARGB
|
||||
pshuflw xmm0, xmm0, 01Bh
|
||||
pshufhw xmm1, xmm1, 01Bh
|
||||
pshuflw xmm1, xmm1, 01Bh
|
||||
packuswb xmm0, xmm1
|
||||
movdqu [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
sub ecx, 4
|
||||
jg shuf_0123
|
||||
jmp shuf99
|
||||

||||
shuf_0321:
|
||||
movdqu xmm0, [eax]
|
||||
lea eax, [eax + 16]
|
||||
movdqa xmm1, xmm0
|
||||
punpcklbw xmm0, xmm5
|
||||
punpckhbw xmm1, xmm5
|
||||
pshufhw xmm0, xmm0, 039h // 39 = 00111001 = 0x0321 = RGBAToARGB
|
||||
pshuflw xmm0, xmm0, 039h
|
||||
pshufhw xmm1, xmm1, 039h
|
||||
pshuflw xmm1, xmm1, 039h
|
||||
packuswb xmm0, xmm1
|
||||
movdqu [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
sub ecx, 4
|
||||
jg shuf_0321
|
||||
jmp shuf99
|
||||

||||
shuf_2103:
|
||||
movdqu xmm0, [eax]
|
||||
lea eax, [eax + 16]
|
||||
movdqa xmm1, xmm0
|
||||
punpcklbw xmm0, xmm5
|
||||
punpckhbw xmm1, xmm5
|
||||
pshufhw xmm0, xmm0, 093h // 93 = 10010011 = 0x2103 = ARGBToRGBA
|
||||
pshuflw xmm0, xmm0, 093h
|
||||
pshufhw xmm1, xmm1, 093h
|
||||
pshuflw xmm1, xmm1, 093h
|
||||
packuswb xmm0, xmm1
|
||||
movdqu [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
sub ecx, 4
|
||||
jg shuf_2103
|
||||
jmp shuf99
|
||||

||||
// Last fast path: falls through to shuf99 instead of jumping.
shuf_3012:
|
||||
movdqu xmm0, [eax]
|
||||
lea eax, [eax + 16]
|
||||
movdqa xmm1, xmm0
|
||||
punpcklbw xmm0, xmm5
|
||||
punpckhbw xmm1, xmm5
|
||||
pshufhw xmm0, xmm0, 0C6h // C6 = 11000110 = 0x3012 = ABGRToARGB
|
||||
pshuflw xmm0, xmm0, 0C6h
|
||||
pshufhw xmm1, xmm1, 0C6h
|
||||
pshuflw xmm1, xmm1, 0C6h
|
||||
packuswb xmm0, xmm1
|
||||
movdqu [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
sub ecx, 4
|
||||
jg shuf_3012
|
||||

||||
// Common exit: restore the saved registers in reverse push order.
shuf99:
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// YUY2 - Macro-pixel = 2 image pixels
|
||||
// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user