diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 2b8aabfb6..cc0005caf 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -40,11 +40,6 @@ extern "C" { // define LIBYUV_DISABLE_X86 #endif #endif -// True if compiling for SSSE3 as a requirement. -#if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3)) -#define LIBYUV_SSSE3_ONLY -#endif - #if defined(__native_client__) #define LIBYUV_DISABLE_NEON #endif @@ -85,7 +80,6 @@ extern "C" { #define HAS_ARGB4444TOARGBROW_SSE2 #define HAS_ARGBEXTRACTALPHAROW_SSE2 #define HAS_ARGBSETROW_X86 -#define HAS_ARGBSHUFFLEROW_SSE2 #define HAS_ARGBSHUFFLEROW_SSSE3 #define HAS_ARGBTOARGB1555ROW_SSE2 #define HAS_ARGBTOARGB4444ROW_SSE2 @@ -1536,10 +1530,6 @@ void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb, const uint8* shuffler, int width); -void ARGBShuffleRow_SSE2(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, - int width); void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, const uint8* shuffler, @@ -1556,10 +1546,6 @@ void ARGBShuffleRow_MSA(const uint8* src_argb, uint8* dst_argb, const uint8* shuffler, int width); -void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, - int width); void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, const uint8* shuffler, diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 8127fe3a2..a08cce927 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -2647,14 +2647,6 @@ int ARGBShuffle(const uint8* src_bgra, height = 1; src_stride_bgra = dst_stride_argb = 0; } -#if defined(HAS_ARGBSHUFFLEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - ARGBShuffleRow = ARGBShuffleRow_Any_SSE2; - if (IS_ALIGNED(width, 4)) { - ARGBShuffleRow = ARGBShuffleRow_SSE2; - } - } -#endif #if defined(HAS_ARGBSHUFFLEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3; diff --git a/source/row_any.cc b/source/row_any.cc index 64695288a..4dda9099b 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -681,9 +681,6 @@ ANY11P(ARGBToRGB565DitherRow_Any_MSA, 2, 7) #endif -#ifdef HAS_ARGBSHUFFLEROW_SSE2 -ANY11P(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2, const uint8*, 4, 4, 3) -#endif #ifdef HAS_ARGBSHUFFLEROW_SSSE3 ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8*, 4, 4, 7) #endif diff --git a/source/row_gcc.cc b/source/row_gcc.cc index a4a26c1ed..59b4b726d 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -5687,128 +5687,6 @@ void ARGBShuffleRow_AVX2(const uint8* src_argb, } #endif // HAS_ARGBSHUFFLEROW_AVX2 -#ifdef HAS_ARGBSHUFFLEROW_SSE2 -// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. -void ARGBShuffleRow_SSE2(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, - int width) { - uintptr_t pixel_temp; - asm volatile ( - "pxor %%xmm5,%%xmm5 \n" - "mov " MEMACCESS(4) ",%k2 \n" - "cmp $0x3000102,%k2 \n" - "je 3012f \n" - "cmp $0x10203,%k2 \n" - "je 123f \n" - "cmp $0x30201,%k2 \n" - "je 321f \n" - "cmp $0x2010003,%k2 \n" - "je 2103f \n" - - LABELALIGN - "1: \n" - "movzb " MEMACCESS(4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS(1) " \n" - "movzb " MEMACCESS2(0x1,4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS2(0x1,1) " \n" - "movzb " MEMACCESS2(0x2,4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS2(0x2,1) " \n" - "movzb " MEMACCESS2(0x3,4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS2(0x3,1) " \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - "lea " MEMLEA(0x4,1) ",%1 \n" - "sub $0x1,%3 \n" - "jg 1b \n" - "jmp 99f \n" - - LABELALIGN - "123: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0x1b,%%xmm0,%%xmm0 \n" - "pshuflw $0x1b,%%xmm0,%%xmm0 \n" - "pshufhw $0x1b,%%xmm1,%%xmm1 \n" - "pshuflw $0x1b,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%3 \n" - "jg 123b \n" - "jmp 99f \n" - - LABELALIGN - "321: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0x39,%%xmm0,%%xmm0 \n" - "pshuflw $0x39,%%xmm0,%%xmm0 \n" - "pshufhw $0x39,%%xmm1,%%xmm1 \n" - "pshuflw $0x39,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%3 \n" - "jg 321b \n" - "jmp 99f \n" - - LABELALIGN - "2103: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0x93,%%xmm0,%%xmm0 \n" - "pshuflw $0x93,%%xmm0,%%xmm0 \n" - "pshufhw $0x93,%%xmm1,%%xmm1 \n" - "pshuflw $0x93,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%3 \n" - "jg 2103b \n" - "jmp 99f \n" - - LABELALIGN - "3012: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0xc6,%%xmm0,%%xmm0 \n" - "pshuflw $0xc6,%%xmm0,%%xmm0 \n" - "pshufhw $0xc6,%%xmm1,%%xmm1 \n" - "pshuflw $0xc6,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%3 \n" - "jg 3012b \n" - - "99: \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "=&d"(pixel_temp), // %2 - "+r"(width) // %3 - : "r"(shuffler) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm5" - ); -} -#endif // HAS_ARGBSHUFFLEROW_SSE2 - #ifdef HAS_I422TOYUY2ROW_SSE2 void I422ToYUY2Row_SSE2(const uint8* src_y, const uint8* src_u, diff --git a/source/row_win.cc b/source/row_win.cc index 03a7e9506..596d7df73 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -5761,123 +5761,6 @@ __declspec(naked) void ARGBShuffleRow_AVX2(const uint8* src_argb, } #endif // HAS_ARGBSHUFFLEROW_AVX2 -__declspec(naked) void ARGBShuffleRow_SSE2(const uint8* src_argb, - uint8* dst_argb, - const uint8* shuffler, - int width) { - __asm { - push ebx - push esi - mov eax, [esp + 8 + 4] // src_argb - mov edx, [esp + 8 + 8] // dst_argb - mov esi, [esp + 8 + 12] // shuffler - mov ecx, [esp + 8 + 16] // width - pxor xmm5, xmm5 - - mov ebx, [esi] // shuffler - cmp ebx, 0x03000102 - je shuf_3012 - cmp ebx, 0x00010203 - je shuf_0123 - cmp ebx, 0x00030201 - je shuf_0321 - cmp ebx, 0x02010003 - je shuf_2103 - - // TODO(fbarchard): Use one source pointer and 3 offsets. - shuf_any1: - movzx ebx, byte ptr [esi] - movzx ebx, byte ptr [eax + ebx] - mov [edx], bl - movzx ebx, byte ptr [esi + 1] - movzx ebx, byte ptr [eax + ebx] - mov [edx + 1], bl - movzx ebx, byte ptr [esi + 2] - movzx ebx, byte ptr [eax + ebx] - mov [edx + 2], bl - movzx ebx, byte ptr [esi + 3] - movzx ebx, byte ptr [eax + ebx] - mov [edx + 3], bl - lea eax, [eax + 4] - lea edx, [edx + 4] - sub ecx, 1 - jg shuf_any1 - jmp shuf99 - - shuf_0123: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 01Bh // 1B = 00011011 = 0x0123 = BGRAToARGB - pshuflw xmm0, xmm0, 01Bh - pshufhw xmm1, xmm1, 01Bh - pshuflw xmm1, xmm1, 01Bh - packuswb xmm0, xmm1 - movdqu [edx], xmm0 - lea edx, [edx + 16] - sub ecx, 4 - jg shuf_0123 - jmp shuf99 - - shuf_0321: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 039h // 39 = 00111001 = 0x0321 = RGBAToARGB - pshuflw xmm0, xmm0, 039h - pshufhw xmm1, xmm1, 039h - pshuflw xmm1, xmm1, 039h - packuswb xmm0, xmm1 - movdqu [edx], xmm0 - lea edx, [edx + 16] - sub ecx, 4 - jg shuf_0321 - jmp shuf99 - - shuf_2103: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 093h // 93 = 10010011 = 0x2103 = ARGBToRGBA - pshuflw xmm0, xmm0, 093h - pshufhw xmm1, xmm1, 093h - pshuflw xmm1, xmm1, 093h - packuswb xmm0, xmm1 - movdqu [edx], xmm0 - lea edx, [edx + 16] - sub ecx, 4 - jg shuf_2103 - jmp shuf99 - - shuf_3012: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 0C6h // C6 = 11000110 = 0x3012 = ABGRToARGB - pshuflw xmm0, xmm0, 0C6h - pshufhw xmm1, xmm1, 0C6h - pshuflw xmm1, xmm1, 0C6h - packuswb xmm0, xmm1 - movdqu [edx], xmm0 - lea edx, [edx + 16] - sub ecx, 4 - jg shuf_3012 - - shuf99: - pop esi - pop ebx - ret - } -} - // YUY2 - Macro-pixel = 2 image pixels // Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....