mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
remove mmx functions
BUG=none TEST=builds Review URL: http://webrtc-codereview.appspot.com/269010 git-svn-id: http://libyuv.googlecode.com/svn/trunk@77 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
c82af4a59c
commit
eaedc1d727
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 69
|
Version: 77
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -1149,11 +1149,6 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
|
|||||||
(width % 2 == 0)) {
|
(width % 2 == 0)) {
|
||||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2;
|
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2;
|
||||||
} else
|
} else
|
||||||
#endif
|
|
||||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_MMX)
|
|
||||||
if (width % 2 == 0) {
|
|
||||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_MMX;
|
|
||||||
} else
|
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
|
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
|
||||||
@ -1167,8 +1162,6 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
|
|||||||
src_v += src_stride_v;
|
src_v += src_stride_v;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// MMX used for FastConvertYUVToARGBRow requires an emms instruction.
|
|
||||||
EMMS();
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1201,11 +1194,6 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
|
|||||||
(width % 2 == 0)) {
|
(width % 2 == 0)) {
|
||||||
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_SSE2;
|
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_SSE2;
|
||||||
} else
|
} else
|
||||||
#endif
|
|
||||||
#if defined(HAS_FASTCONVERTYUVTOBGRAROW_MMX)
|
|
||||||
if (width % 2 == 0) {
|
|
||||||
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_MMX;
|
|
||||||
} else
|
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_C;
|
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_C;
|
||||||
@ -1219,7 +1207,6 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
|
|||||||
src_v += src_stride_v;
|
src_v += src_stride_v;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EMMS();
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1252,11 +1239,6 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
|
|||||||
(width % 2 == 0)) {
|
(width % 2 == 0)) {
|
||||||
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_SSE2;
|
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_SSE2;
|
||||||
} else
|
} else
|
||||||
#endif
|
|
||||||
#if defined(HAS_FASTCONVERTYUVTOABGRROW_MMX)
|
|
||||||
if (width % 2 == 0) {
|
|
||||||
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_MMX;
|
|
||||||
} else
|
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_C;
|
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_C;
|
||||||
@ -1270,7 +1252,6 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
|
|||||||
src_v += src_stride_v;
|
src_v += src_stride_v;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EMMS();
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1303,11 +1284,6 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
|
|||||||
(width % 2 == 0)) {
|
(width % 2 == 0)) {
|
||||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2;
|
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2;
|
||||||
} else
|
} else
|
||||||
#endif
|
|
||||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_MMX)
|
|
||||||
if (width % 2 == 0) {
|
|
||||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_MMX;
|
|
||||||
} else
|
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
|
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
|
||||||
@ -1319,8 +1295,6 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
|
|||||||
src_u += src_stride_u;
|
src_u += src_stride_u;
|
||||||
src_v += src_stride_v;
|
src_v += src_stride_v;
|
||||||
}
|
}
|
||||||
// MMX used for FastConvertYUVToARGBRow requires an emms instruction.
|
|
||||||
EMMS();
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1353,13 +1327,9 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
|
|||||||
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_SSE2;
|
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_SSE2;
|
||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_MMX)
|
|
||||||
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_MMX;
|
|
||||||
#else
|
|
||||||
{
|
{
|
||||||
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_C;
|
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_C;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
for (int y = 0; y < height; ++y) {
|
for (int y = 0; y < height; ++y) {
|
||||||
FastConvertYUV444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
|
FastConvertYUV444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
|
||||||
dst_argb += dst_stride_argb;
|
dst_argb += dst_stride_argb;
|
||||||
@ -1367,8 +1337,6 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
|
|||||||
src_u += src_stride_u;
|
src_u += src_stride_u;
|
||||||
src_v += src_stride_v;
|
src_v += src_stride_v;
|
||||||
}
|
}
|
||||||
// MMX used for FastConvertYUVToARGBRow requires an emms instruction.
|
|
||||||
EMMS();
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1391,11 +1359,6 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
|
|||||||
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
|
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
|
||||||
FastConvertYToARGBRow = FastConvertYToARGBRow_SSE2;
|
FastConvertYToARGBRow = FastConvertYToARGBRow_SSE2;
|
||||||
} else
|
} else
|
||||||
#endif
|
|
||||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_MMX)
|
|
||||||
if (width % 2 == 0) {
|
|
||||||
FastConvertYToARGBRow = FastConvertYToARGBRow_MMX;
|
|
||||||
} else
|
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
FastConvertYToARGBRow = FastConvertYToARGBRow_C;
|
FastConvertYToARGBRow = FastConvertYToARGBRow_C;
|
||||||
@ -1405,8 +1368,6 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
|
|||||||
dst_argb += dst_stride_argb;
|
dst_argb += dst_stride_argb;
|
||||||
src_y += src_stride_y;
|
src_y += src_stride_y;
|
||||||
}
|
}
|
||||||
// MMX used for FastConvertYUVToARGBRow requires an emms instruction.
|
|
||||||
EMMS();
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -13,21 +13,19 @@
|
|||||||
#include "libyuv/cpu_id.h"
|
#include "libyuv/cpu_id.h"
|
||||||
#include "libyuv/planar_functions.h"
|
#include "libyuv/planar_functions.h"
|
||||||
#include "rotate_priv.h"
|
#include "rotate_priv.h"
|
||||||
|
#include "row.h"
|
||||||
|
|
||||||
namespace libyuv {
|
namespace libyuv {
|
||||||
|
|
||||||
#if (defined(WIN32) || defined(__x86_64__) || defined(__i386__)) \
|
#if (defined(WIN32) || defined(__x86_64__) || defined(__i386__)) && \
|
||||||
&& !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
!defined(__APPLE__) && \
|
||||||
#if defined(_MSC_VER)
|
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
||||||
#define TALIGN16(t, var) static __declspec(align(16)) t _ ## var
|
|
||||||
#else
|
|
||||||
#define TALIGN16(t, var) t var __attribute__((aligned(16)))
|
|
||||||
#endif
|
|
||||||
// Shuffle table for reversing the bytes.
|
// Shuffle table for reversing the bytes.
|
||||||
extern "C" TALIGN16(const uint8, kShuffleReverse[16]) =
|
static const uvec8 kShuffleReverse =
|
||||||
{ 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u };
|
{ 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u };
|
||||||
// Shuffle table for reversing the bytes of UV channels.
|
// Shuffle table for reversing the bytes of UV channels.
|
||||||
extern "C" TALIGN16(const uint8, kShuffleReverseUV[16]) =
|
static const uvec8 kShuffleReverseUV =
|
||||||
{ 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u };
|
{ 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u };
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -73,7 +71,7 @@ __asm {
|
|||||||
mov edx, [esp + 12 + 12] // dst
|
mov edx, [esp + 12 + 12] // dst
|
||||||
mov esi, [esp + 12 + 16] // dst_stride
|
mov esi, [esp + 12 + 16] // dst_stride
|
||||||
mov ecx, [esp + 12 + 20] // width
|
mov ecx, [esp + 12 + 20] // width
|
||||||
convertloop :
|
convertloop:
|
||||||
// Read in the data from the source pointer.
|
// Read in the data from the source pointer.
|
||||||
// First round of bit swap.
|
// First round of bit swap.
|
||||||
movq xmm0, qword ptr [eax]
|
movq xmm0, qword ptr [eax]
|
||||||
@ -172,7 +170,7 @@ __asm {
|
|||||||
and esp, ~15
|
and esp, ~15
|
||||||
mov [esp + 16], ecx
|
mov [esp + 16], ecx
|
||||||
mov ecx, [ecx + 16 + 28] // w
|
mov ecx, [ecx + 16 + 28] // w
|
||||||
convertloop :
|
convertloop:
|
||||||
// Read in the data from the source pointer.
|
// Read in the data from the source pointer.
|
||||||
// First round of bit swap.
|
// First round of bit swap.
|
||||||
movdqa xmm0, [eax]
|
movdqa xmm0, [eax]
|
||||||
@ -863,9 +861,9 @@ __asm {
|
|||||||
mov eax, [esp + 4] // src
|
mov eax, [esp + 4] // src
|
||||||
mov edx, [esp + 8] // dst
|
mov edx, [esp + 8] // dst
|
||||||
mov ecx, [esp + 12] // width
|
mov ecx, [esp + 12] // width
|
||||||
movdqa xmm5, _kShuffleReverse
|
movdqa xmm5, kShuffleReverse
|
||||||
lea eax, [eax + ecx - 16]
|
lea eax, [eax + ecx - 16]
|
||||||
convertloop :
|
convertloop:
|
||||||
movdqa xmm0, [eax]
|
movdqa xmm0, [eax]
|
||||||
lea eax, [eax - 16]
|
lea eax, [eax - 16]
|
||||||
pshufb xmm0, xmm5
|
pshufb xmm0, xmm5
|
||||||
@ -878,12 +876,16 @@ __asm {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined(__i386__) || defined(__x86_64__)) && \
|
#elif (defined(__i386__) || defined(__x86_64__)) && \
|
||||||
|
!defined(__APPLE__) && \
|
||||||
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
||||||
#define HAS_REVERSE_LINE_SSSE3
|
#define HAS_REVERSE_LINE_SSSE3
|
||||||
static void ReverseLine_SSSE3(const uint8* src, uint8* dst, int width) {
|
static void ReverseLine_SSSE3(const uint8* src, uint8* dst, int width) {
|
||||||
intptr_t temp_width = static_cast<intptr_t>(width);
|
intptr_t temp_width = static_cast<intptr_t>(width);
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"movdqa (%3),%%xmm5 \n"
|
"movdqa %0,%%xmm5 \n"
|
||||||
|
:: "m"(kShuffleReverse)
|
||||||
|
);
|
||||||
|
asm volatile (
|
||||||
"lea -0x10(%0,%2,1),%0 \n"
|
"lea -0x10(%0,%2,1),%0 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqa (%0),%%xmm0 \n"
|
"movdqa (%0),%%xmm0 \n"
|
||||||
@ -896,12 +898,12 @@ static void ReverseLine_SSSE3(const uint8* src, uint8* dst, int width) {
|
|||||||
: "+r"(src), // %0
|
: "+r"(src), // %0
|
||||||
"+r"(dst), // %1
|
"+r"(dst), // %1
|
||||||
"+r"(temp_width) // %2
|
"+r"(temp_width) // %2
|
||||||
: "r"(kShuffleReverse) // %3
|
:
|
||||||
: "memory", "cc"
|
: "memory", "cc"
|
||||||
#if defined(__SSE2__)
|
#if defined(__SSE2__)
|
||||||
, "xmm0", "xmm5"
|
, "xmm0", "xmm5"
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1066,10 +1068,10 @@ __asm {
|
|||||||
mov edx, [esp + 4 + 8] // dst_a
|
mov edx, [esp + 4 + 8] // dst_a
|
||||||
mov edi, [esp + 4 + 12] // dst_b
|
mov edi, [esp + 4 + 12] // dst_b
|
||||||
mov ecx, [esp + 4 + 16] // width
|
mov ecx, [esp + 4 + 16] // width
|
||||||
movdqa xmm5, _kShuffleReverseUV
|
movdqa xmm5, kShuffleReverseUV
|
||||||
lea eax, [eax + ecx * 2 - 16]
|
lea eax, [eax + ecx * 2 - 16]
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
movdqa xmm0, [eax]
|
movdqa xmm0, [eax]
|
||||||
lea eax, [eax - 16]
|
lea eax, [eax - 16]
|
||||||
pshufb xmm0, xmm5
|
pshufb xmm0, xmm5
|
||||||
@ -1085,6 +1087,7 @@ __asm {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined(__i386__) || defined(__x86_64__)) && \
|
#elif (defined(__i386__) || defined(__x86_64__)) && \
|
||||||
|
!defined(__APPLE__) && \
|
||||||
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
||||||
#define HAS_REVERSE_LINE_UV_SSSE3
|
#define HAS_REVERSE_LINE_UV_SSSE3
|
||||||
void ReverseLineUV_SSSE3(const uint8* src,
|
void ReverseLineUV_SSSE3(const uint8* src,
|
||||||
@ -1092,28 +1095,31 @@ void ReverseLineUV_SSSE3(const uint8* src,
|
|||||||
int width) {
|
int width) {
|
||||||
intptr_t temp_width = static_cast<intptr_t>(width);
|
intptr_t temp_width = static_cast<intptr_t>(width);
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"movdqa (%4),%%xmm5 \n"
|
"movdqa %0,%%xmm5 \n"
|
||||||
"lea -0x10(%0,%3,2),%0 \n"
|
:: "m"(kShuffleReverseUV)
|
||||||
|
);
|
||||||
|
asm volatile (
|
||||||
|
"lea -16(%0,%3,2),%0 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqa (%0),%%xmm0 \n"
|
"movdqa (%0),%%xmm0 \n"
|
||||||
"lea -0x10(%0),%0 \n"
|
"lea -16(%0),%0 \n"
|
||||||
"pshufb %%xmm5,%%xmm0 \n"
|
"pshufb %%xmm5,%%xmm0 \n"
|
||||||
"movlpd %%xmm0,(%1) \n"
|
"movlpd %%xmm0,(%1) \n"
|
||||||
"lea 0x8(%1),%1 \n"
|
"lea 8(%1),%1 \n"
|
||||||
"movhpd %%xmm0,(%2) \n"
|
"movhpd %%xmm0,(%2) \n"
|
||||||
"lea 0x8(%2),%2 \n"
|
"lea 8(%2),%2 \n"
|
||||||
"sub $0x8,%3 \n"
|
"sub $8,%3 \n"
|
||||||
"ja 1b \n"
|
"ja 1b \n"
|
||||||
: "+r"(src), // %0
|
: "+r"(src), // %0
|
||||||
"+r"(dst_a), // %1
|
"+r"(dst_a), // %1
|
||||||
"+r"(dst_b), // %2
|
"+r"(dst_b), // %2
|
||||||
"+r"(temp_width) // %3
|
"+r"(temp_width) // %3
|
||||||
: "r"(kShuffleReverseUV) // %4
|
:
|
||||||
: "memory", "cc"
|
: "memory", "cc"
|
||||||
#if defined(__SSE2__)
|
#if defined(__SSE2__)
|
||||||
, "xmm0", "xmm5"
|
, "xmm0", "xmm5"
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
81
source/row.h
81
source/row.h
@ -51,15 +51,6 @@
|
|||||||
#define HAS_FASTCONVERTYTOARGBROW_SSE2
|
#define HAS_FASTCONVERTYTOARGBROW_SSE2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are available on Windows and GCC 32 bit
|
|
||||||
#if (defined(WIN32) || \
|
|
||||||
defined(__i386__)) && \
|
|
||||||
!defined(LIBYUV_DISABLE_ASM)
|
|
||||||
#define HAS_FASTCONVERTYUVTOARGBROW_MMX
|
|
||||||
#define HAS_FASTCONVERTYUVTOBGRAROW_MMX
|
|
||||||
#define HAS_FASTCONVERTYUVTOABGRROW_MMX
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// The following are available on Windows
|
// The following are available on Windows
|
||||||
#if defined(WIN32) && \
|
#if defined(WIN32) && \
|
||||||
!defined(LIBYUV_DISABLE_ASM)
|
!defined(LIBYUV_DISABLE_ASM)
|
||||||
@ -128,12 +119,14 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
|
|||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
#define SIMD_ALIGNED(var) __declspec(align(16)) var
|
#define SIMD_ALIGNED(var) __declspec(align(16)) var
|
||||||
#define TALIGN16(t, var) static __declspec(align(16)) t _ ## var
|
typedef __declspec(align(16)) signed char vec8[16];
|
||||||
|
typedef __declspec(align(16)) unsigned char uvec8[16];
|
||||||
|
typedef __declspec(align(16)) signed short vec16[8];
|
||||||
#else // __GNUC__
|
#else // __GNUC__
|
||||||
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
|
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
|
||||||
#define TALIGN16(t, var) t var __attribute__((aligned(16)))
|
|
||||||
typedef signed char __attribute__((vector_size(16))) vec8;
|
typedef signed char __attribute__((vector_size(16))) vec8;
|
||||||
typedef unsigned char __attribute__((vector_size(16))) uvec8;
|
typedef unsigned char __attribute__((vector_size(16))) uvec8;
|
||||||
|
typedef signed short __attribute__((vector_size(16))) vec16;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern "C" SIMD_ALIGNED(const int16 kCoefficientsRgbY[768][4]);
|
extern "C" SIMD_ALIGNED(const int16 kCoefficientsRgbY[768][4]);
|
||||||
@ -204,36 +197,6 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
|
|||||||
int width);
|
int width);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_FASTCONVERTYUVTOARGBROW_MMX
|
|
||||||
void FastConvertYUVToARGBRow_MMX(const uint8* y_buf,
|
|
||||||
const uint8* u_buf,
|
|
||||||
const uint8* v_buf,
|
|
||||||
uint8* rgb_buf,
|
|
||||||
int width);
|
|
||||||
|
|
||||||
void FastConvertYUVToBGRARow_MMX(const uint8* y_buf,
|
|
||||||
const uint8* u_buf,
|
|
||||||
const uint8* v_buf,
|
|
||||||
uint8* rgb_buf,
|
|
||||||
int width);
|
|
||||||
|
|
||||||
void FastConvertYUVToABGRRow_MMX(const uint8* y_buf,
|
|
||||||
const uint8* u_buf,
|
|
||||||
const uint8* v_buf,
|
|
||||||
uint8* rgb_buf,
|
|
||||||
int width);
|
|
||||||
|
|
||||||
void FastConvertYUV444ToARGBRow_MMX(const uint8* y_buf,
|
|
||||||
const uint8* u_buf,
|
|
||||||
const uint8* v_buf,
|
|
||||||
uint8* rgb_buf,
|
|
||||||
int width);
|
|
||||||
|
|
||||||
void FastConvertYToARGBRow_MMX(const uint8* y_buf,
|
|
||||||
uint8* rgb_buf,
|
|
||||||
int width);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAS_FASTCONVERTYUVTOARGBROW_SSSE3
|
#ifdef HAS_FASTCONVERTYUVTOARGBROW_SSSE3
|
||||||
void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf,
|
void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf,
|
||||||
const uint8* u_buf,
|
const uint8* u_buf,
|
||||||
@ -268,42 +231,6 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Method to force C version.
|
|
||||||
//#define USE_MMX 0
|
|
||||||
//#define USE_SSE2 0
|
|
||||||
|
|
||||||
#if !defined(USE_MMX)
|
|
||||||
// Windows, Mac and Linux use MMX
|
|
||||||
#if defined(__i386__) || defined(_MSC_VER)
|
|
||||||
#define USE_MMX 1
|
|
||||||
#else
|
|
||||||
#define USE_MMX 0
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(USE_SSE2)
|
|
||||||
#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || _M_IX86_FP==2
|
|
||||||
#define USE_SSE2 1
|
|
||||||
#else
|
|
||||||
#define USE_SSE2 0
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// x64 uses MMX2 (SSE) so emms is not required.
|
|
||||||
// Warning C4799: function has no EMMS instruction.
|
|
||||||
// EMMS() is slow and should be called by the calling function once per image.
|
|
||||||
#if USE_MMX && !defined(ARCH_CPU_X86_64)
|
|
||||||
#if defined(_MSC_VER)
|
|
||||||
#define EMMS() __asm emms
|
|
||||||
#pragma warning(disable: 4799)
|
|
||||||
#else
|
|
||||||
#define EMMS() asm("emms")
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#define EMMS()
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
|
|
||||||
#endif // LIBYUV_SOURCE_ROW_H_
|
#endif // LIBYUV_SOURCE_ROW_H_
|
||||||
|
|||||||
@ -542,231 +542,6 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf, // rdi
|
|||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAS_FASTCONVERTYUVTOARGBROW_MMX
|
|
||||||
// 32 bit mmx gcc version
|
|
||||||
|
|
||||||
#ifdef OSX
|
|
||||||
#define UNDERSCORE "_"
|
|
||||||
#else
|
|
||||||
#define UNDERSCORE ""
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void FastConvertYUVToARGBRow_MMX(const uint8* y_buf,
|
|
||||||
const uint8* u_buf,
|
|
||||||
const uint8* v_buf,
|
|
||||||
uint8* rgb_buf,
|
|
||||||
int width);
|
|
||||||
asm(
|
|
||||||
".text \n"
|
|
||||||
#if defined(OSX) || defined(IOS)
|
|
||||||
".globl _FastConvertYUVToARGBRow_MMX \n"
|
|
||||||
"_FastConvertYUVToARGBRow_MMX: \n"
|
|
||||||
#else
|
|
||||||
".global FastConvertYUVToARGBRow_MMX \n"
|
|
||||||
"FastConvertYUVToARGBRow_MMX: \n"
|
|
||||||
#endif
|
|
||||||
"pusha \n"
|
|
||||||
"mov 0x24(%esp),%edx \n"
|
|
||||||
"mov 0x28(%esp),%edi \n"
|
|
||||||
"mov 0x2c(%esp),%esi \n"
|
|
||||||
"mov 0x30(%esp),%ebp \n"
|
|
||||||
"mov 0x34(%esp),%ecx \n"
|
|
||||||
|
|
||||||
"1: \n"
|
|
||||||
"movzbl (%edi),%eax \n"
|
|
||||||
"lea 1(%edi),%edi \n"
|
|
||||||
"movzbl (%esi),%ebx \n"
|
|
||||||
"lea 1(%esi),%esi \n"
|
|
||||||
"movq " UNDERSCORE "kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
|
|
||||||
"movzbl (%edx),%eax \n"
|
|
||||||
"paddsw " UNDERSCORE "kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
|
|
||||||
"movzbl 0x1(%edx),%ebx \n"
|
|
||||||
"movq " UNDERSCORE "kCoefficientsRgbY(,%eax,8),%mm1\n"
|
|
||||||
"lea 2(%edx),%edx \n"
|
|
||||||
"movq " UNDERSCORE "kCoefficientsRgbY(,%ebx,8),%mm2\n"
|
|
||||||
"paddsw %mm0,%mm1 \n"
|
|
||||||
"paddsw %mm0,%mm2 \n"
|
|
||||||
"psraw $0x6,%mm1 \n"
|
|
||||||
"psraw $0x6,%mm2 \n"
|
|
||||||
"packuswb %mm2,%mm1 \n"
|
|
||||||
"movq %mm1,0x0(%ebp) \n"
|
|
||||||
"lea 8(%ebp),%ebp \n"
|
|
||||||
"sub $0x2,%ecx \n"
|
|
||||||
"ja 1b \n"
|
|
||||||
"popa \n"
|
|
||||||
"ret \n"
|
|
||||||
);
|
|
||||||
|
|
||||||
void FastConvertYUVToBGRARow_MMX(const uint8* y_buf,
|
|
||||||
const uint8* u_buf,
|
|
||||||
const uint8* v_buf,
|
|
||||||
uint8* rgb_buf,
|
|
||||||
int width);
|
|
||||||
asm(
|
|
||||||
".text \n"
|
|
||||||
#if defined(OSX) || defined(IOS)
|
|
||||||
".globl _FastConvertYUVToBGRARow_MMX \n"
|
|
||||||
"_FastConvertYUVToBGRARow_MMX: \n"
|
|
||||||
#else
|
|
||||||
".global FastConvertYUVToBGRARow_MMX \n"
|
|
||||||
"FastConvertYUVToBGRARow_MMX: \n"
|
|
||||||
#endif
|
|
||||||
"pusha \n"
|
|
||||||
"mov 0x24(%esp),%edx \n"
|
|
||||||
"mov 0x28(%esp),%edi \n"
|
|
||||||
"mov 0x2c(%esp),%esi \n"
|
|
||||||
"mov 0x30(%esp),%ebp \n"
|
|
||||||
"mov 0x34(%esp),%ecx \n"
|
|
||||||
|
|
||||||
"1: \n"
|
|
||||||
"movzbl (%edi),%eax \n"
|
|
||||||
"lea 1(%edi),%edi \n"
|
|
||||||
"movzbl (%esi),%ebx \n"
|
|
||||||
"lea 1(%esi),%esi \n"
|
|
||||||
"movq " UNDERSCORE "kCoefficientsBgraY+2048(,%eax,8),%mm0\n"
|
|
||||||
"movzbl (%edx),%eax \n"
|
|
||||||
"paddsw " UNDERSCORE "kCoefficientsBgraY+4096(,%ebx,8),%mm0\n"
|
|
||||||
"movzbl 0x1(%edx),%ebx \n"
|
|
||||||
"movq " UNDERSCORE "kCoefficientsBgraY(,%eax,8),%mm1\n"
|
|
||||||
"lea 2(%edx),%edx \n"
|
|
||||||
"movq " UNDERSCORE "kCoefficientsBgraY(,%ebx,8),%mm2\n"
|
|
||||||
"paddsw %mm0,%mm1 \n"
|
|
||||||
"paddsw %mm0,%mm2 \n"
|
|
||||||
"psraw $0x6,%mm1 \n"
|
|
||||||
"psraw $0x6,%mm2 \n"
|
|
||||||
"packuswb %mm2,%mm1 \n"
|
|
||||||
"movq %mm1,0x0(%ebp) \n"
|
|
||||||
"lea 8(%ebp),%ebp \n"
|
|
||||||
"sub $0x2,%ecx \n"
|
|
||||||
"ja 1b \n"
|
|
||||||
"popa \n"
|
|
||||||
"ret \n"
|
|
||||||
);
|
|
||||||
|
|
||||||
void FastConvertYUVToABGRRow_MMX(const uint8* y_buf,
|
|
||||||
const uint8* u_buf,
|
|
||||||
const uint8* v_buf,
|
|
||||||
uint8* rgb_buf,
|
|
||||||
int width);
|
|
||||||
asm(
|
|
||||||
".text \n"
|
|
||||||
#if defined(OSX) || defined(IOS)
|
|
||||||
".globl _FastConvertYUVToABGRRow_MMX \n"
|
|
||||||
"_FastConvertYUVToABGRRow_MMX: \n"
|
|
||||||
#else
|
|
||||||
".global FastConvertYUVToABGRRow_MMX \n"
|
|
||||||
"FastConvertYUVToABGRRow_MMX: \n"
|
|
||||||
#endif
|
|
||||||
"pusha \n"
|
|
||||||
"mov 0x24(%esp),%edx \n"
|
|
||||||
"mov 0x28(%esp),%edi \n"
|
|
||||||
"mov 0x2c(%esp),%esi \n"
|
|
||||||
"mov 0x30(%esp),%ebp \n"
|
|
||||||
"mov 0x34(%esp),%ecx \n"
|
|
||||||
|
|
||||||
"1: \n"
|
|
||||||
"movzbl (%edi),%eax \n"
|
|
||||||
"lea 1(%edi),%edi \n"
|
|
||||||
"movzbl (%esi),%ebx \n"
|
|
||||||
"lea 1(%esi),%esi \n"
|
|
||||||
"movq " UNDERSCORE "kCoefficientsAbgrY+2048(,%eax,8),%mm0\n"
|
|
||||||
"movzbl (%edx),%eax \n"
|
|
||||||
"paddsw " UNDERSCORE "kCoefficientsAbgrY+4096(,%ebx,8),%mm0\n"
|
|
||||||
"movzbl 0x1(%edx),%ebx \n"
|
|
||||||
"movq " UNDERSCORE "kCoefficientsAbgrY(,%eax,8),%mm1\n"
|
|
||||||
"lea 2(%edx),%edx \n"
|
|
||||||
"movq " UNDERSCORE "kCoefficientsAbgrY(,%ebx,8),%mm2\n"
|
|
||||||
"paddsw %mm0,%mm1 \n"
|
|
||||||
"paddsw %mm0,%mm2 \n"
|
|
||||||
"psraw $0x6,%mm1 \n"
|
|
||||||
"psraw $0x6,%mm2 \n"
|
|
||||||
"packuswb %mm2,%mm1 \n"
|
|
||||||
"movq %mm1,0x0(%ebp) \n"
|
|
||||||
"lea 8(%ebp),%ebp \n"
|
|
||||||
"sub $0x2,%ecx \n"
|
|
||||||
"ja 1b \n"
|
|
||||||
"popa \n"
|
|
||||||
"ret \n"
|
|
||||||
);
|
|
||||||
|
|
||||||
void FastConvertYUV444ToARGBRow_MMX(const uint8* y_buf,
|
|
||||||
const uint8* u_buf,
|
|
||||||
const uint8* v_buf,
|
|
||||||
uint8* rgb_buf,
|
|
||||||
int width);
|
|
||||||
asm(
|
|
||||||
".text \n"
|
|
||||||
#if defined(OSX) || defined(IOS)
|
|
||||||
".globl _FastConvertYUV444ToARGBRow_MMX \n"
|
|
||||||
"_FastConvertYUV444ToARGBRow_MMX: \n"
|
|
||||||
#else
|
|
||||||
".global FastConvertYUV444ToARGBRow_MMX \n"
|
|
||||||
"FastConvertYUV444ToARGBRow_MMX: \n"
|
|
||||||
#endif
|
|
||||||
"pusha \n"
|
|
||||||
"mov 0x24(%esp),%edx \n"
|
|
||||||
"mov 0x28(%esp),%edi \n"
|
|
||||||
"mov 0x2c(%esp),%esi \n"
|
|
||||||
"mov 0x30(%esp),%ebp \n"
|
|
||||||
"mov 0x34(%esp),%ecx \n"
|
|
||||||
|
|
||||||
"1: \n"
|
|
||||||
"movzbl (%edi),%eax \n"
|
|
||||||
"lea 1(%edi),%edi \n"
|
|
||||||
"movzbl (%esi),%ebx \n"
|
|
||||||
"lea 1(%esi),%esi \n"
|
|
||||||
"movq " UNDERSCORE "kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
|
|
||||||
"movzbl (%edx),%eax \n"
|
|
||||||
"paddsw " UNDERSCORE "kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
|
|
||||||
"lea 1(%edx),%edx \n"
|
|
||||||
"paddsw " UNDERSCORE "kCoefficientsRgbY(,%eax,8),%mm0\n"
|
|
||||||
"psraw $0x6,%mm0 \n"
|
|
||||||
"packuswb %mm0,%mm0 \n"
|
|
||||||
"movd %mm0,0x0(%ebp) \n"
|
|
||||||
"lea 4(%ebp),%ebp \n"
|
|
||||||
"sub $0x1,%ecx \n"
|
|
||||||
"ja 1b \n"
|
|
||||||
"popa \n"
|
|
||||||
"ret \n"
|
|
||||||
);
|
|
||||||
|
|
||||||
void FastConvertYToARGBRow_MMX(const uint8* y_buf,
|
|
||||||
uint8* rgb_buf,
|
|
||||||
int width);
|
|
||||||
asm(
|
|
||||||
".text \n"
|
|
||||||
#if defined(OSX) || defined(IOS)
|
|
||||||
".globl _FastConvertYToARGBRow_MMX \n"
|
|
||||||
"_FastConvertYToARGBRow_MMX: \n"
|
|
||||||
#else
|
|
||||||
".global FastConvertYToARGBRow_MMX \n"
|
|
||||||
"FastConvertYToARGBRow_MMX: \n"
|
|
||||||
#endif
|
|
||||||
"push %ebx \n"
|
|
||||||
"mov 0x8(%esp),%eax \n"
|
|
||||||
"mov 0xc(%esp),%edx \n"
|
|
||||||
"mov 0x10(%esp),%ecx \n"
|
|
||||||
|
|
||||||
"1: \n"
|
|
||||||
"movzbl (%eax),%ebx \n"
|
|
||||||
"movq " UNDERSCORE "kCoefficientsRgbY(,%ebx,8),%mm0\n"
|
|
||||||
"psraw $0x6,%mm0 \n"
|
|
||||||
"movzbl 0x1(%eax),%ebx \n"
|
|
||||||
"movq " UNDERSCORE "kCoefficientsRgbY(,%ebx,8),%mm1\n"
|
|
||||||
"psraw $0x6,%mm1 \n"
|
|
||||||
"packuswb %mm1,%mm0 \n"
|
|
||||||
"lea 0x2(%eax),%eax \n"
|
|
||||||
"movq %mm0,(%edx) \n"
|
|
||||||
"lea 0x8(%edx),%edx \n"
|
|
||||||
"sub $0x2,%ecx \n"
|
|
||||||
"ja 1b \n"
|
|
||||||
"pop %ebx \n"
|
|
||||||
"ret \n"
|
|
||||||
);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_ARGBTOYROW_SSSE3
|
#ifdef HAS_ARGBTOYROW_SSSE3
|
||||||
|
|||||||
@ -15,71 +15,71 @@ extern "C" {
|
|||||||
#ifdef HAS_ARGBTOYROW_SSSE3
|
#ifdef HAS_ARGBTOYROW_SSSE3
|
||||||
|
|
||||||
// Constant multiplication table for converting ARGB to I400.
|
// Constant multiplication table for converting ARGB to I400.
|
||||||
SIMD_ALIGNED(const int8 kARGBToY[16]) = {
|
static const vec8 kARGBToY = {
|
||||||
13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
|
13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
|
||||||
};
|
};
|
||||||
|
|
||||||
SIMD_ALIGNED(const int8 kARGBToU[16]) = {
|
static const vec8 kARGBToU = {
|
||||||
112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
|
112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
|
||||||
};
|
};
|
||||||
|
|
||||||
SIMD_ALIGNED(const int8 kARGBToV[16]) = {
|
static const vec8 kARGBToV = {
|
||||||
-18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
|
-18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Constants for BGRA
|
// Constants for BGRA
|
||||||
SIMD_ALIGNED(const int8 kBGRAToY[16]) = {
|
static const vec8 kBGRAToY = {
|
||||||
0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
|
0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
|
||||||
};
|
};
|
||||||
|
|
||||||
SIMD_ALIGNED(const int8 kBGRAToU[16]) = {
|
static const vec8 kBGRAToU = {
|
||||||
0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112
|
0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112
|
||||||
};
|
};
|
||||||
|
|
||||||
SIMD_ALIGNED(const int8 kBGRAToV[16]) = {
|
static const vec8 kBGRAToV = {
|
||||||
0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18
|
0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18
|
||||||
};
|
};
|
||||||
|
|
||||||
// Constants for ABGR
|
// Constants for ABGR
|
||||||
SIMD_ALIGNED(const int8 kABGRToY[16]) = {
|
static const vec8 kABGRToY = {
|
||||||
33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0
|
33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0
|
||||||
};
|
};
|
||||||
|
|
||||||
SIMD_ALIGNED(const int8 kABGRToU[16]) = {
|
static const vec8 kABGRToU = {
|
||||||
-38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0
|
-38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0
|
||||||
};
|
};
|
||||||
|
|
||||||
SIMD_ALIGNED(const int8 kABGRToV[16]) = {
|
static const vec8 kABGRToV = {
|
||||||
112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0
|
112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0
|
||||||
};
|
};
|
||||||
|
|
||||||
SIMD_ALIGNED(const uint8 kAddY16[16]) = {
|
static const uvec8 kAddY16 = {
|
||||||
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
|
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
|
||||||
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
|
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
|
||||||
};
|
};
|
||||||
|
|
||||||
SIMD_ALIGNED(const uint8 kAddUV128[16]) = {
|
static const uvec8 kAddUV128 = {
|
||||||
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
|
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
|
||||||
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
|
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
|
||||||
};
|
};
|
||||||
|
|
||||||
// Shuffle table for converting BG24 to ARGB.
|
// Shuffle table for converting BG24 to ARGB.
|
||||||
SIMD_ALIGNED(const uint8 kShuffleMaskBG24ToARGB[16]) = {
|
static const uvec8 kShuffleMaskBG24ToARGB = {
|
||||||
0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
|
0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
|
||||||
};
|
};
|
||||||
|
|
||||||
// Shuffle table for converting RAW to ARGB.
|
// Shuffle table for converting RAW to ARGB.
|
||||||
SIMD_ALIGNED(const uint8 kShuffleMaskRAWToARGB[16]) = {
|
static const uvec8 kShuffleMaskRAWToARGB = {
|
||||||
2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
|
2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
|
||||||
};
|
};
|
||||||
|
|
||||||
// Shuffle table for converting ABGR to ARGB.
|
// Shuffle table for converting ABGR to ARGB.
|
||||||
SIMD_ALIGNED(const uint8 kShuffleMaskABGRToARGB[16]) = {
|
static const uvec8 kShuffleMaskABGRToARGB = {
|
||||||
2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u
|
2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u
|
||||||
};
|
};
|
||||||
|
|
||||||
// Shuffle table for converting BGRA to ARGB.
|
// Shuffle table for converting BGRA to ARGB.
|
||||||
SIMD_ALIGNED(const uint8 kShuffleMaskBGRAToARGB[16]) = {
|
static const uvec8 kShuffleMaskBGRAToARGB = {
|
||||||
3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
|
3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -118,7 +118,7 @@ __asm {
|
|||||||
mov ecx, [esp + 12] // pix
|
mov ecx, [esp + 12] // pix
|
||||||
movdqa xmm5, kShuffleMaskABGRToARGB
|
movdqa xmm5, kShuffleMaskABGRToARGB
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
movdqa xmm0, [eax]
|
movdqa xmm0, [eax]
|
||||||
lea eax, [eax + 16]
|
lea eax, [eax + 16]
|
||||||
pshufb xmm0, xmm5
|
pshufb xmm0, xmm5
|
||||||
@ -138,7 +138,7 @@ __asm {
|
|||||||
mov ecx, [esp + 12] // pix
|
mov ecx, [esp + 12] // pix
|
||||||
movdqa xmm5, kShuffleMaskBGRAToARGB
|
movdqa xmm5, kShuffleMaskBGRAToARGB
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
movdqa xmm0, [eax]
|
movdqa xmm0, [eax]
|
||||||
lea eax, [eax + 16]
|
lea eax, [eax + 16]
|
||||||
pshufb xmm0, xmm5
|
pshufb xmm0, xmm5
|
||||||
@ -160,7 +160,7 @@ __asm {
|
|||||||
pslld xmm5, 24
|
pslld xmm5, 24
|
||||||
movdqa xmm4, kShuffleMaskBG24ToARGB
|
movdqa xmm4, kShuffleMaskBG24ToARGB
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
movdqa xmm0, [eax]
|
movdqa xmm0, [eax]
|
||||||
movdqa xmm1, [eax + 16]
|
movdqa xmm1, [eax + 16]
|
||||||
movdqa xmm3, [eax + 32]
|
movdqa xmm3, [eax + 32]
|
||||||
@ -199,7 +199,7 @@ __asm {
|
|||||||
pslld xmm5, 24
|
pslld xmm5, 24
|
||||||
movdqa xmm4, kShuffleMaskRAWToARGB
|
movdqa xmm4, kShuffleMaskRAWToARGB
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
movdqa xmm0, [eax]
|
movdqa xmm0, [eax]
|
||||||
movdqa xmm1, [eax + 16]
|
movdqa xmm1, [eax + 16]
|
||||||
movdqa xmm3, [eax + 32]
|
movdqa xmm3, [eax + 32]
|
||||||
@ -237,7 +237,7 @@ __asm {
|
|||||||
movdqa xmm5, kAddY16
|
movdqa xmm5, kAddY16
|
||||||
movdqa xmm4, kARGBToY
|
movdqa xmm4, kARGBToY
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
movdqa xmm0, [eax]
|
movdqa xmm0, [eax]
|
||||||
movdqa xmm1, [eax + 16]
|
movdqa xmm1, [eax + 16]
|
||||||
movdqa xmm2, [eax + 32]
|
movdqa xmm2, [eax + 32]
|
||||||
@ -270,7 +270,7 @@ __asm {
|
|||||||
movdqa xmm5, kAddY16
|
movdqa xmm5, kAddY16
|
||||||
movdqa xmm4, kBGRAToY
|
movdqa xmm4, kBGRAToY
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
movdqa xmm0, [eax]
|
movdqa xmm0, [eax]
|
||||||
movdqa xmm1, [eax + 16]
|
movdqa xmm1, [eax + 16]
|
||||||
movdqa xmm2, [eax + 32]
|
movdqa xmm2, [eax + 32]
|
||||||
@ -303,7 +303,7 @@ __asm {
|
|||||||
movdqa xmm5, kAddY16
|
movdqa xmm5, kAddY16
|
||||||
movdqa xmm4, kABGRToY
|
movdqa xmm4, kABGRToY
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
movdqa xmm0, [eax]
|
movdqa xmm0, [eax]
|
||||||
movdqa xmm1, [eax + 16]
|
movdqa xmm1, [eax + 16]
|
||||||
movdqa xmm2, [eax + 32]
|
movdqa xmm2, [eax + 32]
|
||||||
@ -343,7 +343,7 @@ __asm {
|
|||||||
movdqa xmm5, kAddUV128
|
movdqa xmm5, kAddUV128
|
||||||
sub edi, edx // stride from u to v
|
sub edi, edx // stride from u to v
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
/* step 1 - subsample 16x2 argb pixels to 8x1 */
|
/* step 1 - subsample 16x2 argb pixels to 8x1 */
|
||||||
movdqa xmm0, [eax]
|
movdqa xmm0, [eax]
|
||||||
movdqa xmm1, [eax + 16]
|
movdqa xmm1, [eax + 16]
|
||||||
@ -407,7 +407,7 @@ __asm {
|
|||||||
movdqa xmm5, kAddUV128
|
movdqa xmm5, kAddUV128
|
||||||
sub edi, edx // stride from u to v
|
sub edi, edx // stride from u to v
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
/* step 1 - subsample 16x2 argb pixels to 8x1 */
|
/* step 1 - subsample 16x2 argb pixels to 8x1 */
|
||||||
movdqa xmm0, [eax]
|
movdqa xmm0, [eax]
|
||||||
movdqa xmm1, [eax + 16]
|
movdqa xmm1, [eax + 16]
|
||||||
@ -471,7 +471,7 @@ __asm {
|
|||||||
movdqa xmm5, kAddUV128
|
movdqa xmm5, kAddUV128
|
||||||
sub edi, edx // stride from u to v
|
sub edi, edx // stride from u to v
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
/* step 1 - subsample 16x2 argb pixels to 8x1 */
|
/* step 1 - subsample 16x2 argb pixels to 8x1 */
|
||||||
movdqa xmm0, [eax]
|
movdqa xmm0, [eax]
|
||||||
movdqa xmm1, [eax + 16]
|
movdqa xmm1, [eax + 16]
|
||||||
@ -519,182 +519,6 @@ __asm {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define YUVTORGB_MMX(TABLE) __asm { \
|
|
||||||
__asm convertloop : \
|
|
||||||
__asm movzx eax, byte ptr [edi] \
|
|
||||||
__asm lea edi, [edi + 1] \
|
|
||||||
__asm movzx ebx, byte ptr [esi] \
|
|
||||||
__asm lea esi, [esi + 1] \
|
|
||||||
__asm movq mm0, [TABLE + 2048 + 8 * eax] \
|
|
||||||
__asm movzx eax, byte ptr [edx] \
|
|
||||||
__asm paddsw mm0, [TABLE + 4096 + 8 * ebx] \
|
|
||||||
__asm movzx ebx, byte ptr [edx + 1] \
|
|
||||||
__asm movq mm1, [TABLE + 8 * eax] \
|
|
||||||
__asm lea edx, [edx + 2] \
|
|
||||||
__asm movq mm2, [TABLE + 8 * ebx] \
|
|
||||||
__asm paddsw mm1, mm0 \
|
|
||||||
__asm paddsw mm2, mm0 \
|
|
||||||
__asm psraw mm1, 6 \
|
|
||||||
__asm psraw mm2, 6 \
|
|
||||||
__asm packuswb mm1, mm2 \
|
|
||||||
__asm movq [ebp], mm1 \
|
|
||||||
__asm lea ebp, [ebp + 8] \
|
|
||||||
__asm sub ecx, 2 \
|
|
||||||
__asm ja convertloop \
|
|
||||||
}
|
|
||||||
|
|
||||||
__declspec(naked)
|
|
||||||
void FastConvertYUVToARGBRow_MMX(const uint8* y_buf,
|
|
||||||
const uint8* u_buf,
|
|
||||||
const uint8* v_buf,
|
|
||||||
uint8* rgb_buf,
|
|
||||||
int width) {
|
|
||||||
__asm {
|
|
||||||
push ebx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebp
|
|
||||||
mov edx, [esp + 16 + 4]
|
|
||||||
mov edi, [esp + 16 + 8]
|
|
||||||
mov esi, [esp + 16 + 12]
|
|
||||||
mov ebp, [esp + 16 + 16]
|
|
||||||
mov ecx, [esp + 16 + 20]
|
|
||||||
|
|
||||||
YUVTORGB_MMX(kCoefficientsRgbY)
|
|
||||||
|
|
||||||
pop ebp
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__declspec(naked)
|
|
||||||
void FastConvertYUVToBGRARow_MMX(const uint8* y_buf,
|
|
||||||
const uint8* u_buf,
|
|
||||||
const uint8* v_buf,
|
|
||||||
uint8* rgb_buf,
|
|
||||||
int width) {
|
|
||||||
__asm {
|
|
||||||
push ebx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebp
|
|
||||||
mov edx, [esp + 16 + 4]
|
|
||||||
mov edi, [esp + 16 + 8]
|
|
||||||
mov esi, [esp + 16 + 12]
|
|
||||||
mov ebp, [esp + 16 + 16]
|
|
||||||
mov ecx, [esp + 16 + 20]
|
|
||||||
|
|
||||||
YUVTORGB_MMX(kCoefficientsBgraY)
|
|
||||||
|
|
||||||
pop ebp
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__declspec(naked)
|
|
||||||
void FastConvertYUVToABGRRow_MMX(const uint8* y_buf,
|
|
||||||
const uint8* u_buf,
|
|
||||||
const uint8* v_buf,
|
|
||||||
uint8* rgb_buf,
|
|
||||||
int width) {
|
|
||||||
__asm {
|
|
||||||
push ebx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebp
|
|
||||||
mov edx, [esp + 16 + 4]
|
|
||||||
mov edi, [esp + 16 + 8]
|
|
||||||
mov esi, [esp + 16 + 12]
|
|
||||||
mov ebp, [esp + 16 + 16]
|
|
||||||
mov ecx, [esp + 16 + 20]
|
|
||||||
|
|
||||||
YUVTORGB_MMX(kCoefficientsAbgrY)
|
|
||||||
|
|
||||||
pop ebp
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__declspec(naked)
|
|
||||||
void FastConvertYUV444ToARGBRow_MMX(const uint8* y_buf,
|
|
||||||
const uint8* u_buf,
|
|
||||||
const uint8* v_buf,
|
|
||||||
uint8* rgb_buf,
|
|
||||||
int width) {
|
|
||||||
__asm {
|
|
||||||
push ebx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebp
|
|
||||||
mov edx, [esp + 16 + 4]
|
|
||||||
mov edi, [esp + 16 + 8]
|
|
||||||
mov esi, [esp + 16 + 12]
|
|
||||||
mov ebp, [esp + 16 + 16]
|
|
||||||
mov ecx, [esp + 16 + 20]
|
|
||||||
|
|
||||||
convertloop :
|
|
||||||
movzx eax, byte ptr [edi]
|
|
||||||
lea edi, [edi + 1]
|
|
||||||
movzx ebx, byte ptr [esi]
|
|
||||||
lea esi, [esi + 1]
|
|
||||||
movq mm0, [kCoefficientsRgbY + 2048 + 8 * eax]
|
|
||||||
movzx eax, byte ptr [edx]
|
|
||||||
paddsw mm0, [kCoefficientsRgbY + 4096 + 8 * ebx]
|
|
||||||
lea edx, [edx + 1]
|
|
||||||
paddsw mm0, [kCoefficientsRgbY + 8 * eax]
|
|
||||||
psraw mm0, 6
|
|
||||||
packuswb mm0, mm0
|
|
||||||
movd [ebp], mm0
|
|
||||||
lea ebp, [ebp + 4]
|
|
||||||
sub ecx, 1
|
|
||||||
ja convertloop
|
|
||||||
|
|
||||||
pop ebp
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__declspec(naked)
|
|
||||||
void FastConvertYToARGBRow_MMX(const uint8* y_buf,
|
|
||||||
uint8* rgb_buf,
|
|
||||||
int width) {
|
|
||||||
__asm {
|
|
||||||
push ebx
|
|
||||||
mov eax, [esp + 4 + 4] // Y
|
|
||||||
mov edx, [esp + 4 + 8] // rgb
|
|
||||||
mov ecx, [esp + 4 + 12] // width
|
|
||||||
|
|
||||||
convertloop :
|
|
||||||
movzx ebx, byte ptr [eax]
|
|
||||||
movq mm0, [kCoefficientsRgbY + 8 * ebx]
|
|
||||||
psraw mm0, 6
|
|
||||||
movzx ebx, byte ptr [eax + 1]
|
|
||||||
movq mm1, [kCoefficientsRgbY + 8 * ebx]
|
|
||||||
psraw mm1, 6
|
|
||||||
packuswb mm0, mm1
|
|
||||||
lea eax, [eax + 2]
|
|
||||||
movq [edx], mm0
|
|
||||||
lea edx, [edx + 8]
|
|
||||||
sub ecx, 2
|
|
||||||
ja convertloop
|
|
||||||
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef HAS_FASTCONVERTYUVTOARGBROW_SSSE3
|
#ifdef HAS_FASTCONVERTYUVTOARGBROW_SSSE3
|
||||||
|
|
||||||
#define YG 74 /* static_cast<int8>(1.164 * 64 + 0.5) */
|
#define YG 74 /* static_cast<int8>(1.164 * 64 + 0.5) */
|
||||||
@ -712,35 +536,35 @@ void FastConvertYToARGBRow_MMX(const uint8* y_buf,
|
|||||||
#define BG UG * 128 + VG * 128
|
#define BG UG * 128 + VG * 128
|
||||||
#define BR UR * 128 + VR * 128
|
#define BR UR * 128 + VR * 128
|
||||||
|
|
||||||
SIMD_ALIGNED(const int8 kUVToB[16]) = {
|
static const vec8 kUVToB = {
|
||||||
UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
|
UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
|
||||||
};
|
};
|
||||||
|
|
||||||
SIMD_ALIGNED(const int8 kUVToR[16]) = {
|
static const vec8 kUVToR = {
|
||||||
UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
|
UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
|
||||||
};
|
};
|
||||||
|
|
||||||
SIMD_ALIGNED(const int8 kUVToG[16]) = {
|
static const vec8 kUVToG = {
|
||||||
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
|
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
|
||||||
};
|
};
|
||||||
|
|
||||||
SIMD_ALIGNED(const int16 kYToRgb[8]) = {
|
static const vec16 kYToRgb = {
|
||||||
YG, YG, YG, YG, YG, YG, YG, YG
|
YG, YG, YG, YG, YG, YG, YG, YG
|
||||||
};
|
};
|
||||||
|
|
||||||
SIMD_ALIGNED(const int16 kYSub16[8]) = {
|
static const vec16 kYSub16 = {
|
||||||
16, 16, 16, 16, 16, 16, 16, 16
|
16, 16, 16, 16, 16, 16, 16, 16
|
||||||
};
|
};
|
||||||
|
|
||||||
SIMD_ALIGNED(const int16 kUVBiasB[8]) = {
|
static const vec16 kUVBiasB = {
|
||||||
BB, BB, BB, BB, BB, BB, BB, BB
|
BB, BB, BB, BB, BB, BB, BB, BB
|
||||||
};
|
};
|
||||||
|
|
||||||
SIMD_ALIGNED(const int16 kUVBiasG[8]) = {
|
static const vec16 kUVBiasG = {
|
||||||
BG, BG, BG, BG, BG, BG, BG, BG
|
BG, BG, BG, BG, BG, BG, BG, BG
|
||||||
};
|
};
|
||||||
|
|
||||||
SIMD_ALIGNED(const int16 kUVBiasR[8]) = {
|
static const vec16 kUVBiasR = {
|
||||||
BR, BR, BR, BR, BR, BR, BR, BR
|
BR, BR, BR, BR, BR, BR, BR, BR
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -794,7 +618,7 @@ void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||||
pxor xmm4, xmm4
|
pxor xmm4, xmm4
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
YUVTORGB_SSSE3
|
YUVTORGB_SSSE3
|
||||||
|
|
||||||
// Step 3: Weave into ARGB
|
// Step 3: Weave into ARGB
|
||||||
@ -833,7 +657,7 @@ void FastConvertYUVToBGRARow_SSSE3(const uint8* y_buf,
|
|||||||
sub edi, esi
|
sub edi, esi
|
||||||
pxor xmm4, xmm4
|
pxor xmm4, xmm4
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
YUVTORGB_SSSE3
|
YUVTORGB_SSSE3
|
||||||
|
|
||||||
// Step 3: Weave into BGRA
|
// Step 3: Weave into BGRA
|
||||||
@ -874,7 +698,7 @@ void FastConvertYUVToABGRRow_SSSE3(const uint8* y_buf,
|
|||||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||||
pxor xmm4, xmm4
|
pxor xmm4, xmm4
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
YUVTORGB_SSSE3
|
YUVTORGB_SSSE3
|
||||||
|
|
||||||
// Step 3: Weave into ARGB
|
// Step 3: Weave into ARGB
|
||||||
@ -914,7 +738,7 @@ void FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||||
pxor xmm4, xmm4
|
pxor xmm4, xmm4
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
// Step 1: Find 4 UV contributions to 4 R,G,B values
|
// Step 1: Find 4 UV contributions to 4 R,G,B values
|
||||||
movd xmm0, [esi] // U
|
movd xmm0, [esi] // U
|
||||||
movd xmm1, [esi + edi] // V
|
movd xmm1, [esi + edi] // V
|
||||||
@ -978,7 +802,7 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
|
|||||||
movdqa xmm3, kYSub16
|
movdqa xmm3, kYSub16
|
||||||
movdqa xmm2, kYToRgb
|
movdqa xmm2, kYToRgb
|
||||||
|
|
||||||
convertloop :
|
convertloop:
|
||||||
// Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
|
// Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
|
||||||
movq xmm0, qword ptr [eax]
|
movq xmm0, qword ptr [eax]
|
||||||
lea eax, [eax + 8]
|
lea eax, [eax + 8]
|
||||||
|
|||||||
@ -14,6 +14,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include "libyuv/cpu_id.h"
|
#include "libyuv/cpu_id.h"
|
||||||
|
#include "row.h"
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
#define ALIGN16(var) __declspec(align(16)) var
|
#define ALIGN16(var) __declspec(align(16)) var
|
||||||
@ -21,6 +22,7 @@
|
|||||||
#define ALIGN16(var) var __attribute__((aligned(16)))
|
#define ALIGN16(var) var __attribute__((aligned(16)))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// Note: A Neon reference manual
|
// Note: A Neon reference manual
|
||||||
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0204j/CJAJIIGG.html
|
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0204j/CJAJIIGG.html
|
||||||
// Note: Some SSE2 reference manuals
|
// Note: Some SSE2 reference manuals
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user