mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
remove mmx functions
BUG=none TEST=builds Review URL: http://webrtc-codereview.appspot.com/269010 git-svn-id: http://libyuv.googlecode.com/svn/trunk@77 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
c82af4a59c
commit
eaedc1d727
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 69
|
||||
Version: 77
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -1149,11 +1149,6 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
(width % 2 == 0)) {
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2;
|
||||
} else
|
||||
#endif
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_MMX)
|
||||
if (width % 2 == 0) {
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_MMX;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
|
||||
@ -1167,8 +1162,6 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
}
|
||||
// MMX used for FastConvertYUVToARGBRow requires an emms instruction.
|
||||
EMMS();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1201,11 +1194,6 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
(width % 2 == 0)) {
|
||||
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_SSE2;
|
||||
} else
|
||||
#endif
|
||||
#if defined(HAS_FASTCONVERTYUVTOBGRAROW_MMX)
|
||||
if (width % 2 == 0) {
|
||||
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_MMX;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_C;
|
||||
@ -1219,7 +1207,6 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
}
|
||||
EMMS();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1252,11 +1239,6 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
(width % 2 == 0)) {
|
||||
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_SSE2;
|
||||
} else
|
||||
#endif
|
||||
#if defined(HAS_FASTCONVERTYUVTOABGRROW_MMX)
|
||||
if (width % 2 == 0) {
|
||||
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_MMX;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_C;
|
||||
@ -1270,7 +1252,6 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
}
|
||||
EMMS();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1303,11 +1284,6 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
(width % 2 == 0)) {
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2;
|
||||
} else
|
||||
#endif
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_MMX)
|
||||
if (width % 2 == 0) {
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_MMX;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
|
||||
@ -1319,8 +1295,6 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
// MMX used for FastConvertYUVToARGBRow requires an emms instruction.
|
||||
EMMS();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1353,13 +1327,9 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
|
||||
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_SSE2;
|
||||
} else
|
||||
#endif
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_MMX)
|
||||
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_MMX;
|
||||
#else
|
||||
{
|
||||
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_C;
|
||||
}
|
||||
#endif
|
||||
for (int y = 0; y < height; ++y) {
|
||||
FastConvertYUV444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
@ -1367,8 +1337,6 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
// MMX used for FastConvertYUVToARGBRow requires an emms instruction.
|
||||
EMMS();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1391,11 +1359,6 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
|
||||
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
|
||||
FastConvertYToARGBRow = FastConvertYToARGBRow_SSE2;
|
||||
} else
|
||||
#endif
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_MMX)
|
||||
if (width % 2 == 0) {
|
||||
FastConvertYToARGBRow = FastConvertYToARGBRow_MMX;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
FastConvertYToARGBRow = FastConvertYToARGBRow_C;
|
||||
@ -1405,8 +1368,6 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
}
|
||||
// MMX used for FastConvertYUVToARGBRow requires an emms instruction.
|
||||
EMMS();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@ -13,21 +13,19 @@
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/planar_functions.h"
|
||||
#include "rotate_priv.h"
|
||||
#include "row.h"
|
||||
|
||||
namespace libyuv {
|
||||
|
||||
#if (defined(WIN32) || defined(__x86_64__) || defined(__i386__)) \
|
||||
&& !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
||||
#if defined(_MSC_VER)
|
||||
#define TALIGN16(t, var) static __declspec(align(16)) t _ ## var
|
||||
#else
|
||||
#define TALIGN16(t, var) t var __attribute__((aligned(16)))
|
||||
#endif
|
||||
#if (defined(WIN32) || defined(__x86_64__) || defined(__i386__)) && \
|
||||
!defined(__APPLE__) && \
|
||||
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
||||
|
||||
// Shuffle table for reversing the bytes.
|
||||
extern "C" TALIGN16(const uint8, kShuffleReverse[16]) =
|
||||
static const uvec8 kShuffleReverse =
|
||||
{ 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u };
|
||||
// Shuffle table for reversing the bytes of UV channels.
|
||||
extern "C" TALIGN16(const uint8, kShuffleReverseUV[16]) =
|
||||
static const uvec8 kShuffleReverseUV =
|
||||
{ 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u };
|
||||
#endif
|
||||
|
||||
@ -863,7 +861,7 @@ __asm {
|
||||
mov eax, [esp + 4] // src
|
||||
mov edx, [esp + 8] // dst
|
||||
mov ecx, [esp + 12] // width
|
||||
movdqa xmm5, _kShuffleReverse
|
||||
movdqa xmm5, kShuffleReverse
|
||||
lea eax, [eax + ecx - 16]
|
||||
convertloop:
|
||||
movdqa xmm0, [eax]
|
||||
@ -878,12 +876,16 @@ __asm {
|
||||
}
|
||||
|
||||
#elif (defined(__i386__) || defined(__x86_64__)) && \
|
||||
!defined(__APPLE__) && \
|
||||
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
||||
#define HAS_REVERSE_LINE_SSSE3
|
||||
static void ReverseLine_SSSE3(const uint8* src, uint8* dst, int width) {
|
||||
intptr_t temp_width = static_cast<intptr_t>(width);
|
||||
asm volatile (
|
||||
"movdqa (%3),%%xmm5 \n"
|
||||
"movdqa %0,%%xmm5 \n"
|
||||
:: "m"(kShuffleReverse)
|
||||
);
|
||||
asm volatile (
|
||||
"lea -0x10(%0,%2,1),%0 \n"
|
||||
"1: \n"
|
||||
"movdqa (%0),%%xmm0 \n"
|
||||
@ -896,7 +898,7 @@ static void ReverseLine_SSSE3(const uint8* src, uint8* dst, int width) {
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(temp_width) // %2
|
||||
: "r"(kShuffleReverse) // %3
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm5"
|
||||
@ -1066,7 +1068,7 @@ __asm {
|
||||
mov edx, [esp + 4 + 8] // dst_a
|
||||
mov edi, [esp + 4 + 12] // dst_b
|
||||
mov ecx, [esp + 4 + 16] // width
|
||||
movdqa xmm5, _kShuffleReverseUV
|
||||
movdqa xmm5, kShuffleReverseUV
|
||||
lea eax, [eax + ecx * 2 - 16]
|
||||
|
||||
convertloop:
|
||||
@ -1085,6 +1087,7 @@ __asm {
|
||||
}
|
||||
|
||||
#elif (defined(__i386__) || defined(__x86_64__)) && \
|
||||
!defined(__APPLE__) && \
|
||||
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
||||
#define HAS_REVERSE_LINE_UV_SSSE3
|
||||
void ReverseLineUV_SSSE3(const uint8* src,
|
||||
@ -1092,23 +1095,26 @@ void ReverseLineUV_SSSE3(const uint8* src,
|
||||
int width) {
|
||||
intptr_t temp_width = static_cast<intptr_t>(width);
|
||||
asm volatile (
|
||||
"movdqa (%4),%%xmm5 \n"
|
||||
"lea -0x10(%0,%3,2),%0 \n"
|
||||
"movdqa %0,%%xmm5 \n"
|
||||
:: "m"(kShuffleReverseUV)
|
||||
);
|
||||
asm volatile (
|
||||
"lea -16(%0,%3,2),%0 \n"
|
||||
"1: \n"
|
||||
"movdqa (%0),%%xmm0 \n"
|
||||
"lea -0x10(%0),%0 \n"
|
||||
"lea -16(%0),%0 \n"
|
||||
"pshufb %%xmm5,%%xmm0 \n"
|
||||
"movlpd %%xmm0,(%1) \n"
|
||||
"lea 0x8(%1),%1 \n"
|
||||
"lea 8(%1),%1 \n"
|
||||
"movhpd %%xmm0,(%2) \n"
|
||||
"lea 0x8(%2),%2 \n"
|
||||
"sub $0x8,%3 \n"
|
||||
"lea 8(%2),%2 \n"
|
||||
"sub $8,%3 \n"
|
||||
"ja 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst_a), // %1
|
||||
"+r"(dst_b), // %2
|
||||
"+r"(temp_width) // %3
|
||||
: "r"(kShuffleReverseUV) // %4
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm5"
|
||||
|
||||
81
source/row.h
81
source/row.h
@ -51,15 +51,6 @@
|
||||
#define HAS_FASTCONVERTYTOARGBROW_SSE2
|
||||
#endif
|
||||
|
||||
// The following are available on Windows and GCC 32 bit
|
||||
#if (defined(WIN32) || \
|
||||
defined(__i386__)) && \
|
||||
!defined(LIBYUV_DISABLE_ASM)
|
||||
#define HAS_FASTCONVERTYUVTOARGBROW_MMX
|
||||
#define HAS_FASTCONVERTYUVTOBGRAROW_MMX
|
||||
#define HAS_FASTCONVERTYUVTOABGRROW_MMX
|
||||
#endif
|
||||
|
||||
// The following are available on Windows
|
||||
#if defined(WIN32) && \
|
||||
!defined(LIBYUV_DISABLE_ASM)
|
||||
@ -128,12 +119,14 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define SIMD_ALIGNED(var) __declspec(align(16)) var
|
||||
#define TALIGN16(t, var) static __declspec(align(16)) t _ ## var
|
||||
typedef __declspec(align(16)) signed char vec8[16];
|
||||
typedef __declspec(align(16)) unsigned char uvec8[16];
|
||||
typedef __declspec(align(16)) signed short vec16[8];
|
||||
#else // __GNUC__
|
||||
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
|
||||
#define TALIGN16(t, var) t var __attribute__((aligned(16)))
|
||||
typedef signed char __attribute__((vector_size(16))) vec8;
|
||||
typedef unsigned char __attribute__((vector_size(16))) uvec8;
|
||||
typedef signed short __attribute__((vector_size(16))) vec16;
|
||||
#endif
|
||||
|
||||
extern "C" SIMD_ALIGNED(const int16 kCoefficientsRgbY[768][4]);
|
||||
@ -204,36 +197,6 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
|
||||
int width);
|
||||
#endif
|
||||
|
||||
#ifdef HAS_FASTCONVERTYUVTOARGBROW_MMX
|
||||
void FastConvertYUVToARGBRow_MMX(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYUVToBGRARow_MMX(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYUVToABGRRow_MMX(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYUV444ToARGBRow_MMX(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYToARGBRow_MMX(const uint8* y_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#endif
|
||||
|
||||
#ifdef HAS_FASTCONVERTYUVTOARGBROW_SSSE3
|
||||
void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
@ -268,42 +231,6 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
|
||||
|
||||
#endif
|
||||
|
||||
// Method to force C version.
|
||||
//#define USE_MMX 0
|
||||
//#define USE_SSE2 0
|
||||
|
||||
#if !defined(USE_MMX)
|
||||
// Windows, Mac and Linux use MMX
|
||||
#if defined(__i386__) || defined(_MSC_VER)
|
||||
#define USE_MMX 1
|
||||
#else
|
||||
#define USE_MMX 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(USE_SSE2)
|
||||
#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || _M_IX86_FP==2
|
||||
#define USE_SSE2 1
|
||||
#else
|
||||
#define USE_SSE2 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// x64 uses MMX2 (SSE) so emms is not required.
|
||||
// Warning C4799: function has no EMMS instruction.
|
||||
// EMMS() is slow and should be called by the calling function once per image.
|
||||
#if USE_MMX && !defined(ARCH_CPU_X86_64)
|
||||
#if defined(_MSC_VER)
|
||||
#define EMMS() __asm emms
|
||||
#pragma warning(disable: 4799)
|
||||
#else
|
||||
#define EMMS() asm("emms")
|
||||
#endif
|
||||
#else
|
||||
#define EMMS()
|
||||
#endif
|
||||
|
||||
|
||||
} // extern "C"
|
||||
|
||||
#endif // LIBYUV_SOURCE_ROW_H_
|
||||
|
||||
@ -542,231 +542,6 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf, // rdi
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef HAS_FASTCONVERTYUVTOARGBROW_MMX
|
||||
// 32 bit mmx gcc version
|
||||
|
||||
#ifdef OSX
|
||||
#define UNDERSCORE "_"
|
||||
#else
|
||||
#define UNDERSCORE ""
|
||||
#endif
|
||||
|
||||
void FastConvertYUVToARGBRow_MMX(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
asm(
|
||||
".text \n"
|
||||
#if defined(OSX) || defined(IOS)
|
||||
".globl _FastConvertYUVToARGBRow_MMX \n"
|
||||
"_FastConvertYUVToARGBRow_MMX: \n"
|
||||
#else
|
||||
".global FastConvertYUVToARGBRow_MMX \n"
|
||||
"FastConvertYUVToARGBRow_MMX: \n"
|
||||
#endif
|
||||
"pusha \n"
|
||||
"mov 0x24(%esp),%edx \n"
|
||||
"mov 0x28(%esp),%edi \n"
|
||||
"mov 0x2c(%esp),%esi \n"
|
||||
"mov 0x30(%esp),%ebp \n"
|
||||
"mov 0x34(%esp),%ecx \n"
|
||||
|
||||
"1: \n"
|
||||
"movzbl (%edi),%eax \n"
|
||||
"lea 1(%edi),%edi \n"
|
||||
"movzbl (%esi),%ebx \n"
|
||||
"lea 1(%esi),%esi \n"
|
||||
"movq " UNDERSCORE "kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
|
||||
"movzbl (%edx),%eax \n"
|
||||
"paddsw " UNDERSCORE "kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
|
||||
"movzbl 0x1(%edx),%ebx \n"
|
||||
"movq " UNDERSCORE "kCoefficientsRgbY(,%eax,8),%mm1\n"
|
||||
"lea 2(%edx),%edx \n"
|
||||
"movq " UNDERSCORE "kCoefficientsRgbY(,%ebx,8),%mm2\n"
|
||||
"paddsw %mm0,%mm1 \n"
|
||||
"paddsw %mm0,%mm2 \n"
|
||||
"psraw $0x6,%mm1 \n"
|
||||
"psraw $0x6,%mm2 \n"
|
||||
"packuswb %mm2,%mm1 \n"
|
||||
"movq %mm1,0x0(%ebp) \n"
|
||||
"lea 8(%ebp),%ebp \n"
|
||||
"sub $0x2,%ecx \n"
|
||||
"ja 1b \n"
|
||||
"popa \n"
|
||||
"ret \n"
|
||||
);
|
||||
|
||||
void FastConvertYUVToBGRARow_MMX(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
asm(
|
||||
".text \n"
|
||||
#if defined(OSX) || defined(IOS)
|
||||
".globl _FastConvertYUVToBGRARow_MMX \n"
|
||||
"_FastConvertYUVToBGRARow_MMX: \n"
|
||||
#else
|
||||
".global FastConvertYUVToBGRARow_MMX \n"
|
||||
"FastConvertYUVToBGRARow_MMX: \n"
|
||||
#endif
|
||||
"pusha \n"
|
||||
"mov 0x24(%esp),%edx \n"
|
||||
"mov 0x28(%esp),%edi \n"
|
||||
"mov 0x2c(%esp),%esi \n"
|
||||
"mov 0x30(%esp),%ebp \n"
|
||||
"mov 0x34(%esp),%ecx \n"
|
||||
|
||||
"1: \n"
|
||||
"movzbl (%edi),%eax \n"
|
||||
"lea 1(%edi),%edi \n"
|
||||
"movzbl (%esi),%ebx \n"
|
||||
"lea 1(%esi),%esi \n"
|
||||
"movq " UNDERSCORE "kCoefficientsBgraY+2048(,%eax,8),%mm0\n"
|
||||
"movzbl (%edx),%eax \n"
|
||||
"paddsw " UNDERSCORE "kCoefficientsBgraY+4096(,%ebx,8),%mm0\n"
|
||||
"movzbl 0x1(%edx),%ebx \n"
|
||||
"movq " UNDERSCORE "kCoefficientsBgraY(,%eax,8),%mm1\n"
|
||||
"lea 2(%edx),%edx \n"
|
||||
"movq " UNDERSCORE "kCoefficientsBgraY(,%ebx,8),%mm2\n"
|
||||
"paddsw %mm0,%mm1 \n"
|
||||
"paddsw %mm0,%mm2 \n"
|
||||
"psraw $0x6,%mm1 \n"
|
||||
"psraw $0x6,%mm2 \n"
|
||||
"packuswb %mm2,%mm1 \n"
|
||||
"movq %mm1,0x0(%ebp) \n"
|
||||
"lea 8(%ebp),%ebp \n"
|
||||
"sub $0x2,%ecx \n"
|
||||
"ja 1b \n"
|
||||
"popa \n"
|
||||
"ret \n"
|
||||
);
|
||||
|
||||
void FastConvertYUVToABGRRow_MMX(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
asm(
|
||||
".text \n"
|
||||
#if defined(OSX) || defined(IOS)
|
||||
".globl _FastConvertYUVToABGRRow_MMX \n"
|
||||
"_FastConvertYUVToABGRRow_MMX: \n"
|
||||
#else
|
||||
".global FastConvertYUVToABGRRow_MMX \n"
|
||||
"FastConvertYUVToABGRRow_MMX: \n"
|
||||
#endif
|
||||
"pusha \n"
|
||||
"mov 0x24(%esp),%edx \n"
|
||||
"mov 0x28(%esp),%edi \n"
|
||||
"mov 0x2c(%esp),%esi \n"
|
||||
"mov 0x30(%esp),%ebp \n"
|
||||
"mov 0x34(%esp),%ecx \n"
|
||||
|
||||
"1: \n"
|
||||
"movzbl (%edi),%eax \n"
|
||||
"lea 1(%edi),%edi \n"
|
||||
"movzbl (%esi),%ebx \n"
|
||||
"lea 1(%esi),%esi \n"
|
||||
"movq " UNDERSCORE "kCoefficientsAbgrY+2048(,%eax,8),%mm0\n"
|
||||
"movzbl (%edx),%eax \n"
|
||||
"paddsw " UNDERSCORE "kCoefficientsAbgrY+4096(,%ebx,8),%mm0\n"
|
||||
"movzbl 0x1(%edx),%ebx \n"
|
||||
"movq " UNDERSCORE "kCoefficientsAbgrY(,%eax,8),%mm1\n"
|
||||
"lea 2(%edx),%edx \n"
|
||||
"movq " UNDERSCORE "kCoefficientsAbgrY(,%ebx,8),%mm2\n"
|
||||
"paddsw %mm0,%mm1 \n"
|
||||
"paddsw %mm0,%mm2 \n"
|
||||
"psraw $0x6,%mm1 \n"
|
||||
"psraw $0x6,%mm2 \n"
|
||||
"packuswb %mm2,%mm1 \n"
|
||||
"movq %mm1,0x0(%ebp) \n"
|
||||
"lea 8(%ebp),%ebp \n"
|
||||
"sub $0x2,%ecx \n"
|
||||
"ja 1b \n"
|
||||
"popa \n"
|
||||
"ret \n"
|
||||
);
|
||||
|
||||
void FastConvertYUV444ToARGBRow_MMX(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
asm(
|
||||
".text \n"
|
||||
#if defined(OSX) || defined(IOS)
|
||||
".globl _FastConvertYUV444ToARGBRow_MMX \n"
|
||||
"_FastConvertYUV444ToARGBRow_MMX: \n"
|
||||
#else
|
||||
".global FastConvertYUV444ToARGBRow_MMX \n"
|
||||
"FastConvertYUV444ToARGBRow_MMX: \n"
|
||||
#endif
|
||||
"pusha \n"
|
||||
"mov 0x24(%esp),%edx \n"
|
||||
"mov 0x28(%esp),%edi \n"
|
||||
"mov 0x2c(%esp),%esi \n"
|
||||
"mov 0x30(%esp),%ebp \n"
|
||||
"mov 0x34(%esp),%ecx \n"
|
||||
|
||||
"1: \n"
|
||||
"movzbl (%edi),%eax \n"
|
||||
"lea 1(%edi),%edi \n"
|
||||
"movzbl (%esi),%ebx \n"
|
||||
"lea 1(%esi),%esi \n"
|
||||
"movq " UNDERSCORE "kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
|
||||
"movzbl (%edx),%eax \n"
|
||||
"paddsw " UNDERSCORE "kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
|
||||
"lea 1(%edx),%edx \n"
|
||||
"paddsw " UNDERSCORE "kCoefficientsRgbY(,%eax,8),%mm0\n"
|
||||
"psraw $0x6,%mm0 \n"
|
||||
"packuswb %mm0,%mm0 \n"
|
||||
"movd %mm0,0x0(%ebp) \n"
|
||||
"lea 4(%ebp),%ebp \n"
|
||||
"sub $0x1,%ecx \n"
|
||||
"ja 1b \n"
|
||||
"popa \n"
|
||||
"ret \n"
|
||||
);
|
||||
|
||||
void FastConvertYToARGBRow_MMX(const uint8* y_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
asm(
|
||||
".text \n"
|
||||
#if defined(OSX) || defined(IOS)
|
||||
".globl _FastConvertYToARGBRow_MMX \n"
|
||||
"_FastConvertYToARGBRow_MMX: \n"
|
||||
#else
|
||||
".global FastConvertYToARGBRow_MMX \n"
|
||||
"FastConvertYToARGBRow_MMX: \n"
|
||||
#endif
|
||||
"push %ebx \n"
|
||||
"mov 0x8(%esp),%eax \n"
|
||||
"mov 0xc(%esp),%edx \n"
|
||||
"mov 0x10(%esp),%ecx \n"
|
||||
|
||||
"1: \n"
|
||||
"movzbl (%eax),%ebx \n"
|
||||
"movq " UNDERSCORE "kCoefficientsRgbY(,%ebx,8),%mm0\n"
|
||||
"psraw $0x6,%mm0 \n"
|
||||
"movzbl 0x1(%eax),%ebx \n"
|
||||
"movq " UNDERSCORE "kCoefficientsRgbY(,%ebx,8),%mm1\n"
|
||||
"psraw $0x6,%mm1 \n"
|
||||
"packuswb %mm1,%mm0 \n"
|
||||
"lea 0x2(%eax),%eax \n"
|
||||
"movq %mm0,(%edx) \n"
|
||||
"lea 0x8(%edx),%edx \n"
|
||||
"sub $0x2,%ecx \n"
|
||||
"ja 1b \n"
|
||||
"pop %ebx \n"
|
||||
"ret \n"
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef HAS_ARGBTOYROW_SSSE3
|
||||
|
||||
@ -15,71 +15,71 @@ extern "C" {
|
||||
#ifdef HAS_ARGBTOYROW_SSSE3
|
||||
|
||||
// Constant multiplication table for converting ARGB to I400.
|
||||
SIMD_ALIGNED(const int8 kARGBToY[16]) = {
|
||||
static const vec8 kARGBToY = {
|
||||
13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
|
||||
};
|
||||
|
||||
SIMD_ALIGNED(const int8 kARGBToU[16]) = {
|
||||
static const vec8 kARGBToU = {
|
||||
112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
|
||||
};
|
||||
|
||||
SIMD_ALIGNED(const int8 kARGBToV[16]) = {
|
||||
static const vec8 kARGBToV = {
|
||||
-18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
|
||||
};
|
||||
|
||||
// Constants for BGRA
|
||||
SIMD_ALIGNED(const int8 kBGRAToY[16]) = {
|
||||
static const vec8 kBGRAToY = {
|
||||
0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
|
||||
};
|
||||
|
||||
SIMD_ALIGNED(const int8 kBGRAToU[16]) = {
|
||||
static const vec8 kBGRAToU = {
|
||||
0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112
|
||||
};
|
||||
|
||||
SIMD_ALIGNED(const int8 kBGRAToV[16]) = {
|
||||
static const vec8 kBGRAToV = {
|
||||
0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18
|
||||
};
|
||||
|
||||
// Constants for ABGR
|
||||
SIMD_ALIGNED(const int8 kABGRToY[16]) = {
|
||||
static const vec8 kABGRToY = {
|
||||
33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0
|
||||
};
|
||||
|
||||
SIMD_ALIGNED(const int8 kABGRToU[16]) = {
|
||||
static const vec8 kABGRToU = {
|
||||
-38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0
|
||||
};
|
||||
|
||||
SIMD_ALIGNED(const int8 kABGRToV[16]) = {
|
||||
static const vec8 kABGRToV = {
|
||||
112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0
|
||||
};
|
||||
|
||||
SIMD_ALIGNED(const uint8 kAddY16[16]) = {
|
||||
static const uvec8 kAddY16 = {
|
||||
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
|
||||
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
|
||||
};
|
||||
|
||||
SIMD_ALIGNED(const uint8 kAddUV128[16]) = {
|
||||
static const uvec8 kAddUV128 = {
|
||||
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
|
||||
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
|
||||
};
|
||||
|
||||
// Shuffle table for converting BG24 to ARGB.
|
||||
SIMD_ALIGNED(const uint8 kShuffleMaskBG24ToARGB[16]) = {
|
||||
static const uvec8 kShuffleMaskBG24ToARGB = {
|
||||
0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
|
||||
};
|
||||
|
||||
// Shuffle table for converting RAW to ARGB.
|
||||
SIMD_ALIGNED(const uint8 kShuffleMaskRAWToARGB[16]) = {
|
||||
static const uvec8 kShuffleMaskRAWToARGB = {
|
||||
2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
|
||||
};
|
||||
|
||||
// Shuffle table for converting ABGR to ARGB.
|
||||
SIMD_ALIGNED(const uint8 kShuffleMaskABGRToARGB[16]) = {
|
||||
static const uvec8 kShuffleMaskABGRToARGB = {
|
||||
2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u
|
||||
};
|
||||
|
||||
// Shuffle table for converting BGRA to ARGB.
|
||||
SIMD_ALIGNED(const uint8 kShuffleMaskBGRAToARGB[16]) = {
|
||||
static const uvec8 kShuffleMaskBGRAToARGB = {
|
||||
3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
|
||||
};
|
||||
|
||||
@ -519,182 +519,6 @@ __asm {
|
||||
}
|
||||
}
|
||||
|
||||
#define YUVTORGB_MMX(TABLE) __asm { \
|
||||
__asm convertloop : \
|
||||
__asm movzx eax, byte ptr [edi] \
|
||||
__asm lea edi, [edi + 1] \
|
||||
__asm movzx ebx, byte ptr [esi] \
|
||||
__asm lea esi, [esi + 1] \
|
||||
__asm movq mm0, [TABLE + 2048 + 8 * eax] \
|
||||
__asm movzx eax, byte ptr [edx] \
|
||||
__asm paddsw mm0, [TABLE + 4096 + 8 * ebx] \
|
||||
__asm movzx ebx, byte ptr [edx + 1] \
|
||||
__asm movq mm1, [TABLE + 8 * eax] \
|
||||
__asm lea edx, [edx + 2] \
|
||||
__asm movq mm2, [TABLE + 8 * ebx] \
|
||||
__asm paddsw mm1, mm0 \
|
||||
__asm paddsw mm2, mm0 \
|
||||
__asm psraw mm1, 6 \
|
||||
__asm psraw mm2, 6 \
|
||||
__asm packuswb mm1, mm2 \
|
||||
__asm movq [ebp], mm1 \
|
||||
__asm lea ebp, [ebp + 8] \
|
||||
__asm sub ecx, 2 \
|
||||
__asm ja convertloop \
|
||||
}
|
||||
|
||||
__declspec(naked)
|
||||
void FastConvertYUVToARGBRow_MMX(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
__asm {
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
push ebp
|
||||
mov edx, [esp + 16 + 4]
|
||||
mov edi, [esp + 16 + 8]
|
||||
mov esi, [esp + 16 + 12]
|
||||
mov ebp, [esp + 16 + 16]
|
||||
mov ecx, [esp + 16 + 20]
|
||||
|
||||
YUVTORGB_MMX(kCoefficientsRgbY)
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
__declspec(naked)
|
||||
void FastConvertYUVToBGRARow_MMX(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
__asm {
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
push ebp
|
||||
mov edx, [esp + 16 + 4]
|
||||
mov edi, [esp + 16 + 8]
|
||||
mov esi, [esp + 16 + 12]
|
||||
mov ebp, [esp + 16 + 16]
|
||||
mov ecx, [esp + 16 + 20]
|
||||
|
||||
YUVTORGB_MMX(kCoefficientsBgraY)
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
__declspec(naked)
|
||||
void FastConvertYUVToABGRRow_MMX(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
__asm {
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
push ebp
|
||||
mov edx, [esp + 16 + 4]
|
||||
mov edi, [esp + 16 + 8]
|
||||
mov esi, [esp + 16 + 12]
|
||||
mov ebp, [esp + 16 + 16]
|
||||
mov ecx, [esp + 16 + 20]
|
||||
|
||||
YUVTORGB_MMX(kCoefficientsAbgrY)
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
__declspec(naked)
|
||||
void FastConvertYUV444ToARGBRow_MMX(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
__asm {
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
push ebp
|
||||
mov edx, [esp + 16 + 4]
|
||||
mov edi, [esp + 16 + 8]
|
||||
mov esi, [esp + 16 + 12]
|
||||
mov ebp, [esp + 16 + 16]
|
||||
mov ecx, [esp + 16 + 20]
|
||||
|
||||
convertloop :
|
||||
movzx eax, byte ptr [edi]
|
||||
lea edi, [edi + 1]
|
||||
movzx ebx, byte ptr [esi]
|
||||
lea esi, [esi + 1]
|
||||
movq mm0, [kCoefficientsRgbY + 2048 + 8 * eax]
|
||||
movzx eax, byte ptr [edx]
|
||||
paddsw mm0, [kCoefficientsRgbY + 4096 + 8 * ebx]
|
||||
lea edx, [edx + 1]
|
||||
paddsw mm0, [kCoefficientsRgbY + 8 * eax]
|
||||
psraw mm0, 6
|
||||
packuswb mm0, mm0
|
||||
movd [ebp], mm0
|
||||
lea ebp, [ebp + 4]
|
||||
sub ecx, 1
|
||||
ja convertloop
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
__declspec(naked)
|
||||
void FastConvertYToARGBRow_MMX(const uint8* y_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
__asm {
|
||||
push ebx
|
||||
mov eax, [esp + 4 + 4] // Y
|
||||
mov edx, [esp + 4 + 8] // rgb
|
||||
mov ecx, [esp + 4 + 12] // width
|
||||
|
||||
convertloop :
|
||||
movzx ebx, byte ptr [eax]
|
||||
movq mm0, [kCoefficientsRgbY + 8 * ebx]
|
||||
psraw mm0, 6
|
||||
movzx ebx, byte ptr [eax + 1]
|
||||
movq mm1, [kCoefficientsRgbY + 8 * ebx]
|
||||
psraw mm1, 6
|
||||
packuswb mm0, mm1
|
||||
lea eax, [eax + 2]
|
||||
movq [edx], mm0
|
||||
lea edx, [edx + 8]
|
||||
sub ecx, 2
|
||||
ja convertloop
|
||||
|
||||
pop ebx
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAS_FASTCONVERTYUVTOARGBROW_SSSE3
|
||||
|
||||
#define YG 74 /* static_cast<int8>(1.164 * 64 + 0.5) */
|
||||
@ -712,35 +536,35 @@ void FastConvertYToARGBRow_MMX(const uint8* y_buf,
|
||||
#define BG UG * 128 + VG * 128
|
||||
#define BR UR * 128 + VR * 128
|
||||
|
||||
SIMD_ALIGNED(const int8 kUVToB[16]) = {
|
||||
static const vec8 kUVToB = {
|
||||
UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
|
||||
};
|
||||
|
||||
SIMD_ALIGNED(const int8 kUVToR[16]) = {
|
||||
static const vec8 kUVToR = {
|
||||
UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
|
||||
};
|
||||
|
||||
SIMD_ALIGNED(const int8 kUVToG[16]) = {
|
||||
static const vec8 kUVToG = {
|
||||
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
|
||||
};
|
||||
|
||||
SIMD_ALIGNED(const int16 kYToRgb[8]) = {
|
||||
static const vec16 kYToRgb = {
|
||||
YG, YG, YG, YG, YG, YG, YG, YG
|
||||
};
|
||||
|
||||
SIMD_ALIGNED(const int16 kYSub16[8]) = {
|
||||
static const vec16 kYSub16 = {
|
||||
16, 16, 16, 16, 16, 16, 16, 16
|
||||
};
|
||||
|
||||
SIMD_ALIGNED(const int16 kUVBiasB[8]) = {
|
||||
static const vec16 kUVBiasB = {
|
||||
BB, BB, BB, BB, BB, BB, BB, BB
|
||||
};
|
||||
|
||||
SIMD_ALIGNED(const int16 kUVBiasG[8]) = {
|
||||
static const vec16 kUVBiasG = {
|
||||
BG, BG, BG, BG, BG, BG, BG, BG
|
||||
};
|
||||
|
||||
SIMD_ALIGNED(const int16 kUVBiasR[8]) = {
|
||||
static const vec16 kUVBiasR = {
|
||||
BR, BR, BR, BR, BR, BR, BR, BR
|
||||
};
|
||||
|
||||
|
||||
@ -14,6 +14,7 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "row.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define ALIGN16(var) __declspec(align(16)) var
|
||||
@ -21,6 +22,7 @@
|
||||
#define ALIGN16(var) var __attribute__((aligned(16)))
|
||||
#endif
|
||||
|
||||
|
||||
// Note: A Neon reference manual
|
||||
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0204j/CJAJIIGG.html
|
||||
// Note: Some SSE2 reference manuals
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user