Scale ported to OS X for 32/64 bit. Required breaking some functions into 2 or 3 asm inline blocks to set constants. Reworked the 6-byte store on all platforms for 3/4 scale: store the first 4 bytes, then store the last 4 bytes, overlapping 2, to stay within SSE2 registers.
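
The 6-byte store trick, as a minimal scalar C sketch (illustration only; the real routines issue the equivalent 4-byte stores from SSE2 registers):

  #include <stdint.h>
  #include <string.h>

  // Write 6 bytes using two 4-byte stores: bytes 0..3 first, then
  // bytes 2..5, so the second store overlaps the first by 2 bytes
  // and no store narrower than 4 bytes is needed.
  static void Store6Bytes(uint8_t* dst, const uint8_t src[6]) {
    memcpy(dst + 0, src + 0, 4);  // first 4 bytes (0..3)
    memcpy(dst + 2, src + 2, 4);  // last 4 bytes (2..5), overlapping 2
  }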

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/582004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@266 16f28f9a-4ce2-e073-06de-1de4eb20be90
Author: fbarchard@google.com
Date:   2012-05-14 20:23:35 +00:00
Parent: f368565b95
Commit: f2d84dddd2

5 changed files with 392 additions and 666 deletions


@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 264
+Version: 265
 License: BSD
 License File: LICENSE


@@ -11,7 +11,7 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 264
+#define LIBYUV_VERSION 265
 #endif // INCLUDE_LIBYUV_VERSION_H_


@@ -99,12 +99,14 @@ extern "C" {
typedef __declspec(align(16)) int8 vec8[16];
typedef __declspec(align(16)) uint8 uvec8[16];
typedef __declspec(align(16)) int16 vec16[8];
typedef __declspec(align(16)) uint16 uvec16[8];
typedef __declspec(align(16)) uint32 uvec32[4];
#else // __GNUC__
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
typedef int8 __attribute__((vector_size(16))) vec8;
typedef uint8 __attribute__((vector_size(16))) uvec8;
typedef int16 __attribute__((vector_size(16))) vec16;
typedef uint16 __attribute__((vector_size(16))) uvec16;
typedef uint32 __attribute__((vector_size(16))) uvec32;
#endif
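
As a usage illustration (hypothetical constant name and values, mirroring the style of libyuv's row constants), either definition of uvec16 lets a 16-byte-aligned SIMD constant be declared the same way under both compilers:

  // Hypothetical example: eight uint16 lanes of an 0x8080 bias, 16-byte
  // aligned whether uvec16 is the MSVC array typedef or the GCC vector type.
  static const uvec16 kBias128 = {
    0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u
  };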


@@ -697,10 +697,6 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
 : "m"(kARGBToU), // %0
   "m"(kARGBToV), // %1
   "m"(kAddUV128) // %2
-:
-#if defined(__SSE2__)
-  "xmm3", "xmm4", "xmm5"
-#endif
 );
 asm volatile (
   "sub %1,%2 \n"
@@ -762,10 +758,6 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
 : "m"(kARGBToU), // %0
   "m"(kARGBToV), // %1
   "m"(kAddUV128) // %2
-:
-#if defined(__SSE2__)
-  "xmm3", "xmm4", "xmm5"
-#endif
 );
 asm volatile (
   "sub %1,%2 \n"
@@ -905,10 +897,6 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
 : "m"(kBGRAToU), // %0
   "m"(kBGRAToV), // %1
   "m"(kAddUV128) // %2
-:
-#if defined(__SSE2__)
-  "xmm3", "xmm4", "xmm5"
-#endif
 );
 asm volatile (
   "sub %1,%2 \n"
@@ -970,10 +958,6 @@ void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
 : "m"(kBGRAToU), // %0
   "m"(kBGRAToV), // %1
   "m"(kAddUV128) // %2
-:
-#if defined(__SSE2__)
-  "xmm3", "xmm4", "xmm5"
-#endif
 );
 asm volatile (
   "sub %1,%2 \n"
@@ -1113,10 +1097,6 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
 : "m"(kABGRToU), // %0
   "m"(kABGRToV), // %1
   "m"(kAddUV128) // %2
-:
-#if defined(__SSE2__)
-  "xmm3", "xmm4", "xmm5"
-#endif
 );
 asm volatile (
   "sub %1,%2 \n"
@@ -1178,10 +1158,6 @@ void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
 : "m"(kABGRToU), // %0
   "m"(kABGRToV), // %1
   "m"(kAddUV128) // %2
-:
-#if defined(__SSE2__)
-  "xmm3", "xmm4", "xmm5"
-#endif
 );
 asm volatile (
   "sub %1,%2 \n"

File diff suppressed because it is too large