Mirror of https://chromium.googlesource.com/libyuv/libyuv (synced 2025-12-06 16:56:55 +08:00)
With an asm()-aware lint, this cleans up most remaining issues.
BUG=none
TEST=lint filename
Review URL: https://webrtc-codereview.appspot.com/464001
git-svn-id: http://libyuv.googlecode.com/svn/trunk@230 16f28f9a-4ce2-e073-06de-1de4eb20be90
Parent: 427f966645
Commit: 5b22506b14
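The changes below are mechanical cpplint cleanup: a space after "asm volatile", // NOLINT markers on the deliberately non-standard-width typedefs, spaces after commas in macro parameter lists, static_cast in place of C-style casts, and the version bump from 229 to 230. A minimal sketch of the style the diff converges on (illustrative only; Clip mirrors the row_common.cc helper touched below, but it is written here with standard types and a made-up EXAMPLE_CLAMP macro so it compiles on its own):

#include <cstdio>

typedef unsigned short example_uint16;  // NOLINT - the 16-bit width is intentional

// Spaces after the commas in macro parameter lists keep cpplint quiet.
#define EXAMPLE_CLAMP(v, lo, hi) ((v) < (lo) ? (lo) : ((v) > (hi) ? (hi) : (v)))

// static_cast instead of C-style casts, brace spacing per lint.
static unsigned int Clip(int val) {
  if (val < 0) {
    return static_cast<unsigned int>(0);
  } else if (val > 255) {
    return static_cast<unsigned int>(255);
  }
  return static_cast<unsigned int>(val);
}

int main() {
  example_uint16 v = EXAMPLE_CLAMP(300, 0, 1000);  // stays 300
  std::printf("%u\n", Clip(v));                    // prints 255
  return 0;
}

The full diff, in unified form, follows.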
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 229
+Version: 230
 License: BSD
 License File: LICENSE
 
@@ -31,8 +31,8 @@ typedef __int64 int64;
 #define INT64_F "I64"
 #else // COMPILER_MSVC
 #ifdef __LP64__
-typedef unsigned long uint64;
-typedef long int64;
+typedef unsigned long uint64; // NOLINT
+typedef long int64; // NOLINT
 #ifndef INT64_C
 #define INT64_C(x) x ## L
 #endif
@@ -41,8 +41,8 @@ typedef long int64;
 #endif
 #define INT64_F "l"
 #else // __LP64__
-typedef unsigned long long uint64;
-typedef long long int64;
+typedef unsigned long long uint64; // NOLINT
+typedef long long int64; // NOLINT
 #ifndef INT64_C
 #define INT64_C(x) x ## LL
 #endif
@@ -54,8 +54,8 @@ typedef long long int64;
 #endif // COMPILER_MSVC
 typedef unsigned int uint32;
 typedef int int32;
-typedef unsigned short uint16;
-typedef short int16;
+typedef unsigned short uint16; // NOLINT
+typedef short int16; // NOLINT
 typedef unsigned char uint8;
 typedef signed char int8;
 #endif // INT_TYPES_DEFINED
@@ -70,4 +70,4 @@ typedef signed char int8;
 (reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \
 ((t)-1)) & ~((t)-1))))
 
 #endif // INCLUDE_LIBYUV_BASIC_TYPES_H_
@@ -62,4 +62,4 @@ double I420Ssim(const uint8* src_y_a, int stride_y_a,
 } // namespace libyuv
 #endif
 
 #endif // INCLUDE_LIBYUV_COMPARE_H_
@@ -215,4 +215,4 @@ int ConvertToI420(const uint8* src_frame, size_t src_size,
 } // namespace libyuv
 #endif
 
 #endif // INCLUDE_LIBYUV_CONVERT_H_
@@ -139,4 +139,4 @@ int ConvertFromI420(const uint8* y, int y_stride,
 } // namespace libyuv
 #endif
 
 #endif // INCLUDE_LIBYUV_CONVERT_FROM_H_
@@ -44,8 +44,8 @@ int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer,
 int width, int height);
 
 // Temporary API mapper
-#define BayerRGBToI420(b,bs,f,y,ys,u,us,v,vs,w,h) \
-BayerToI420(b,bs,y,ys,u,us,v,vs,w,h,f)
+#define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \
+BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f)
 
 int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
 uint8* dst_y, int dst_stride_y,
@@ -80,8 +80,8 @@ int I420ToBayerRGGB(const uint8* src_y, int src_stride_y,
 int width, int height);
 
 // Temporary API mapper
-#define I420ToBayerRGB(y,ys,u,us,v,vs,b,bs,f,w,h) \
-I420ToBayer(y,ys,u,us,v,vs,b,bs,w,h,f)
+#define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \
+I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f)
 
 int I420ToBayer(const uint8* src_y, int src_stride_y,
 const uint8* src_u, int src_stride_u,
@@ -108,7 +108,7 @@ int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer,
 int width, int height);
 
 // Temporary API mapper
-#define BayerRGBToARGB(b,bs,f,a,as,w,h) BayerToARGB(b,bs,a,as,w,h,f)
+#define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f)
 
 int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
 uint8* dst_argb, int dst_stride_argb,
@@ -133,7 +133,7 @@ int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb,
 int width, int height);
 
 // Temporary API mapper
-#define ARGBToBayerRGB(a,as,b,bs,f,w,h) ARGBToBayer(b,bs,a,as,w,h,f)
+#define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(b, bs, a, as, w, h, f)
 
 int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
 uint8* dst_bayer, int dst_stride_bayer,
@@ -11,7 +11,7 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define INCLUDE_LIBYUV_VERSION 229
+#define INCLUDE_LIBYUV_VERSION 230
 
 #endif // INCLUDE_LIBYUV_VERSION_H_
 
@@ -42,7 +42,7 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
 static uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b,
 int count) {
 volatile uint32 sse;
-asm volatile(
+asm volatile (
 "vmov.u8 q7, #0 \n"
 "vmov.u8 q9, #0 \n"
 "vmov.u8 q8, #0 \n"
@@ -116,12 +116,12 @@ static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b,
 }
 }
 
-#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
+#elif defined(__x86_64__) || defined(__i386__) && !defined(YUV_DISABLE_ASM)
 #define HAS_SUMSQUAREERROR_SSE2
 static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b,
 int count) {
 uint32 sse;
-asm volatile(
+asm volatile (
 "pxor %%xmm0,%%xmm0 \n"
 "pxor %%xmm5,%%xmm5 \n"
 "sub %0,%1 \n"
@@ -90,7 +90,7 @@ static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
 #define HAS_HALFROW_SSE2
 static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
 uint8* dst_uv, int pix) {
-asm volatile(
+asm volatile (
 "sub %0,%1 \n"
 "1: \n"
 "movdqa (%0),%%xmm0 \n"
@@ -502,7 +502,7 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2,
 #define HAS_SPLITYUY2_SSE2
 static void SplitYUY2_SSE2(const uint8* src_yuy2, uint8* dst_y,
 uint8* dst_u, uint8* dst_v, int pix) {
-asm volatile(
+asm volatile (
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "psrlw $0x8,%%xmm5 \n"
 "1: \n"
@@ -289,7 +289,7 @@ static void I42xToYUY2Row_SSE2(const uint8* src_y,
 const uint8* src_u,
 const uint8* src_v,
 uint8* dst_frame, int width) {
-asm volatile(
+asm volatile (
 "sub %1,%2 \n"
 "1: \n"
 "movq (%1),%%xmm2 \n"
@@ -324,7 +324,7 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y,
 const uint8* src_u,
 const uint8* src_v,
 uint8* dst_frame, int width) {
-asm volatile(
+asm volatile (
 "sub %1,%2 \n"
 "1: \n"
 "movq (%1),%%xmm2 \n"
@@ -24,7 +24,7 @@
 // TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux.
 #if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
 static __inline void __cpuid(int cpu_info[4], int info_type) {
-asm volatile(
+asm volatile (
 "mov %%ebx, %%edi \n"
 "cpuid \n"
 "xchg %%edi, %%ebx \n"
@@ -33,7 +33,7 @@ static __inline void __cpuid(int cpu_info[4], int info_type) {
 }
 #elif defined(__i386__) || defined(__x86_64__)
 static __inline void __cpuid(int cpu_info[4], int info_type) {
-asm volatile(
+asm volatile (
 "cpuid \n"
 : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
 : "a"(info_type));
@@ -53,7 +53,7 @@ static void ARGBToBayerRow_SSSE3(const uint8* src_argb,
 #define HAS_ARGBTOBAYERROW_SSSE3
 static void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
 uint32 selector, int pix) {
-asm volatile(
+asm volatile (
 "movd %3,%%xmm5 \n"
 "pshufd $0x0,%%xmm5,%%xmm5 \n"
 "1: \n"
@@ -654,7 +654,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
 #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
 #define HAS_SETROW_NEON
 static void SetRow8_NEON(uint8* dst, uint32 v32, int count) {
-asm volatile(
+asm volatile (
 "vdup.u32 q0, %2 \n" // duplicate 4 ints
 "1: \n"
 "subs %1, %1, #16 \n" // 16 bytes per loop
@@ -723,7 +723,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width,
 #define HAS_SETROW_X86
 static void SetRow8_X86(uint8* dst, uint32 v32, int width) {
 size_t width_tmp = static_cast<size_t>(width);
-asm volatile(
+asm volatile (
 "shr $0x2,%1 \n"
 "rep stosl \n"
 : "+D"(dst), // %0
@@ -737,7 +737,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width,
 for (int y = 0; y < height; ++y) {
 size_t width_tmp = static_cast<size_t>(width);
 uint32* d = reinterpret_cast<uint32*>(dst);
-asm volatile(
+asm volatile (
 "rep stosl \n"
 : "+D"(d), // %0
 "+c"(width_tmp) // %1
@@ -295,7 +295,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
 #define HAS_TRANSPOSE_WX8_SSSE3
 static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
 uint8* dst, int dst_stride, int width) {
-asm volatile(
+asm volatile (
 // Read in the data from the source pointer.
 // First round of bit swap.
 "1: \n"
@@ -506,7 +506,7 @@ extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
 #define HAS_TRANSPOSE_WX8_FAST_SSSE3
 static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,
 uint8* dst, int dst_stride, int width) {
-asm volatile(
+asm volatile (
 // Read in the data from the source pointer.
 // First round of bit swap.
 "1: \n"
@@ -646,7 +646,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
 uint8* dst_a, int dst_stride_a,
 uint8* dst_b, int dst_stride_b,
 int w) {
-asm volatile(
+asm volatile (
 // Read in the data from the source pointer.
 // First round of bit swap.
 "1: \n"
@@ -25,7 +25,7 @@ static const uvec8 vtbl_4x4_transpose =
 void TransposeWx8_NEON(const uint8* src, int src_stride,
 uint8* dst, int dst_stride,
 int width) {
-asm volatile(
+asm volatile (
 // loops are on blocks of 8. loop will stop when
 // counter gets to or below 0. starting the counter
 // at w-8 allow for this
@@ -191,7 +191,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
 uint8* dst_a, int dst_stride_a,
 uint8* dst_b, int dst_stride_b,
 int width) {
-asm volatile(
+asm volatile (
 // loops are on blocks of 8. loop will stop when
 // counter gets to or below 0. starting the counter
 // at w-8 allow for this
@@ -87,7 +87,7 @@ extern "C" {
 typedef __declspec(align(16)) int8 vec8[16];
 typedef __declspec(align(16)) uint8 uvec8[16];
 typedef __declspec(align(16)) int16 vec16[8];
 #else // __GNUC__
 #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
 typedef int8 __attribute__((vector_size(16))) vec8;
 typedef uint8 __attribute__((vector_size(16))) uvec8;
@@ -10,9 +10,10 @@
 
 #include "source/row.h"
 
-#include "libyuv/basic_types.h"
 #include <string.h> // For memcpy
 
+#include "libyuv/basic_types.h"
+
 #ifdef __cplusplus
 namespace libyuv {
 extern "C" {
@@ -195,7 +196,7 @@ static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
 return ((112 * r - 94 * g - 18 * b + 128) >> 8) + 128;
 }
 
-#define MAKEROWY(NAME,R,G,B) \
+#define MAKEROWY(NAME, R, G, B) \
 void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
 for (int x = 0; x < width; ++x) { \
 dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
@@ -229,9 +230,9 @@ void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \
 } \
 }
 
-MAKEROWY(ARGB,2,1,0)
-MAKEROWY(BGRA,1,2,3)
-MAKEROWY(ABGR,0,1,2)
+MAKEROWY(ARGB, 2, 1, 0)
+MAKEROWY(BGRA, 1, 2, 3)
+MAKEROWY(ABGR, 0, 1, 2)
 
 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
 // Copy a Y to RGB.
@@ -263,11 +264,11 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
 
 static __inline uint32 Clip(int32 val) {
 if (val < 0) {
-return (uint32) 0;
-} else if (val > 255){
-return (uint32) 255;
+return static_cast<uint32>(0);
+} else if (val > 255) {
+return static_cast<uint32>(255);
 }
-return (uint32) val;
+return static_cast<uint32>(val);
 }
 
 static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, uint8* rgb_buf,
@@ -469,7 +470,8 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
 dst_argb[2] = BLENDER(fr, br, a);
 dst_argb[3] = 255u;
 } else {
-*(uint32*)dst_argb = *(uint32*)src_argb;
+*reinterpret_cast<uint32*>(dst_argb) =
+*reinterpret_cast<uint32*>(src_argb);
 }
 }
 a = src_argb[4 + 3];
@@ -486,7 +488,8 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
 dst_argb[4 + 2] = BLENDER(fr, br, a);
 dst_argb[4 + 3] = 255u;
 } else {
-*(uint32*)(dst_argb + 4) = *(uint32*)(src_argb + 4);
+*reinterpret_cast<uint32*>(dst_argb + 4) =
+*reinterpret_cast<uint32*>(src_argb + 4);
 }
 }
 src_argb += 8;
@@ -508,7 +511,8 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
 dst_argb[2] = BLENDER(fr, br, a);
 dst_argb[3] = 255u;
 } else {
-*(uint32*)dst_argb = *(uint32*)src_argb;
+*reinterpret_cast<uint32*>(dst_argb) =
+*reinterpret_cast<uint32*>(src_argb);
 }
 }
 }
@@ -61,7 +61,7 @@ void I420ToARGBRow_NEON(const uint8* y_buf,
 const uint8* v_buf,
 uint8* rgb_buf,
 int width) {
-asm volatile(
+asm volatile (
 "vld1.u8 {d24}, [%5] \n"
 "vld1.u8 {d25}, [%6] \n"
 "vmov.u8 d26, #128 \n"
@@ -93,7 +93,7 @@ void I420ToBGRARow_NEON(const uint8* y_buf,
 const uint8* v_buf,
 uint8* rgb_buf,
 int width) {
-asm volatile(
+asm volatile (
 "vld1.u8 {d24}, [%5] \n"
 "vld1.u8 {d25}, [%6] \n"
 "vmov.u8 d26, #128 \n"
@@ -126,7 +126,7 @@ void I420ToABGRRow_NEON(const uint8* y_buf,
 const uint8* v_buf,
 uint8* rgb_buf,
 int width) {
-asm volatile(
+asm volatile (
 "vld1.u8 {d24}, [%5] \n"
 "vld1.u8 {d25}, [%6] \n"
 "vmov.u8 d26, #128 \n"
@@ -157,7 +157,7 @@ YUVTORGB
 // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
 // Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
 void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
-asm volatile(
+asm volatile (
 "1: \n"
 "vld2.u8 {q0,q1}, [%0]! \n" // load 16 pairs of UV
 "subs %3, %3, #16 \n" // 16 processed per loop
@@ -177,7 +177,7 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
 #ifdef HAS_COPYROW_NEON
 // Copy multiple of 64
 void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
-asm volatile(
+asm volatile (
 "1: \n"
 "pld [%0, #0xC0] \n" // preload
 "vldm %0!,{q0,q1,q2,q3} \n" // load 64
@@ -195,7 +195,7 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
 
 #ifdef HAS_MIRRORROW_NEON
 void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
-asm volatile(
+asm volatile (
 // compute where to start writing destination
 "add %1, %2 \n"
 // work on segments that are multiples of 16
@@ -270,7 +270,7 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
 
 #ifdef HAS_MIRRORROWUV_NEON
 void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) {
-asm volatile(
+asm volatile (
 // compute where to start writing destination
 "add %1, %3 \n" // dst_a + width
 "add %2, %3 \n" // dst_b + width
@@ -59,7 +59,7 @@ void SetUseReferenceImpl(bool use) {
 #define HAS_SCALEROWDOWN2_NEON
 void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */,
 uint8* dst, int dst_width) {
-asm volatile(
+asm volatile (
 "1: \n"
 "vld2.u8 {q0,q1}, [%0]! \n" // load even pixels into q0, odd into q1
 "vst1.u8 {q0}, [%1]! \n" // store even pixels
@@ -75,7 +75,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */,
 
 void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride,
 uint8* dst, int dst_width) {
-asm volatile(
+asm volatile (
 "add %1, %0 \n" // change the stride to row 2 pointer
 "1: \n"
 "vld1.u8 {q0,q1}, [%0]! \n" // load row 1 and post increment
@@ -101,7 +101,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride,
 #define HAS_SCALEROWDOWN4_NEON
 static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "1: \n"
 "vld2.u8 {d0, d1}, [%0]! \n"
 "vtrn.u8 d1, d0 \n"
@@ -120,7 +120,7 @@ static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */,
 
 static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "add r4, %0, %3 \n"
 "add r5, r4, %3 \n"
 "add %3, r5, %3 \n"
@@ -159,7 +159,7 @@ static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride,
 // Point samples 32 pixels to 24 pixels.
 static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "1: \n"
 "vld4.u8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
 "vmov d2, d3 \n" // order needs to be d0, d1, d2
@@ -176,7 +176,7 @@ static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */,
 
 static void ScaleRowDown34_0_Int_NEON(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "vmov.u8 d24, #3 \n"
 "add %3, %0 \n"
 "1: \n"
@@ -231,7 +231,7 @@ static void ScaleRowDown34_0_Int_NEON(const uint8* src_ptr, int src_stride,
 
 static void ScaleRowDown34_1_Int_NEON(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "vmov.u8 d24, #3 \n"
 "add %3, %0 \n"
 "1: \n"
@@ -283,7 +283,7 @@ const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) =
 // 32 -> 12
 static void ScaleRowDown38_NEON(const uint8* src_ptr, int,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "vld1.u8 {q3}, [%3] \n"
 "1: \n"
 "vld1.u8 {d0, d1, d2, d3}, [%0]! \n"
@@ -304,7 +304,7 @@ static void ScaleRowDown38_NEON(const uint8* src_ptr, int,
 // 32x3 -> 12x1
 static void ScaleRowDown38_3_Int_NEON(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "vld1.u16 {q13}, [%4] \n"
 "vld1.u8 {q14}, [%5] \n"
 "vld1.u8 {q15}, [%6] \n"
@@ -413,7 +413,7 @@ static void ScaleRowDown38_3_Int_NEON(const uint8* src_ptr, int src_stride,
 // 32x2 -> 12x1
 static void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "vld1.u16 {q13}, [%4] \n"
 "vld1.u8 {q14}, [%5] \n"
 "add %3, %0 \n"
@@ -508,7 +508,7 @@ static void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr, int src_stride,
 static void ScaleFilterRows_NEON(uint8* dst_ptr,
 const uint8* src_ptr, int src_stride,
 int dst_width, int source_y_fraction) {
-asm volatile(
+asm volatile (
 "cmp %4, #0 \n"
 "beq 2f \n"
 "add %2, %1 \n"
@@ -1555,7 +1555,7 @@ static void ScaleFilterCols34_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
 #define HAS_SCALEROWDOWN2_SSE2
 static void ScaleRowDown2_SSE2(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "psrlw $0x8,%%xmm5 \n"
 "1: \n"
@@ -1579,7 +1579,7 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, int src_stride,
 
 static void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "psrlw $0x8,%%xmm5 \n"
 "1: \n"
@@ -1614,7 +1614,7 @@ static void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride,
 #define HAS_SCALEROWDOWN4_SSE2
 static void ScaleRowDown4_SSE2(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "psrld $0x18,%%xmm5 \n"
 "1: \n"
@@ -1640,7 +1640,7 @@ static void ScaleRowDown4_SSE2(const uint8* src_ptr, int src_stride,
 static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
 intptr_t temp = 0;
-asm volatile(
+asm volatile (
 "pcmpeqb %%xmm7,%%xmm7 \n"
 "psrlw $0x8,%%xmm7 \n"
 "lea (%4,%4,2),%3 \n"
@@ -1693,7 +1693,7 @@ static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride,
 #define HAS_SCALEROWDOWN8_SSE2
 static void ScaleRowDown8_SSE2(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "psrlq $0x38,%%xmm5 \n"
 "1: \n"
@@ -1722,7 +1722,7 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
 uint16* dst_ptr, int src_width, int src_height) {
 int tmp_height = 0;
 intptr_t tmp_src = 0;
-asm volatile(
+asm volatile (
 "pxor %%xmm4,%%xmm4 \n"
 "sub $0x1,%5 \n"
 "1: \n"
@@ -2263,7 +2263,7 @@ extern "C" void ScaleFilterRows_SSSE3(uint8* dst_ptr,
 #elif defined(__x86_64__)
 static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "lea (%3,%3,2),%%r10 \n"
 "pxor %%xmm7,%%xmm7 \n"
 "1:"
@@ -2322,7 +2322,7 @@ static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
 #define HAS_SCALEROWDOWN34_SSSE3
 static void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "movdqa (%3),%%xmm3 \n"
 "movdqa (%4),%%xmm4 \n"
 "movdqa (%5),%%xmm5 \n"
@@ -2353,7 +2353,7 @@ static void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride,
 
 static void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "movdqa (%4),%%xmm2 \n" // _shuf01
 "movdqa (%5),%%xmm3 \n" // _shuf11
 "movdqa (%6),%%xmm4 \n" // _shuf21
@@ -2410,7 +2410,7 @@ static void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride,
 
 static void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "movdqa (%4),%%xmm2 \n" // _shuf01
 "movdqa (%5),%%xmm3 \n" // _shuf11
 "movdqa (%6),%%xmm4 \n" // _shuf21
@@ -2471,7 +2471,7 @@ static void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride,
 #define HAS_SCALEROWDOWN38_SSSE3
 static void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "movdqa (%3),%%xmm4 \n"
 "movdqa (%4),%%xmm5 \n"
 "1:"
@@ -2498,7 +2498,7 @@ static void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride,
 
 static void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "movdqa (%4),%%xmm4 \n"
 "movdqa (%5),%%xmm5 \n"
 "movdqa (%6),%%xmm6 \n"
@@ -2555,7 +2555,7 @@ static void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride,
 
 static void ScaleRowDown38_2_Int_SSSE3(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width) {
-asm volatile(
+asm volatile (
 "movdqa (%4),%%xmm4 \n"
 "movdqa (%5),%%xmm5 \n"
 "movdqa (%6),%%xmm6 \n"
@@ -2597,7 +2597,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
 const uint8* src_ptr, int src_stride,
 int dst_width, int source_y_fraction) {
 if (source_y_fraction == 0) {
-asm volatile(
+asm volatile (
 "1:"
 "movdqa (%1),%%xmm0 \n"
 "lea 0x10(%1),%1 \n"
@@ -2615,7 +2615,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
 );
 return;
 } else if (source_y_fraction == 128) {
-asm volatile(
+asm volatile (
 "1:"
 "movdqa (%1),%%xmm0 \n"
 "movdqa (%1,%3,1),%%xmm2 \n"
@@ -2635,7 +2635,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
 );
 return;
 } else {
-asm volatile(
+asm volatile (
 "mov %3,%%eax \n"
 "movd %%eax,%%xmm6 \n"
 "punpcklwd %%xmm6,%%xmm6 \n"
@@ -2688,7 +2688,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
 const uint8* src_ptr, int src_stride,
 int dst_width, int source_y_fraction) {
 if (source_y_fraction <= 1) {
-asm volatile(
+asm volatile (
 "1:"
 "movdqa (%1),%%xmm0 \n"
 "lea 0x10(%1),%1 \n"
@@ -2706,7 +2706,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
 );
 return;
 } else if (source_y_fraction == 128) {
-asm volatile(
+asm volatile (
 "1:"
 "movdqa (%1),%%xmm0 \n"
 "movdqa (%1,%3,1),%%xmm2 \n"
@@ -2726,7 +2726,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
 );
 return;
 } else {
-asm volatile(
+asm volatile (
 "mov %3,%%eax \n"
 "shr %%eax \n"
 "mov %%al,%%ah \n"