Mirror of https://chromium.googlesource.com/libyuv/libyuv, synced 2025-12-06 16:56:55 +08:00.

lint fix test

BUG=none
TEST=gcl lint lintfix2
Review URL: https://webrtc-codereview.appspot.com/458003
git-svn-id: http://libyuv.googlecode.com/svn/trunk@220 16f28f9a-4ce2-e073-06de-1de4eb20be90

This commit is contained in:
parent 8670b1ae04
commit 2b9c210803
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 219
+Version: 220
 License: BSD
 License File: LICENSE

@@ -11,7 +11,7 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_

-#define LIBYUV_VERSION 219
+#define LIBYUV_VERSION 220

 #endif // INCLUDE_LIBYUV_VERSION_H_

@@ -25,6 +25,7 @@ extern "C" {
 //////////////////////////////////////////////////////////////////////////////
 // Definition of FourCC codes
 //////////////////////////////////////////////////////////////////////////////

 // Convert four characters to a FourCC code.
 // Needs to be a macro otherwise the OS X compiler complains when the kFormat*
 // constants are used in a switch.
@@ -57,9 +58,9 @@ enum FourCC {
 FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
 FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
 FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
 FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // bgr565
 FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // abgr1555
 FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444
 FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
 FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
 FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
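The comment in the hunk above notes that FOURCC has to be a macro so the resulting FOURCC_*/kFormat* constants stay integral constant expressions and can be used as switch-case labels. A minimal sketch of that pattern, assuming the usual little-endian byte packing; the BytesPerPixel helper is illustrative only and not part of the library:

```cpp
#include <cstdint>

// Pack four characters into a little-endian 32-bit code.
#define FOURCC(a, b, c, d)                                        \
  ((static_cast<uint32_t>(a)) | (static_cast<uint32_t>(b) << 8) | \
   (static_cast<uint32_t>(c) << 16) | (static_cast<uint32_t>(d) << 24))

enum FourCC {
  FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
  FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
};

// Hypothetical helper: the enum values are constant expressions, so they are
// valid case labels on every compiler, including the OS X one the comment
// refers to.
int BytesPerPixel(uint32_t fourcc) {
  switch (fourcc) {
    case FOURCC_ARGB: return 4;
    case FOURCC_NV21: return 1;  // Y plane only; UV is a separate half-size plane
    default: return 0;
  }
}
```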
@@ -18,7 +18,7 @@

 #include "libyuv/basic_types.h"
 #include "libyuv/cpu_id.h"
-#include "row.h"
+#include "source/row.h"

 #ifdef __cplusplus
 namespace libyuv {
@@ -42,7 +42,7 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
 static uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b,
 int count) {
 volatile uint32 sse;
-asm volatile (
+asm volatile(
 "vmov.u8 q7, #0 \n"
 "vmov.u8 q9, #0 \n"
 "vmov.u8 q8, #0 \n"
@@ -71,8 +71,7 @@ static uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b,
 "+r"(count),
 "=r"(sse)
 :
-: "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10"
-);
+: "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10");
 return sse;
 }

@@ -122,7 +121,7 @@ static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b,
 static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b,
 int count) {
 uint32 sse;
-asm volatile (
+asm volatile(
 "pxor %%xmm0,%%xmm0 \n"
 "pxor %%xmm5,%%xmm5 \n"
 "sub %0,%1 \n"
@@ -19,7 +19,7 @@
 #include "libyuv/planar_functions.h"
 #include "libyuv/rotate.h"
 #include "libyuv/video_common.h"
-#include "row.h"
+#include "source/row.h"

 #ifdef __cplusplus
 namespace libyuv {
@@ -78,7 +78,7 @@ static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
 movdqa xmm0, [eax]
 pavgb xmm0, [eax + edx]
 sub ecx, 16
-movdqa [eax + edi], xmm0
+movdqa [eax + edi], xmm0 // NOLINT
 lea eax, [eax + 16]
 jg convertloop
 pop edi
@@ -86,11 +86,11 @@ static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
 }
 }

-#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
+#elif defined(__x86_64__) || defined(__i386__) && !defined(YUV_DISABLE_ASM)
 #define HAS_HALFROW_SSE2
 static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
 uint8* dst_uv, int pix) {
-asm volatile (
+asm volatile(
 "sub %0,%1 \n"
 "1: \n"
 "movdqa (%0),%%xmm0 \n"
@@ -137,7 +137,7 @@ int I422ToI420(const uint8* src_y, int src_stride_y,
 }
 int halfwidth = (width + 1) >> 1;
 void (*HalfRow)(const uint8* src_uv, int src_uv_stride,
-uint8* dst_uv, int pix);
+uint8* dst_uv, int pix) = HalfRow_C;
 #if defined(HAS_HALFROW_SSE2)
 if (TestCpuFlag(kCpuHasSSE2) &&
 IS_ALIGNED(halfwidth, 16) &&
@@ -146,11 +146,8 @@ int I422ToI420(const uint8* src_y, int src_stride_y,
 IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
 IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
 HalfRow = HalfRow_SSE2;
-} else
-#endif
-{
-HalfRow = HalfRow_C;
 }
+#endif

 // Copy Y plane
 if (dst_y) {
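The I422ToI420 hunks above switch to initializing the row-function pointer with the C fallback and overriding it only when the CPU-flag and alignment checks pass, which removes the `} else ... { HalfRow = HalfRow_C; }` block that the linter flagged. A minimal self-contained sketch of that dispatch pattern, assuming stand-in function bodies and a HaveSSE2 helper that are not libyuv code:

```cpp
#include <cstdint>

typedef uint8_t uint8;

// Stand-in row functions; the real libyuv versions process 16 pixels per step.
static void HalfRow_C(const uint8* src_uv, int src_stride, uint8* dst_uv, int pix) {
  for (int i = 0; i < pix; ++i) {
    dst_uv[i] = (src_uv[i] + src_uv[i + src_stride] + 1) >> 1;  // average two rows
  }
}
static void HalfRow_SSE2(const uint8* src_uv, int src_stride, uint8* dst_uv, int pix) {
  HalfRow_C(src_uv, src_stride, dst_uv, pix);  // placeholder for the SIMD body
}
static bool HaveSSE2() { return true; }  // stand-in for TestCpuFlag(kCpuHasSSE2)

#define HAS_HALFROW_SSE2

void HalveUVRow(const uint8* src_uv, int src_stride, uint8* dst_uv, int halfwidth) {
  // Start with the portable C row function; no trailing else block is needed
  // when the SIMD path is compiled out or rejected at runtime.
  void (*HalfRow)(const uint8*, int, uint8*, int) = HalfRow_C;
#if defined(HAS_HALFROW_SSE2)
  if (HaveSSE2() && (halfwidth % 16) == 0) {
    HalfRow = HalfRow_SSE2;  // override only when the fast path is usable
  }
#endif
  HalfRow(src_uv, src_stride, dst_uv, halfwidth);
}
```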
@@ -186,7 +183,7 @@ int I422ToI420(const uint8* src_y, int src_stride_y,
 #define HAS_SCALEROWDOWN2_NEON
 void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride,
 uint8* dst, int dst_width);
-#elif (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
+#elif defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) && \
 !defined(YUV_DISABLE_ASM)
 void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride,
 uint8* dst_ptr, int dst_width);
@@ -213,14 +210,13 @@ int I444ToI420(const uint8* src_y, int src_stride_y,
 }
 int halfwidth = (width + 1) >> 1;
 void (*ScaleRowDown2)(const uint8* src_ptr, int src_stride,
-uint8* dst_ptr, int dst_width);
+uint8* dst_ptr, int dst_width) = ScaleRowDown2Int_C;
 #if defined(HAS_SCALEROWDOWN2_NEON)
 if (TestCpuFlag(kCpuHasNEON) &&
 IS_ALIGNED(halfwidth, 16)) {
 ScaleRowDown2 = ScaleRowDown2Int_NEON;
-} else
-#endif
-#if defined(HAS_SCALEROWDOWN2_SSE2)
+}
+#elif defined(HAS_SCALEROWDOWN2_SSE2)
 if (TestCpuFlag(kCpuHasSSE2) &&
 IS_ALIGNED(halfwidth, 16) &&
 IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
@@ -228,10 +224,8 @@ int I444ToI420(const uint8* src_y, int src_stride_y,
 IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
 IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
 ScaleRowDown2 = ScaleRowDown2Int_SSE2;
-#endif
-{
-ScaleRowDown2 = ScaleRowDown2Int_C;
 }
+#endif

 // Copy Y plane
 if (dst_y) {
@@ -395,11 +389,12 @@ static int X420ToI420(const uint8* src_y,
 }

 int halfwidth = (width + 1) >> 1;
-void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
+void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
+SplitUV_C;
 #if defined(HAS_SPLITUV_NEON)
 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) {
 SplitUV = SplitUV_NEON;
-} else
+}
 #elif defined(HAS_SPLITUV_SSE2)
 if (TestCpuFlag(kCpuHasSSE2) &&
 IS_ALIGNED(halfwidth, 16) &&
@@ -407,11 +402,8 @@ static int X420ToI420(const uint8* src_y,
 IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
 IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
 SplitUV = SplitUV_SSE2;
-} else
-#endif
-{
-SplitUV = SplitUV_C;
 }
+#endif

 if (dst_y) {
 CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y,
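For reference, the SplitUV_* row functions selected above deinterleave one UVUV... row (as in NV12/NV21) into separate U and V rows; the NEON and SSE2 variants in the diff do the same thing 16 pixels at a time. A sketch of the scalar version, matching the behavior shown here:

```cpp
typedef unsigned char uint8;

// Scalar reference: even bytes of the interleaved row are U, odd bytes are V.
static void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
  for (int i = 0; i < pix; ++i) {
    dst_u[i] = src_uv[2 * i];
    dst_v[i] = src_uv[2 * i + 1];
  }
}
```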
@@ -483,7 +475,7 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2,
 pand xmm2, xmm5 // even bytes are Y
 pand xmm3, xmm5
 packuswb xmm2, xmm3
-movdqa [edx], xmm2
+movdqa [edx], xmm2 // NOLINT
 lea edx, [edx + 16]
 psrlw xmm0, 8 // YUYV -> UVUV
 psrlw xmm1, 8
@@ -491,12 +483,12 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2,
 movdqa xmm1, xmm0
 pand xmm0, xmm5 // U
 packuswb xmm0, xmm0
-movq qword ptr [esi], xmm0
+movq qword ptr [esi], xmm0 // NOLINT
 lea esi, [esi + 8]
 psrlw xmm1, 8 // V
 packuswb xmm1, xmm1
 sub ecx, 16
-movq qword ptr [edi], xmm1
+movq qword ptr [edi], xmm1 // NOLINT
 lea edi, [edi + 8]
 jg convertloop

@@ -506,11 +498,11 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2,
 }
 }

-#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
+#elif defined(__x86_64__) || defined(__i386__) && !defined(YUV_DISABLE_ASM)
 #define HAS_SPLITYUY2_SSE2
 static void SplitYUY2_SSE2(const uint8* src_yuy2, uint8* dst_y,
 uint8* dst_u, uint8* dst_v, int pix) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "psrlw $0x8,%%xmm5 \n"
 "1: \n"
@@ -604,19 +596,17 @@ int Q420ToI420(const uint8* src_y, int src_stride_y,
 }
 #endif

-void (*SplitYUY2)(const uint8* src_yuy2,
-uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix);
+void (*SplitYUY2)(const uint8* src_yuy2, uint8* dst_y, uint8* dst_u,
+uint8* dst_v, int pix) = SplitYUY2_C;
 #if defined(HAS_SPLITYUY2_SSE2)
 if (TestCpuFlag(kCpuHasSSE2) &&
 IS_ALIGNED(width, 16) &&
 IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
 SplitYUY2 = SplitYUY2_SSE2;
-} else
-#endif
-{
-SplitYUY2 = SplitYUY2_C;
 }
+#endif

 for (int y = 0; y < height; y += 2) {
 CopyRow(src_y, dst_y, width);
 dst_y += dst_stride_y;
@@ -800,13 +790,13 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
 #endif

 #ifdef LIBYUV_LITTLE_ENDIAN
-#define READWORD(p) (*((uint32*) (p)))
+#define READWORD(p) (*reinterpret_cast<const uint32*>(p))
 #else
 static inline uint32 READWORD(const uint8* p) {
-return (uint32) p[0] |
-((uint32) (p[1]) << 8) |
-((uint32) (p[2]) << 16) |
-((uint32) (p[3]) << 24);
+return static_cast<uint32>(p[0]) |
+(static_cast<uint32>(p[1]) << 8) |
+(static_cast<uint32>(p[2]) << 16) |
+(static_cast<uint32>(p[3]) << 24);
 }
 #endif

@@ -1599,7 +1589,7 @@ int ConvertToI420(const uint8* sample, size_t sample_size,
 int tmp_y_stride = y_stride;
 int tmp_u_stride = u_stride;
 int tmp_v_stride = v_stride;
-uint8* buf = 0;
+uint8* buf = NULL;
 int abs_dst_height = (dst_height < 0) ? -dst_height : dst_height;
 if (need_rot) {
 int y_size = dst_width * abs_dst_height;
@@ -1618,7 +1608,7 @@ int ConvertToI420(const uint8* sample, size_t sample_size,
 switch (format) {
 // Single plane formats
 case FOURCC_YUY2:
-src = sample + (aligned_src_width * crop_y + crop_x) * 2 ;
+src = sample + (aligned_src_width * crop_y + crop_x) * 2;
 r = YUY2ToI420(src, aligned_src_width * 2,
 y, y_stride,
 u, u_stride,
@@ -17,7 +17,7 @@
 #include "libyuv/planar_functions.h"
 #include "libyuv/rotate.h"
 #include "libyuv/video_common.h"
-#include "row.h"
+#include "source/row.h"

 #ifdef __cplusplus
 namespace libyuv {
@@ -289,7 +289,7 @@ static void I42xToYUY2Row_SSE2(const uint8* src_y,
 const uint8* src_u,
 const uint8* src_v,
 uint8* dst_frame, int width) {
-asm volatile (
+asm volatile(
 "sub %1,%2 \n"
 "1: \n"
 "movq (%1),%%xmm2 \n"
@@ -324,7 +324,7 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y,
 const uint8* src_u,
 const uint8* src_v,
 uint8* dst_frame, int width) {
-asm volatile (
+asm volatile(
 "sub %1,%2 \n"
 "1: \n"
 "movq (%1),%%xmm2 \n"
@@ -24,21 +24,19 @@
 // TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux.
 #if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
 static __inline void __cpuid(int cpu_info[4], int info_type) {
-asm volatile (
+asm volatile(
 "mov %%ebx, %%edi \n"
 "cpuid \n"
 "xchg %%edi, %%ebx \n"
 : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
-: "a"(info_type)
-);
+: "a"(info_type));
 }
 #elif defined(__i386__) || defined(__x86_64__)
 static __inline void __cpuid(int cpu_info[4], int info_type) {
-asm volatile (
+asm volatile(
 "cpuid \n"
 : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
-: "a"(info_type)
-);
+: "a"(info_type));
 }
 #endif

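The __cpuid wrappers above exist so feature bits can be read on GCC targets where cpuid.h was not yet usable, per the TODO in the hunk. A hedged sketch of how such a wrapper's output is typically consumed, using GCC's __get_cpuid instead of inline assembly; the CpuFlags struct and helper are illustrative and not libyuv API:

```cpp
#if defined(__i386__) || defined(__x86_64__)
#include <cpuid.h>

struct CpuFlags {
  bool has_sse2;
  bool has_ssse3;
};

static CpuFlags DetectCpuFlags() {
  unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
  CpuFlags flags = {false, false};
  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {  // leaf 1: feature bits
    flags.has_sse2 = (edx & (1u << 26)) != 0;    // SSE2: EDX bit 26
    flags.has_ssse3 = (ecx & (1u << 9)) != 0;    // SSSE3: ECX bit 9
  }
  return flags;
}
#endif
```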
@@ -13,7 +13,7 @@
 #include "libyuv/basic_types.h"
 #include "libyuv/cpu_id.h"
 #include "libyuv/video_common.h"
-#include "row.h"
+#include "source/row.h"

 #ifdef __cplusplus
 namespace libyuv {
@@ -53,7 +53,7 @@ static void ARGBToBayerRow_SSSE3(const uint8* src_argb,
 #define HAS_ARGBTOBAYERROW_SSSE3
 static void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
 uint32 selector, int pix) {
-asm volatile (
+asm volatile(
 "movd %3,%%xmm5 \n"
 "pshufd $0x0,%%xmm5,%%xmm5 \n"
 "1: \n"
@@ -13,7 +13,7 @@
 #include <string.h> // for memset()

 #include "libyuv/cpu_id.h"
-#include "row.h"
+#include "source/row.h"

 #ifdef __cplusplus
 namespace libyuv {
@@ -693,7 +693,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
 #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
 #define HAS_SETROW_NEON
 static void SetRow8_NEON(uint8* dst, uint32 v32, int count) {
-asm volatile (
+asm volatile(
 "vdup.u32 q0, %2 \n" // duplicate 4 ints
 "1: \n"
 "subs %1, %1, #16 \n" // 16 bytes per loop
@@ -763,7 +763,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width,
 #define HAS_SETROW_X86
 static void SetRow8_X86(uint8* dst, uint32 v32, int width) {
 size_t width_tmp = static_cast<size_t>(width);
-asm volatile (
+asm volatile(
 "shr $0x2,%1 \n"
 "rep stosl \n"
 : "+D"(dst), // %0
@@ -778,7 +778,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width,
 for (int y = 0; y < height; ++y) {
 size_t width_tmp = static_cast<size_t>(width);
 uint32* d = reinterpret_cast<uint32*>(dst);
-asm volatile (
+asm volatile(
 "rep stosl \n"
 : "+D"(d), // %0
 "+c"(width_tmp) // %1
@@ -13,8 +13,8 @@
 #include "libyuv/cpu_id.h"
 #include "libyuv/convert.h"
 #include "libyuv/planar_functions.h"
-#include "rotate_priv.h"
-#include "row.h"
+#include "source/rotate_priv.h"
+#include "source/row.h"

 #ifdef __cplusplus
 namespace libyuv {
@@ -295,7 +295,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
 #define HAS_TRANSPOSE_WX8_SSSE3
 static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
 uint8* dst, int dst_stride, int width) {
-asm volatile (
+asm volatile(
 // Read in the data from the source pointer.
 // First round of bit swap.
 "1: \n"
@@ -506,7 +506,7 @@ extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
 #define HAS_TRANSPOSE_WX8_FAST_SSSE3
 static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,
 uint8* dst, int dst_stride, int width) {
-asm volatile (
+asm volatile(
 // Read in the data from the source pointer.
 // First round of bit swap.
 "1: \n"
@@ -646,7 +646,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
 uint8* dst_a, int dst_stride_a,
 uint8* dst_b, int dst_stride_b,
 int w) {
-asm volatile (
+asm volatile(
 // Read in the data from the source pointer.
 // First round of bit swap.
 "1: \n"
@@ -8,7 +8,7 @@
 * be found in the AUTHORS file in the root of the source tree.
 */

-#include "row.h"
+#include "source/row.h"

 #include "libyuv/basic_types.h"

@@ -25,7 +25,7 @@ static const uvec8 vtbl_4x4_transpose =
 void TransposeWx8_NEON(const uint8* src, int src_stride,
 uint8* dst, int dst_stride,
 int width) {
-asm volatile (
+asm volatile(
 // loops are on blocks of 8. loop will stop when
 // counter gets to or below 0. starting the counter
 // at w-8 allow for this
@@ -191,7 +191,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
 uint8* dst_a, int dst_stride_a,
 uint8* dst_b, int dst_stride_b,
 int width) {
-asm volatile (
+asm volatile(
 // loops are on blocks of 8. loop will stop when
 // counter gets to or below 0. starting the counter
 // at w-8 allow for this
@@ -8,7 +8,7 @@
 * be found in the AUTHORS file in the root of the source tree.
 */

-#include "row.h"
+#include "source/row.h"

 #include "libyuv/basic_types.h"
 #include <string.h> // For memcpy
@@ -8,7 +8,7 @@
 * be found in the AUTHORS file in the root of the source tree.
 */

-#include "row.h"
+#include "source/row.h"

 #ifdef __cplusplus
 namespace libyuv {
@@ -61,7 +61,7 @@ void I420ToARGBRow_NEON(const uint8* y_buf,
 const uint8* v_buf,
 uint8* rgb_buf,
 int width) {
-asm volatile (
+asm volatile(
 "vld1.u8 {d24}, [%5] \n"
 "vld1.u8 {d25}, [%6] \n"
 "vmov.u8 d26, #128 \n"
@@ -93,7 +93,7 @@ void I420ToBGRARow_NEON(const uint8* y_buf,
 const uint8* v_buf,
 uint8* rgb_buf,
 int width) {
-asm volatile (
+asm volatile(
 "vld1.u8 {d24}, [%5] \n"
 "vld1.u8 {d25}, [%6] \n"
 "vmov.u8 d26, #128 \n"
@@ -126,7 +126,7 @@ void I420ToABGRRow_NEON(const uint8* y_buf,
 const uint8* v_buf,
 uint8* rgb_buf,
 int width) {
-asm volatile (
+asm volatile(
 "vld1.u8 {d24}, [%5] \n"
 "vld1.u8 {d25}, [%6] \n"
 "vmov.u8 d26, #128 \n"
@@ -157,7 +157,7 @@ YUVTORGB
 // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
 // Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
 void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
-asm volatile (
+asm volatile(
 "1: \n"
 "vld2.u8 {q0,q1}, [%0]! \n" // load 16 pairs of UV
 "subs %3, %3, #16 \n" // 16 processed per loop
@@ -177,7 +177,7 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
 #ifdef HAS_COPYROW_NEON
 // Copy multiple of 64
 void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
-asm volatile (
+asm volatile(
 "1: \n"
 "pld [%0, #0xC0] \n" // preload
 "vldm %0!,{q0,q1,q2,q3} \n" // load 64
@@ -195,7 +195,7 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {

 #ifdef HAS_MIRRORROW_NEON
 void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
-asm volatile (
+asm volatile(
 // compute where to start writing destination
 "add %1, %2 \n"
 // work on segments that are multiples of 16
@@ -270,7 +270,7 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {

 #ifdef HAS_MIRRORROWUV_NEON
 void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) {
-asm volatile (
+asm volatile(
 // compute where to start writing destination
 "add %1, %3 \n" // dst_a + width
 "add %2, %3 \n" // dst_b + width
@@ -8,7 +8,7 @@
 * be found in the AUTHORS file in the root of the source tree.
 */

-#include "row.h"
+#include "source/row.h"

 #include "libyuv/basic_types.h"

@@ -109,7 +109,7 @@ CONST uvec8 kShuffleMaskARGBToRAW = {
 };

 void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "pslld $0x18,%%xmm5 \n"
 "1: \n"
@@ -138,7 +138,7 @@ void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
 }

 void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix) {
-asm volatile (
+asm volatile(
 "movdqa %3,%%xmm5 \n"
 "sub %0,%1 \n"
 "1: \n"
@@ -161,7 +161,7 @@ void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix) {
 }

 void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) {
-asm volatile (
+asm volatile(
 "movdqa %3,%%xmm5 \n"
 "sub %0,%1 \n"
 "1: \n"
@@ -183,7 +183,7 @@ void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) {
 }

 void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000
 "pslld $0x18,%%xmm5 \n"
 "movdqa %3,%%xmm4 \n"
@@ -223,7 +223,7 @@ void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
 }

 void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000
 "pslld $0x18,%%xmm5 \n"
 "movdqa %3,%%xmm4 \n"
@@ -263,7 +263,7 @@ void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) {
 }

 void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
-asm volatile (
+asm volatile(
 "mov $0x1080108,%%eax \n"
 "movd %%eax,%%xmm5 \n"
 "pshufd $0x0,%%xmm5,%%xmm5 \n"
@@ -312,7 +312,7 @@ void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
 }

 void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
-asm volatile (
+asm volatile(
 "mov $0x1080108,%%eax \n"
 "movd %%eax,%%xmm5 \n"
 "pshufd $0x0,%%xmm5,%%xmm5 \n"
@@ -364,7 +364,7 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
 }

 void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
-asm volatile (
+asm volatile(
 "mov $0xf0f0f0f,%%eax \n"
 "movd %%eax,%%xmm4 \n"
 "pshufd $0x0,%%xmm4,%%xmm4 \n"
@@ -403,7 +403,7 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
 }

 void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) {
-asm volatile (
+asm volatile(
 "movdqa %3,%%xmm6 \n"
 "1: \n"
 "movdqa (%0),%%xmm0 \n"
@@ -443,7 +443,7 @@ void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) {
 }

 void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) {
-asm volatile (
+asm volatile(
 "movdqa %3,%%xmm6 \n"
 "1: \n"
 "movdqa (%0),%%xmm0 \n"
@@ -483,7 +483,7 @@ void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) {
 }

 void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm3,%%xmm3 \n"
 "psrld $0x1b,%%xmm3 \n"
 "pcmpeqb %%xmm4,%%xmm4 \n"
@@ -522,7 +522,7 @@ void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) {
 }

 void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm4,%%xmm4 \n"
 "psrld $0x1b,%%xmm4 \n"
 "movdqa %%xmm4,%%xmm5 \n"
@@ -565,7 +565,7 @@ void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) {
 }

 void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm4,%%xmm4 \n"
 "psllw $0xc,%%xmm4 \n"
 "movdqa %%xmm4,%%xmm3 \n"
@@ -596,7 +596,7 @@ void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) {
 }

 void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-asm volatile (
+asm volatile(
 "movdqa %4,%%xmm5 \n"
 "movdqa %3,%%xmm4 \n"
 "1: \n"
@@ -632,7 +632,7 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
 }

 void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-asm volatile (
+asm volatile(
 "movdqa %4,%%xmm5 \n"
 "movdqa %3,%%xmm4 \n"
 "1: \n"
@@ -674,7 +674,7 @@ void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
 // and considered unsafe.
 void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
 uint8* dst_u, uint8* dst_v, int width) {
-asm volatile (
+asm volatile(
 "movdqa %0,%%xmm4 \n"
 "movdqa %1,%%xmm3 \n"
 "movdqa %2,%%xmm5 \n"
@@ -687,7 +687,7 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
 "xmm3", "xmm4", "xmm5"
 #endif
 );
-asm volatile (
+asm volatile(
 "sub %1,%2 \n"
 "1: \n"
 "movdqa (%0),%%xmm0 \n"
@@ -738,7 +738,7 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,

 void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
 uint8* dst_u, uint8* dst_v, int width) {
-asm volatile (
+asm volatile(
 "movdqa %0,%%xmm4 \n"
 "movdqa %1,%%xmm3 \n"
 "movdqa %2,%%xmm5 \n"
@@ -751,7 +751,7 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
 "xmm3", "xmm4", "xmm5"
 #endif
 );
-asm volatile (
+asm volatile(
 "sub %1,%2 \n"
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
@@ -805,7 +805,7 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
 }

 void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) {
-asm volatile (
+asm volatile(
 "movdqa %4,%%xmm5 \n"
 "movdqa %3,%%xmm4 \n"
 "1: \n"
@@ -841,7 +841,7 @@ void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) {
 }

 void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) {
-asm volatile (
+asm volatile(
 "movdqa %4,%%xmm5 \n"
 "movdqa %3,%%xmm4 \n"
 "1: \n"
@@ -878,7 +878,7 @@ void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) {

 void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
 uint8* dst_u, uint8* dst_v, int width) {
-asm volatile (
+asm volatile(
 "movdqa %0,%%xmm4 \n"
 "movdqa %1,%%xmm3 \n"
 "movdqa %2,%%xmm5 \n"
@@ -891,7 +891,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
 "xmm3", "xmm4", "xmm5"
 #endif
 );
-asm volatile (
+asm volatile(
 "sub %1,%2 \n"
 "1: \n"
 "movdqa (%0),%%xmm0 \n"
@@ -942,7 +942,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra,

 void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
 uint8* dst_u, uint8* dst_v, int width) {
-asm volatile (
+asm volatile(
 "movdqa %0,%%xmm4 \n"
 "movdqa %1,%%xmm3 \n"
 "movdqa %2,%%xmm5 \n"
@@ -955,7 +955,7 @@ void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
 "xmm3", "xmm4", "xmm5"
 #endif
 );
-asm volatile (
+asm volatile(
 "sub %1,%2 \n"
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
@@ -1009,7 +1009,7 @@ void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
 }

 void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) {
-asm volatile (
+asm volatile(
 "movdqa %4,%%xmm5 \n"
 "movdqa %3,%%xmm4 \n"
 "1: \n"
@@ -1045,7 +1045,7 @@ void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) {
 }

 void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) {
-asm volatile (
+asm volatile(
 "movdqa %4,%%xmm5 \n"
 "movdqa %3,%%xmm4 \n"
 "1: \n"
@@ -1082,7 +1082,7 @@ void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) {

 void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
 uint8* dst_u, uint8* dst_v, int width) {
-asm volatile (
+asm volatile(
 "movdqa %0,%%xmm4 \n"
 "movdqa %1,%%xmm3 \n"
 "movdqa %2,%%xmm5 \n"
@@ -1095,7 +1095,7 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
 "xmm3", "xmm4", "xmm5"
 #endif
 );
-asm volatile (
+asm volatile(
 "sub %1,%2 \n"
 "1: \n"
 "movdqa (%0),%%xmm0 \n"
@@ -1146,7 +1146,7 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr,

 void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
 uint8* dst_u, uint8* dst_v, int width) {
-asm volatile (
+asm volatile(
 "movdqa %0,%%xmm4 \n"
 "movdqa %1,%%xmm3 \n"
 "movdqa %2,%%xmm5 \n"
@@ -1159,7 +1159,7 @@ void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
 "xmm3", "xmm4", "xmm5"
 #endif
 );
-asm volatile (
+asm volatile(
 "sub %1,%2 \n"
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
@@ -1291,7 +1291,7 @@ void OMITFP I420ToARGBRow_SSSE3(const uint8* y_buf,
 const uint8* v_buf,
 uint8* rgb_buf,
 int width) {
-asm volatile (
+asm volatile(
 "sub %1,%2 \n"
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "pxor %%xmm4,%%xmm4 \n"
@@ -1325,7 +1325,7 @@ void OMITFP I420ToBGRARow_SSSE3(const uint8* y_buf,
 const uint8* v_buf,
 uint8* rgb_buf,
 int width) {
-asm volatile (
+asm volatile(
 "sub %1,%2 \n"
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "pxor %%xmm4,%%xmm4 \n"
@@ -1360,7 +1360,7 @@ void OMITFP I420ToABGRRow_SSSE3(const uint8* y_buf,
 const uint8* v_buf,
 uint8* rgb_buf,
 int width) {
-asm volatile (
+asm volatile(
 "sub %1,%2 \n"
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "pxor %%xmm4,%%xmm4 \n"
@@ -1394,7 +1394,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
 const uint8* v_buf,
 uint8* rgb_buf,
 int width) {
-asm volatile (
+asm volatile(
 "sub %1,%2 \n"
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "pxor %%xmm4,%%xmm4 \n"
@@ -1450,7 +1450,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
 void YToARGBRow_SSE2(const uint8* y_buf,
 uint8* rgb_buf,
 int width) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm4,%%xmm4 \n"
 "pslld $0x18,%%xmm4 \n"
 "mov $0x10001000,%%eax \n"
@@ -1501,7 +1501,7 @@ CONST uvec8 kShuffleMirror = {

 void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
 intptr_t temp_width = static_cast<intptr_t>(width);
-asm volatile (
+asm volatile(
 "movdqa %3,%%xmm5 \n"
 "lea -0x10(%0),%0 \n"
 "1: \n"
@@ -1526,7 +1526,7 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
 #ifdef HAS_MIRRORROW_SSE2
 void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
 intptr_t temp_width = static_cast<intptr_t>(width);
-asm volatile (
+asm volatile(
 "lea -0x10(%0),%0 \n"
 "1: \n"
 "movdqu (%0,%2),%%xmm0 \n"
@@ -1561,7 +1561,7 @@ CONST uvec8 kShuffleMirrorUV = {
 void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
 int width) {
 intptr_t temp_width = static_cast<intptr_t>(width);
-asm volatile (
+asm volatile(
 "movdqa %4,%%xmm1 \n"
 "lea -16(%0,%3,2),%0 \n"
 "sub %1,%2 \n"
@@ -1589,7 +1589,7 @@ void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,

 #ifdef HAS_SPLITUV_SSE2
 void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "psrlw $0x8,%%xmm5 \n"
 "sub %1,%2 \n"
@@ -1625,7 +1625,7 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {

 #ifdef HAS_COPYROW_SSE2
 void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
-asm volatile (
+asm volatile(
 "sub %0,%1 \n"
 "1: \n"
 "movdqa (%0),%%xmm0 \n"
@@ -1650,7 +1650,7 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
 #ifdef HAS_COPYROW_X86
 void CopyRow_X86(const uint8* src, uint8* dst, int width) {
 size_t width_tmp = static_cast<size_t>(width);
-asm volatile (
+asm volatile(
 "shr $0x2,%2 \n"
 "rep movsl \n"
 : "+S"(src), // %0
@@ -1664,7 +1664,7 @@ void CopyRow_X86(const uint8* src, uint8* dst, int width) {

 #ifdef HAS_YUY2TOYROW_SSE2
 void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "psrlw $0x8,%%xmm5 \n"
 "1: \n"
@@ -1691,7 +1691,7 @@ void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) {

 void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
 uint8* dst_u, uint8* dst_y, int pix) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "psrlw $0x8,%%xmm5 \n"
 "sub %1,%2 \n"
@@ -1730,7 +1730,7 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,

 void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
 uint8* dst_y, int pix) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "psrlw $0x8,%%xmm5 \n"
 "1: \n"
@@ -1759,7 +1759,7 @@ void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2,
 int stride_yuy2,
 uint8* dst_u, uint8* dst_y,
 int pix) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "psrlw $0x8,%%xmm5 \n"
 "sub %1,%2 \n"
@@ -1797,7 +1797,7 @@ void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2,
 }

 void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) {
-asm volatile (
+asm volatile(
 "1: \n"
 "movdqa (%0),%%xmm0 \n"
 "movdqa 0x10(%0),%%xmm1 \n"
@@ -1822,7 +1822,7 @@ void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) {

 void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
 uint8* dst_u, uint8* dst_y, int pix) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "psrlw $0x8,%%xmm5 \n"
 "sub %1,%2 \n"
@@ -1861,7 +1861,7 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,

 void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
 uint8* dst_y, int pix) {
-asm volatile (
+asm volatile(
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
@@ -1886,7 +1886,7 @@ void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,

 void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
 uint8* dst_u, uint8* dst_y, int pix) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm5,%%xmm5 \n"
 "psrlw $0x8,%%xmm5 \n"
 "sub %1,%2 \n"
@@ -1929,7 +1929,7 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
 // Destination aligned to 16 bytes, multiple of 4 pixels
 void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb, uint8* dst_argb,
 int width) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm7,%%xmm7 \n"
 "psrlw $0xf,%%xmm7 \n"
 "pcmpeqb %%xmm6,%%xmm6 \n"
@@ -1999,7 +1999,7 @@ void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb, uint8* dst_argb,

 // Blend 1 pixel at a time, unaligned
 void ARGBBlendRow1_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
-asm volatile (
+asm volatile(
 "pcmpeqb %%xmm7,%%xmm7 \n"
 "psrlw $0xf,%%xmm7 \n"
 "pcmpeqb %%xmm6,%%xmm6 \n"
@@ -8,7 +8,7 @@
  * be found in the AUTHORS file in the root of the source tree.
  */

-#include "row.h"
+#include "source/row.h"

 #if defined(_M_IX86)
 #include "emmintrin.h"
@@ -16,7 +16,7 @@

 #include "libyuv/cpu_id.h"
 #include "libyuv/planar_functions.h"  // For CopyPlane
-#include "row.h"
+#include "source/row.h"

 #ifdef __cplusplus
 namespace libyuv {
@@ -59,7 +59,7 @@ void SetUseReferenceImpl(bool use) {
 #define HAS_SCALEROWDOWN2_NEON
 void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */,
                         uint8* dst, int dst_width) {
-  asm volatile (
+  asm volatile(
   "1: \n"
   "vld2.u8 {q0,q1}, [%0]! \n"  // load even pixels into q0, odd into q1
   "vst1.u8 {q0}, [%1]! \n"  // store even pixels
@@ -75,7 +75,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */,

 void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride,
                            uint8* dst, int dst_width) {
-  asm volatile (
+  asm volatile(
   "add %1, %0 \n"  // change the stride to row 2 pointer
   "1: \n"
   "vld1.u8 {q0,q1}, [%0]! \n"  // load row 1 and post increment
@@ -101,7 +101,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride,
 #define HAS_SCALEROWDOWN4_NEON
 static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */,
                                uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "1: \n"
   "vld2.u8 {d0, d1}, [%0]! \n"
   "vtrn.u8 d1, d0 \n"
@@ -120,7 +120,7 @@ static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */,

 static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride,
                                   uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "add r4, %0, %3 \n"
   "add r5, r4, %3 \n"
   "add %3, r5, %3 \n"
@@ -159,7 +159,7 @@ static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride,
 // Point samples 32 pixels to 24 pixels.
 static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */,
                                 uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "1: \n"
   "vld4.u8 {d0, d1, d2, d3}, [%0]! \n"  // src line 0
   "vmov d2, d3 \n"  // order needs to be d0, d1, d2
@@ -176,7 +176,7 @@ static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */,

 static void ScaleRowDown34_0_Int_NEON(const uint8* src_ptr, int src_stride,
                                       uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "vmov.u8 d24, #3 \n"
   "add %3, %0 \n"
   "1: \n"
@@ -231,7 +231,7 @@ static void ScaleRowDown34_0_Int_NEON(const uint8* src_ptr, int src_stride,

 static void ScaleRowDown34_1_Int_NEON(const uint8* src_ptr, int src_stride,
                                       uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "vmov.u8 d24, #3 \n"
   "add %3, %0 \n"
   "1: \n"
@@ -283,7 +283,7 @@ const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) =
 // 32 -> 12
 static void ScaleRowDown38_NEON(const uint8* src_ptr, int,
                                 uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "vld1.u8 {q3}, [%3] \n"
   "1: \n"
   "vld1.u8 {d0, d1, d2, d3}, [%0]! \n"
@@ -304,7 +304,7 @@ static void ScaleRowDown38_NEON(const uint8* src_ptr, int,
 // 32x3 -> 12x1
 static void ScaleRowDown38_3_Int_NEON(const uint8* src_ptr, int src_stride,
                                       uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "vld1.u16 {q13}, [%4] \n"
   "vld1.u8 {q14}, [%5] \n"
   "vld1.u8 {q15}, [%6] \n"
@@ -413,7 +413,7 @@ static void ScaleRowDown38_3_Int_NEON(const uint8* src_ptr, int src_stride,
 // 32x2 -> 12x1
 static void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr, int src_stride,
                                       uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "vld1.u16 {q13}, [%4] \n"
   "vld1.u8 {q14}, [%5] \n"
   "add %3, %0 \n"
@@ -508,7 +508,7 @@ static void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr, int src_stride,
 static void ScaleFilterRows_NEON(uint8* dst_ptr,
                                  const uint8* src_ptr, int src_stride,
                                  int dst_width, int source_y_fraction) {
-  asm volatile (
+  asm volatile(
   "cmp %4, #0 \n"
   "beq 2f \n"
   "add %2, %1 \n"
@@ -1555,7 +1555,7 @@ static void ScaleFilterCols34_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
 #define HAS_SCALEROWDOWN2_SSE2
 static void ScaleRowDown2_SSE2(const uint8* src_ptr, int src_stride,
                                uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "pcmpeqb %%xmm5,%%xmm5 \n"
   "psrlw $0x8,%%xmm5 \n"
   "1: \n"
@@ -1579,7 +1579,7 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, int src_stride,

 static void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride,
                                   uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "pcmpeqb %%xmm5,%%xmm5 \n"
   "psrlw $0x8,%%xmm5 \n"
   "1: \n"
@@ -1614,7 +1614,7 @@ static void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride,
 #define HAS_SCALEROWDOWN4_SSE2
 static void ScaleRowDown4_SSE2(const uint8* src_ptr, int src_stride,
                                uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "pcmpeqb %%xmm5,%%xmm5 \n"
   "psrld $0x18,%%xmm5 \n"
   "1: \n"
@@ -1640,7 +1640,7 @@ static void ScaleRowDown4_SSE2(const uint8* src_ptr, int src_stride,
 static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride,
                                   uint8* dst_ptr, int dst_width) {
   intptr_t temp = 0;
-  asm volatile (
+  asm volatile(
   "pcmpeqb %%xmm7,%%xmm7 \n"
   "psrlw $0x8,%%xmm7 \n"
   "lea (%4,%4,2),%3 \n"
@@ -1693,7 +1693,7 @@ static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride,
 #define HAS_SCALEROWDOWN8_SSE2
 static void ScaleRowDown8_SSE2(const uint8* src_ptr, int src_stride,
                                uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "pcmpeqb %%xmm5,%%xmm5 \n"
   "psrlq $0x38,%%xmm5 \n"
   "1: \n"
@@ -1722,7 +1722,7 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
                               uint16* dst_ptr, int src_width, int src_height) {
   int tmp_height = 0;
   intptr_t tmp_src = 0;
-  asm volatile (
+  asm volatile(
   "pxor %%xmm4,%%xmm4 \n"
   "sub $0x1,%5 \n"
   "1: \n"
@@ -2263,7 +2263,7 @@ extern "C" void ScaleFilterRows_SSSE3(uint8* dst_ptr,
 #elif defined(__x86_64__)
 static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
                                   uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "lea (%3,%3,2),%%r10 \n"
   "pxor %%xmm7,%%xmm7 \n"
   "1:"
@@ -2322,7 +2322,7 @@ static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
 #define HAS_SCALEROWDOWN34_SSSE3
 static void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride,
                                  uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "movdqa (%3),%%xmm3 \n"
   "movdqa (%4),%%xmm4 \n"
   "movdqa (%5),%%xmm5 \n"
@@ -2353,7 +2353,7 @@ static void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride,

 static void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                        uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "movdqa (%4),%%xmm2 \n"  // _shuf01
   "movdqa (%5),%%xmm3 \n"  // _shuf11
   "movdqa (%6),%%xmm4 \n"  // _shuf21
@@ -2410,7 +2410,7 @@ static void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride,

 static void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                        uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "movdqa (%4),%%xmm2 \n"  // _shuf01
   "movdqa (%5),%%xmm3 \n"  // _shuf11
   "movdqa (%6),%%xmm4 \n"  // _shuf21
@@ -2471,7 +2471,7 @@ static void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride,
 #define HAS_SCALEROWDOWN38_SSSE3
 static void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride,
                                  uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "movdqa (%3),%%xmm4 \n"
   "movdqa (%4),%%xmm5 \n"
   "1:"
@@ -2498,7 +2498,7 @@ static void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride,

 static void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                        uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "movdqa (%4),%%xmm4 \n"
   "movdqa (%5),%%xmm5 \n"
   "movdqa (%6),%%xmm6 \n"
@@ -2555,7 +2555,7 @@ static void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride,

 static void ScaleRowDown38_2_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                        uint8* dst_ptr, int dst_width) {
-  asm volatile (
+  asm volatile(
   "movdqa (%4),%%xmm4 \n"
   "movdqa (%5),%%xmm5 \n"
   "movdqa (%6),%%xmm6 \n"
@@ -2597,7 +2597,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
                                  const uint8* src_ptr, int src_stride,
                                  int dst_width, int source_y_fraction) {
   if (source_y_fraction == 0) {
-    asm volatile (
+    asm volatile(
     "1:"
     "movdqa (%1),%%xmm0 \n"
     "lea 0x10(%1),%1 \n"
@@ -2615,7 +2615,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
     );
     return;
   } else if (source_y_fraction == 128) {
-    asm volatile (
+    asm volatile(
     "1:"
     "movdqa (%1),%%xmm0 \n"
     "movdqa (%1,%3,1),%%xmm2 \n"
@@ -2635,7 +2635,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
     );
     return;
   } else {
-    asm volatile (
+    asm volatile(
     "mov %3,%%eax \n"
     "movd %%eax,%%xmm6 \n"
     "punpcklwd %%xmm6,%%xmm6 \n"
@@ -2688,7 +2688,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
                                   const uint8* src_ptr, int src_stride,
                                   int dst_width, int source_y_fraction) {
   if (source_y_fraction <= 1) {
-    asm volatile (
+    asm volatile(
     "1:"
     "movdqa (%1),%%xmm0 \n"
     "lea 0x10(%1),%1 \n"
@@ -2706,7 +2706,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
     );
     return;
   } else if (source_y_fraction == 128) {
-    asm volatile (
+    asm volatile(
     "1:"
     "movdqa (%1),%%xmm0 \n"
     "movdqa (%1,%3,1),%%xmm2 \n"
@@ -2726,7 +2726,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
     );
     return;
   } else {
-    asm volatile (
+    asm volatile(
     "mov %3,%%eax \n"
     "shr %%eax \n"
     "mov %%al,%%ah \n"
@@ -8,12 +8,11 @@
  * be found in the AUTHORS file in the root of the source tree.
  */

-#include "unit_test.h"
-
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>

+#include "unit_test/unit_test.h"
 #include "libyuv/basic_types.h"
 #include "libyuv/compare.h"
 #include "libyuv/cpu_id.h"
@@ -153,7 +152,7 @@ TEST_F(libyuvTest, BenchmarkPsnr_C) {
                    _benchmark_width, _benchmark_height);

   c_time = (get_time() - c_time) / _benchmark_iterations;
-  printf ("BenchmarkPsnr_C - %8d us c\n", (int)(c_time*1e6));
+  printf("BenchmarkPsnr_C - %8.2f us c\n", c_time * 1e6);

   MaskCpuFlags(-1);

@@ -176,7 +175,7 @@ TEST_F(libyuvTest, BenchmarkPsnr_OPT) {
                    _benchmark_width, _benchmark_height);

   opt_time = (get_time() - opt_time) / _benchmark_iterations;
-  printf ("BenchmarkPsnr_OPT - %8d us opt\n", (int)(opt_time*1e6));
+  printf("BenchmarkPsnr_OPT - %8.2f us opt\n", opt_time * 1e6);

   EXPECT_EQ(0, 0);

@@ -274,7 +273,7 @@ TEST_F(libyuvTest, BenchmarkSsim_C) {
                    _benchmark_width, _benchmark_height);

   c_time = (get_time() - c_time) / _benchmark_iterations;
-  printf ("BenchmarkSsim_C - %8d us c\n", (int)(c_time*1e6));
+  printf("BenchmarkSsim_C - %8.2f us c\n", c_time * 1e6);

   MaskCpuFlags(-1);

@@ -297,7 +296,7 @@ TEST_F(libyuvTest, BenchmarkSsim_OPT) {
                    _benchmark_width, _benchmark_height);

   opt_time = (get_time() - opt_time) / _benchmark_iterations;
-  printf ("BenchmarkPsnr_OPT - %8d us opt\n", (int)(opt_time*1e6));
+  printf("BenchmarkPsnr_OPT - %8.2f us opt\n", opt_time * 1e6);

   EXPECT_EQ(0, 0);
