Mirror of https://chromium.googlesource.com/libyuv/libyuv (synced 2026-01-01 03:12:16 +08:00)
Mirror a plane at a time so each can check cpu/alignment independently.

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/370001

git-svn-id: http://libyuv.googlecode.com/svn/trunk@148 16f28f9a-4ce2-e073-06de-1de4eb20be90

parent ba03e4d99e
commit 42831e0aae
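The point of the change, concretely: in a contiguous I420 allocation the Y plane can pass the 16-byte alignment checks while the half-width U and V planes do not, so the old single dispatch in I420Mirror (removed below) dropped all three planes to the C path whenever any one of them failed. A minimal standalone sketch of that effect; the frame size and layout here are illustrative, not from this commit:

#include <cstdio>

// Hypothetical I420 layout: Y plane of W*H bytes, then U and V planes of
// ((W+1)/2)*((H+1)/2) bytes each. Whether U and V start 16-byte aligned
// depends entirely on the frame dimensions.
int main() {
  const int kWidth = 1284, kHeight = 721;  // illustrative size
  const long y_size = (long)kWidth * kHeight;
  const long half_size = (long)((kWidth + 1) / 2) * ((kHeight + 1) / 2);
  printf("U plane offset mod 16 = %ld\n", y_size % 16);                // 4
  printf("V plane offset mod 16 = %ld\n", (y_size + half_size) % 16);  // 6
  // With per-plane dispatch, Y can still take the aligned SSSE3 path on a
  // 16-byte-aligned base pointer; U and V independently fall back.
  return 0;
}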
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 146
+Version: 147
 License: BSD
 License File: LICENSE
@@ -278,6 +278,74 @@ int I420Copy(const uint8* src_y, int src_stride_y,
   return 0;
 }
 
+// Mirror a plane of data
+void MirrorPlane(const uint8* src_y, int src_stride_y,
+                 uint8* dst_y, int dst_stride_y,
+                 int width, int height) {
+  void (*MirrorRow)(const uint8* src, uint8* dst, int width);
+#if defined(HAS_MIRRORROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
+    MirrorRow = MirrorRow_NEON;
+  } else
+#endif
+#if defined(HAS_MIRRORROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
+    MirrorRow = MirrorRow_SSSE3;
+  } else
+#endif
+#if defined(HAS_MIRRORROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
+    MirrorRow = MirrorRow_SSE2;
+  } else
+#endif
+  {
+    MirrorRow = MirrorRow_C;
+  }
+
+  // Mirror plane
+  for (int y = 0; y < height; ++y) {
+    MirrorRow(src_y, dst_y, width);
+    src_y += src_stride_y;
+    dst_y += dst_stride_y;
+  }
+}
+
+// Mirror I420 with optional flipping
+int I420Mirror(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  if (!src_y || !src_u || !src_v ||
+      !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    int halfheight = (height + 1) >> 1;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_u = src_u + (halfheight - 1) * src_stride_u;
+    src_v = src_v + (halfheight - 1) * src_stride_v;
+    src_stride_y = -src_stride_y;
+    src_stride_u = -src_stride_u;
+    src_stride_v = -src_stride_v;
+  }
+
+  int halfwidth = (width + 1) >> 1;
+  int halfheight = (height + 1) >> 1;
+  if (dst_y) {
+    MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  }
+  MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
+  MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
+  return 0;
+}
+
 // Copy ARGB with optional flipping
 int ARGBCopy(const uint8* src_argb, int src_stride_argb,
              uint8* dst_argb, int dst_stride_argb,
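For callers, a hedged usage sketch of the API added above, assuming the declarations live in this tree's planar_functions header; the packed-buffer layout and helper name are illustrative:

// Mirror an I420 frame horizontally; pass a negative height to also flip it
// vertically (the convention handled by I420Mirror above).
void MirrorI420Frame(const uint8* src, uint8* dst,
                     int width, int height, bool also_flip) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  const uint8* src_u = src + width * height;
  const uint8* src_v = src_u + halfwidth * halfheight;
  uint8* dst_u = dst + width * height;
  uint8* dst_v = dst_u + halfwidth * halfheight;
  I420Mirror(src, width, src_u, halfwidth, src_v, halfwidth,
             dst, width, dst_u, halfwidth, dst_v, halfwidth,
             width, also_flip ? -height : height);
}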
@@ -299,89 +367,6 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
   return 0;
 }
 
-int I420Mirror(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  if (!src_y || !src_u || !src_v ||
-      !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  int halfwidth = (width + 1) >> 1;
-  int halfheight = (height + 1) >> 1;
-
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    halfheight = (height + 1) >> 1;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_u = src_u + (halfheight - 1) * src_stride_u;
-    src_v = src_v + (halfheight - 1) * src_stride_v;
-    src_stride_y = -src_stride_y;
-    src_stride_u = -src_stride_u;
-    src_stride_v = -src_stride_v;
-  }
-  void (*ReverseRow)(const uint8* src, uint8* dst, int width);
-#if defined(HAS_REVERSE_ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) &&
-      IS_ALIGNED(width, 32)) {
-    ReverseRow = ReverseRow_NEON;
-  } else
-#endif
-#if defined(HAS_REVERSE_ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) &&
-      IS_ALIGNED(width, 32) &&
-      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
-      IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
-      IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
-      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16) &&
-      IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
-      IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
-    ReverseRow = ReverseRow_SSSE3;
-  } else
-#endif
-#if defined(HAS_REVERSE_ROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(width, 32) &&
-      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
-      IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
-      IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
-      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16) &&
-      IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
-      IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
-    ReverseRow = ReverseRow_SSE2;
-  } else
-#endif
-  {
-    ReverseRow = ReverseRow_C;
-  }
-
-  // Y Plane
-  int y;
-  for (y = 0; y < height; ++y) {
-    ReverseRow(src_y, dst_y, width);
-    src_y += src_stride_y;
-    dst_y += dst_stride_y;
-  }
-  // U Plane
-  for (y = 0; y < halfheight; ++y) {
-    ReverseRow(src_u, dst_u, halfwidth);
-    src_u += src_stride_u;
-    dst_u += dst_stride_u;
-  }
-  // V Plane
-  for (y = 0; y < halfheight; ++y) {
-    ReverseRow(src_v, dst_v, halfwidth);
-    src_v += src_stride_v;
-    dst_v += dst_stride_v;
-  }
-  return 0;
-}
-
 #if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
 #define HAS_HALFROW_SSE2
 __declspec(naked)
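Both the old and the new code size the chroma planes with the same rounding; a one-line worked example of that convention:

// Chroma dimensions round up, so odd luma sizes are fully covered:
int halfwidth  = (width + 1) >> 1;   // width 7  -> 4 chroma samples per row
int halfheight = (height + 1) >> 1;  // height 5 -> 3 chroma rows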
@@ -24,7 +24,7 @@ extern "C" {
     !defined(YUV_DISABLE_ASM)
 // Note static const preferred, but gives internal compiler error on gcc 4.2
 // Shuffle table for reversing the bytes of UV channels.
-uvec8 kShuffleReverseUV = {
+uvec8 kShuffleMirrorUV = {
   14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
 };
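The table above is a pshufb mask: byte i of the result is source byte table[i]. Indices 14, 12, ..., 0 gather the U (even) bytes in reverse order into the low 8 bytes while 15, 13, ..., 1 gather the V (odd) bytes in reverse into the high 8 bytes, so one instruction both deinterleaves and mirrors a 16-byte UV block. A scalar model of that permutation (illustrative, not part of the commit):

#include <stdint.h>

// Scalar equivalent of 'pshufb xmm, kShuffleMirrorUV' on one 16-byte block.
static const uint8_t kMask[16] = {
  14, 12, 10, 8, 6, 4, 2, 0, 15, 13, 11, 9, 7, 5, 3, 1
};

void ShuffleMirrorUVBlock(const uint8_t src[16], uint8_t dst[16]) {
  for (int i = 0; i < 16; ++i) {
    dst[i] = src[kMask[i]];  // low half: reversed U; high half: reversed V
  }
}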
@@ -47,7 +47,7 @@ uvec8 kShuffleReverseUV = {
 #endif
 #endif
 
-typedef void (*reverse_uv_func)(const uint8*, uint8*, uint8*, int);
+typedef void (*mirror_uv_func)(const uint8*, uint8*, uint8*, int);
 typedef void (*rotate_uv_wx8_func)(const uint8*, int,
                                    uint8*, int,
                                    uint8*, int, int);
@@ -58,10 +58,10 @@ typedef void (*rotate_wx8_func)(const uint8*, int, uint8*, int, int);
 typedef void (*rotate_wxh_func)(const uint8*, int, uint8*, int, int, int);
 
 #ifdef __ARM_NEON__
-#define HAS_REVERSE_ROW_NEON
-void ReverseRow_NEON(const uint8* src, uint8* dst, int width);
-#define HAS_REVERSE_ROW_UV_NEON
-void ReverseRowUV_NEON(const uint8* src,
+#define HAS_MIRRORROW_NEON
+void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
+#define HAS_MIRRORROW_UV_NEON
+void MirrorRowUV_NEON(const uint8* src,
                       uint8* dst_a, uint8* dst_b,
                       int width);
 #define HAS_TRANSPOSE_WX8_NEON
@@ -852,37 +852,37 @@ void RotatePlane270(const uint8* src, int src_stride,
 void RotatePlane180(const uint8* src, int src_stride,
                     uint8* dst, int dst_stride,
                     int width, int height) {
-  void (*ReverseRow)(const uint8* src, uint8* dst, int width);
-#if defined(HAS_REVERSE_ROW_NEON)
+  void (*MirrorRow)(const uint8* src, uint8* dst, int width);
+#if defined(HAS_MIRRORROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
-    ReverseRow = ReverseRow_NEON;
+    MirrorRow = MirrorRow_NEON;
   } else
 #endif
-#if defined(HAS_REVERSE_ROW_SSSE3)
+#if defined(HAS_MIRRORROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3) &&
       IS_ALIGNED(width, 16) &&
       IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
       IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    ReverseRow = ReverseRow_SSSE3;
+    MirrorRow = MirrorRow_SSSE3;
   } else
 #endif
-#if defined(HAS_REVERSE_ROW_SSE2)
+#if defined(HAS_MIRRORROW_SSE2)
   if (TestCpuFlag(kCpuHasSSE2) &&
       IS_ALIGNED(width, 16) &&
       IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
       IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    ReverseRow = ReverseRow_SSE2;
+    MirrorRow = MirrorRow_SSE2;
   } else
 #endif
   {
-    ReverseRow = ReverseRow_C;
+    MirrorRow = MirrorRow_C;
   }
 
   // Rotate by 180 is a mirror and vertical flip
   src += src_stride * (height - 1);
 
   for (int y = 0; y < height; ++y) {
-    ReverseRow(src, dst, width);
+    MirrorRow(src, dst, width);
     src -= src_stride;
     dst += dst_stride;
   }
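RotatePlane180 composes the two primitives: read the source bottom-up and mirror each row. A plain C++ sketch of the same loop using only the C kernel (types as in this tree; the function name is mine):

// Rotate-by-180 = horizontal mirror + vertical flip, as the loop above does.
void RotatePlane180_Sketch(const uint8* src, int src_stride,
                           uint8* dst, int dst_stride,
                           int width, int height) {
  src += src_stride * (height - 1);  // start at the last source row
  for (int y = 0; y < height; ++y) {
    MirrorRow_C(src, dst, width);    // reverse one row
    src -= src_stride;               // walk up the source
    dst += dst_stride;               // walk down the destination
  }
}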
@@ -1004,9 +1004,9 @@ void RotateUV270(const uint8* src, int src_stride,
 }
 
 #if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
-#define HAS_REVERSE_ROW_UV_SSSE3
+#define HAS_MIRRORROW_UV_SSSE3
 __declspec(naked)
-void ReverseRowUV_SSSE3(const uint8* src,
+void MirrorRowUV_SSSE3(const uint8* src,
                         uint8* dst_a, uint8* dst_b,
                         int width) {
 __asm {
@@ -1015,7 +1015,7 @@ __asm {
     mov edx, [esp + 4 + 8]   // dst_a
     mov edi, [esp + 4 + 12]  // dst_b
     mov ecx, [esp + 4 + 16]  // width
-    movdqa xmm5, kShuffleReverseUV
+    movdqa xmm5, kShuffleMirrorUV
     lea eax, [eax + ecx * 2 - 16]
 
  convertloop:
@@ -1035,8 +1035,8 @@ __asm {
 
 #elif (defined(__i386__) || defined(__x86_64__)) && \
     !defined(YUV_DISABLE_ASM)
-#define HAS_REVERSE_ROW_UV_SSSE3
-void ReverseRowUV_SSSE3(const uint8* src,
+#define HAS_MIRRORROW_UV_SSSE3
+void MirrorRowUV_SSSE3(const uint8* src,
                         uint8* dst_a, uint8* dst_b,
                         int width) {
   intptr_t temp_width = static_cast<intptr_t>(width);
@@ -1057,7 +1057,7 @@ void ReverseRowUV_SSSE3(const uint8* src,
     "+r"(dst_a),      // %1
     "+r"(dst_b),      // %2
     "+r"(temp_width)  // %3
-  : "m"(kShuffleReverseUV)  // %4
+  : "m"(kShuffleMirrorUV)   // %4
   : "memory", "cc"
 #if defined(__SSE2__)
     , "xmm0", "xmm5"
@@ -1066,7 +1066,7 @@ void ReverseRowUV_SSSE3(const uint8* src,
 }
 #endif
 
-static void ReverseRowUV_C(const uint8* src,
+static void MirrorRowUV_C(const uint8* src,
                            uint8* dst_a, uint8* dst_b,
                            int width) {
   int i;
@@ -1083,29 +1083,29 @@ void RotateUV180(const uint8* src, int src_stride,
                  uint8* dst_b, int dst_stride_b,
                  int width, int height) {
   int i;
-  reverse_uv_func ReverseRow;
+  mirror_uv_func MirrorRow;
 
-#if defined(HAS_REVERSE_ROW_UV_NEON)
+#if defined(HAS_MIRRORROW_UV_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
-    ReverseRow = ReverseRowUV_NEON;
+    MirrorRow = MirrorRowUV_NEON;
   } else
 #endif
-#if defined(HAS_REVERSE_ROW_UV_SSSE3)
+#if defined(HAS_MIRRORROW_UV_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3) &&
       IS_ALIGNED(width, 16) &&
       IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
-    ReverseRow = ReverseRowUV_SSSE3;
+    MirrorRow = MirrorRowUV_SSSE3;
   } else
 #endif
   {
-    ReverseRow = ReverseRowUV_C;
+    MirrorRow = MirrorRowUV_C;
   }
 
   dst_a += dst_stride_a * (height - 1);
   dst_b += dst_stride_b * (height - 1);
 
   for (i = 0; i < height; ++i) {
-    ReverseRow(src, dst_a, dst_b, width);
+    MirrorRow(src, dst_a, dst_b, width);
 
     src += src_stride;      // down one line at a time
     dst_a -= dst_stride_a;  // nominally up one line at a time
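The contract of the renamed MirrorRowUV kernels: src holds interleaved UVUV... pairs; write the U bytes, reversed, to dst_a and the V bytes, reversed, to dst_b. A scalar sketch consistent with the C version whose head appears above (the function name is mine):

static void MirrorRowUV_Sketch(const uint8* src,
                               uint8* dst_a, uint8* dst_b,
                               int width) {
  src += (width - 1) << 1;  // last UV pair
  for (int i = 0; i < width; ++i) {
    dst_a[i] = src[0];      // U component, reversed order
    dst_b[i] = src[1];      // V component, reversed order
    src -= 2;
  }
}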
@@ -19,7 +19,7 @@ extern "C" {
 
 #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
 
-void ReverseRow_NEON(const uint8* src, uint8* dst, int width) {
+void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
   asm volatile (
     // compute where to start writing destination
     "add %1, %2 \n"
@@ -38,7 +38,7 @@ void ReverseRow_NEON(const uint8* src, uint8* dst, int width) {
     "beq 2f \n"
 
     // back of destination by the size of the register that is
-    // going to be reversed
+    // going to be mirrored
     "sub %1, #16 \n"
 
     // the loop needs to run on blocks of 16. what will be left
@@ -50,12 +50,12 @@ void ReverseRow_NEON(const uint8* src, uint8* dst, int width) {
   "1: \n"
     "vld1.8 {q0}, [%0]! \n"  // src += 16
 
-    // reverse the bytes in the 64 bit segments. unable to reverse
+    // mirror the bytes in the 64 bit segments. unable to mirror
     // the bytes in the entire 128 bits in one go.
     "vrev64.8 q0, q0 \n"
 
-    // because of the inability to reverse the entire 128 bits
-    // reverse the writing out of the two 64 bit segments.
+    // because of the inability to mirror the entire 128 bits
+    // mirror the writing out of the two 64 bit segments.
     "vst1.8 {d1}, [%1]! \n"
    "vst1.8 {d0}, [%1], r3 \n"  // dst -= 16
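vrev64.8 reverses bytes only within each 64-bit half of q0, so the code completes the 128-bit mirror by storing d1 before d0. The same two-step trick in portable C++ (illustrative; __builtin_bswap64 is a GCC/Clang builtin):

#include <stdint.h>
#include <string.h>

// Mirror 16 bytes the NEON way: byte-reverse each 64-bit half, then write
// the halves in swapped order.
void Mirror16(const uint8_t src[16], uint8_t dst[16]) {
  uint64_t lo, hi;
  memcpy(&lo, src, 8);
  memcpy(&hi, src + 8, 8);
  lo = __builtin_bswap64(lo);  // like vrev64.8 on d0
  hi = __builtin_bswap64(hi);  // like vrev64.8 on d1
  memcpy(dst, &hi, 8);         // d1 first...
  memcpy(dst + 8, &lo, 8);     // ...then d0 completes the mirror
}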
@@ -272,7 +272,7 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
   );
 }
 
-void ReverseRowUV_NEON(const uint8* src,
+void MirrorRowUV_NEON(const uint8* src,
                       uint8* dst_a, uint8* dst_b,
                       int width) {
   asm volatile (
@@ -291,7 +291,7 @@ void ReverseRowUV_NEON(const uint8* src,
     "mov r12, #-8 \n"
 
     // back of destination by the size of the register that is
-    // going to be reversed
+    // going to be mirrored
     "sub %1, #8 \n"
     "sub %2, #8 \n"
 
@@ -304,7 +304,7 @@ void ReverseRowUV_NEON(const uint8* src,
   "1: \n"
     "vld2.8 {d0, d1}, [%0]! \n"  // src += 16
 
-    // reverse the bytes in the 64 bit segments
+    // mirror the bytes in the 64 bit segments
     "vrev64.8 q0, q0 \n"
 
     "vst1.8 {d0}, [%1], r12 \n"  // dst_a -= 8
source/row.h (14 lines changed)
@@ -39,8 +39,8 @@
 #define HAS_FASTCONVERTYUVTOBGRAROW_SSSE3
 #define HAS_FASTCONVERTYUVTOABGRROW_SSSE3
 #define HAS_FASTCONVERTYUV444TOARGBROW_SSSE3
-#define HAS_REVERSE_ROW_SSSE3
-#define HAS_REVERSE_ROW_SSE2
+#define HAS_MIRRORROW_SSSE3
+#define HAS_MIRRORROW_SSE2
 #endif
 
 // The following are available on Windows platforms
@@ -58,7 +58,7 @@
 
 // The following are available on Neon platforms
 #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
-#define HAS_REVERSE_ROW_NEON
+#define HAS_MIRRORROW_NEON
 #define HAS_FASTCONVERTYUVTOARGBROW_NEON
 #define HAS_FASTCONVERTYUVTOBGRAROW_NEON
 #define HAS_FASTCONVERTYUVTOABGRROW_NEON
@@ -107,10 +107,10 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
 void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
                        uint8* dst_u, uint8* dst_v, int width);
 
-void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width);
-void ReverseRow_SSE2(const uint8* src, uint8* dst, int width);
-void ReverseRow_NEON(const uint8* src, uint8* dst, int width);
-void ReverseRow_C(const uint8* src, uint8* dst, int width);
+void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
+void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
+void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
+void MirrorRow_C(const uint8* src, uint8* dst, int width);
 
 void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
 void BGRAToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
@@ -360,7 +360,7 @@ void FastConvertYToARGBRow_C(const uint8* y_buf,
   }
 }
 
-void ReverseRow_C(const uint8* src, uint8* dst, int width) {
+void MirrorRow_C(const uint8* src, uint8* dst, int width) {
   src += width - 1;
   for (int i = 0; i < width; ++i) {
     dst[i] = src[0];
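The hunk above is cut off by the diff's context window; judging from the visible lines, the loop presumably finishes by stepping src backward each iteration. For reference, a sketch of the complete scalar kernel consistent with what is shown:

void MirrorRow_C(const uint8* src, uint8* dst, int width) {
  src += width - 1;               // start at the last source byte
  for (int i = 0; i < width; ++i) {
    dst[i] = src[0];              // copy forward into dst...
    --src;                        // ...while walking src backward
  }
}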
@@ -644,14 +644,14 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
 }
 #endif
 
-#ifdef HAS_REVERSE_ROW_SSSE3
+#ifdef HAS_MIRRORROW_SSSE3
 
 // Shuffle table for reversing the bytes.
-CONST uvec8 kShuffleReverse = {
+CONST uvec8 kShuffleMirror = {
   15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
 };
 
-void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
   intptr_t temp_width = static_cast<intptr_t>(width);
   asm volatile (
   "movdqa %3,%%xmm5 \n"
@@ -666,7 +666,7 @@ void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
   : "+r"(src),   // %0
     "+r"(dst),   // %1
     "+r"(temp_width)  // %2
-  : "m"(kShuffleReverse)  // %3
+  : "m"(kShuffleMirror)   // %3
   : "memory", "cc"
 #if defined(__SSE2__)
     , "xmm0", "xmm5"
@@ -675,15 +675,15 @@ void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
 }
 #endif
 
-#ifdef HAS_REVERSE_ROW_SSE2
+#ifdef HAS_MIRRORROW_SSE2
 
-void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
   intptr_t temp_width = static_cast<intptr_t>(width);
   asm volatile (
   "lea -0x10(%0),%0 \n"
 "1: \n"
-  "movdqa (%0,%2),%%xmm0 \n"
-  "movdqa %%xmm0,%%xmm1 \n"
+  "movdqu (%0,%2),%%xmm0 \n"
+  "movdqu %%xmm0,%%xmm1 \n"
   "psllw $0x8,%%xmm0 \n"
   "psrlw $0x8,%%xmm1 \n"
   "por %%xmm1,%%xmm0 \n"
@@ -691,7 +691,7 @@ void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
   "pshufhw $0x1b,%%xmm0,%%xmm0 \n"
   "pshufd $0x4e,%%xmm0,%%xmm0 \n"
   "sub $0x10,%2 \n"
-  "movdqa %%xmm0,(%1) \n"
+  "movdqu %%xmm0,(%1) \n"
   "lea 0x10(%1),%1 \n"
   "ja 1b \n"
   : "+r"(src),  // %0
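Without pshufb, the SSE2 path mirrors 16 bytes in three steps: swap the bytes within each 16-bit word (psllw/psrlw/por), reverse the four words inside each 64-bit half (pshuflw/pshufhw with shuffle code 0x1b), then swap the two halves (pshufd 0x4e). A scalar model of that pipeline (illustrative only):

#include <stdint.h>

void Mirror16_Sse2Model(const uint8_t src[16], uint8_t dst[16]) {
  uint8_t t[16], u[16];
  for (int i = 0; i < 16; i += 2) {       // psllw/psrlw/por: swap byte pairs
    t[i] = src[i + 1];
    t[i + 1] = src[i];
  }
  for (int half = 0; half < 2; ++half) {  // pshuflw/pshufhw $0x1b
    for (int w = 0; w < 4; ++w) {
      u[half * 8 + w * 2]     = t[half * 8 + (3 - w) * 2];
      u[half * 8 + w * 2 + 1] = t[half * 8 + (3 - w) * 2 + 1];
    }
  }
  for (int i = 0; i < 8; ++i) {           // pshufd $0x4e: swap qwords
    dst[i] = u[i + 8];
    dst[i + 8] = u[i];
  }
}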
@@ -1169,20 +1169,20 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
 #endif
 #endif
 
-#ifdef HAS_REVERSE_ROW_SSSE3
+#ifdef HAS_MIRRORROW_SSSE3
 
 // Shuffle table for reversing the bytes.
-static const uvec8 kShuffleReverse = {
+static const uvec8 kShuffleMirror = {
   15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
 };
 
 __declspec(naked)
-void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
 __asm {
     mov eax, [esp + 4]   // src
     mov edx, [esp + 8]   // dst
     mov ecx, [esp + 12]  // width
-    movdqa xmm5, kShuffleReverse
+    movdqa xmm5, kShuffleMirror
     lea eax, [eax - 16]
  convertloop:
     movdqa xmm0, [eax + ecx]
@@ -1196,18 +1196,20 @@ __asm {
 }
 #endif
 
-#ifdef HAS_REVERSE_ROW_SSE2
+#ifdef HAS_MIRRORROW_SSE2
 
+// SSE2 version has movdqu so it can be used on misaligned buffers when SSSE3
+// version cannot.
 __declspec(naked)
-void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
 __asm {
     mov eax, [esp + 4]   // src
     mov edx, [esp + 8]   // dst
    mov ecx, [esp + 12]  // width
    lea eax, [eax - 16]
 convertloop:
-    movdqa xmm0, [eax + ecx]
-    movdqa xmm1, xmm0  // swap bytes
+    movdqu xmm0, [eax + ecx]
+    movdqu xmm1, xmm0  // swap bytes
     psllw xmm0, 8
     psrlw xmm1, 8
     por xmm0, xmm1
@@ -1215,7 +1217,7 @@ __asm {
     pshufhw xmm0, xmm0, 0x1b
     pshufd xmm0, xmm0, 0x4e  // swap qwords
     sub ecx, 16
-    movdqa [edx], xmm0
+    movdqu [edx], xmm0
     lea edx, [edx + 16]
     ja convertloop
     ret
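The movdqa-to-movdqu switch is what lets MirrorPlane's SSE2 branch skip the pointer and stride alignment tests that the SSSE3 branch still needs. A sketch of the two dispatch predicates implied by the diff (the helper names are mine; TestCpuFlag and IS_ALIGNED are from this tree):

// SSSE3 path: aligned 16-byte loads, so data and stride must be aligned.
bool CanUseMirrorRowSSSE3(const uint8* src, int src_stride, int width) {
  return TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
         IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16);
}

// SSE2 path: movdqu tolerates misalignment, so only the width is checked.
bool CanUseMirrorRowSSE2(int width) {
  return TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16);
}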