diff --git a/README.chromium b/README.chromium
index 565a88ed9..40188988c 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 146
+Version: 147
 License: BSD
 License File: LICENSE
 
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index 35b250477..4051e7e54 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -278,6 +278,74 @@ int I420Copy(const uint8* src_y, int src_stride_y,
   return 0;
 }
 
+// Mirror a plane of data
+void MirrorPlane(const uint8* src_y, int src_stride_y,
+                 uint8* dst_y, int dst_stride_y,
+                 int width, int height) {
+  void (*MirrorRow)(const uint8* src, uint8* dst, int width);
+#if defined(HAS_MIRRORROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
+    MirrorRow = MirrorRow_NEON;
+  } else
+#endif
+#if defined(HAS_MIRRORROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
+    MirrorRow = MirrorRow_SSSE3;
+  } else
+#endif
+#if defined(HAS_MIRRORROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
+    MirrorRow = MirrorRow_SSE2;
+  } else
+#endif
+  {
+    MirrorRow = MirrorRow_C;
+  }
+
+  // Mirror plane
+  for (int y = 0; y < height; ++y) {
+    MirrorRow(src_y, dst_y, width);
+    src_y += src_stride_y;
+    dst_y += dst_stride_y;
+  }
+}
+
+// Mirror I420 with optional flipping
+int I420Mirror(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  if (!src_y || !src_u || !src_v ||
+      !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    int halfheight = (height + 1) >> 1;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_u = src_u + (halfheight - 1) * src_stride_u;
+    src_v = src_v + (halfheight - 1) * src_stride_v;
+    src_stride_y = -src_stride_y;
+    src_stride_u = -src_stride_u;
+    src_stride_v = -src_stride_v;
+  }
+
+  int halfwidth = (width + 1) >> 1;
+  int halfheight = (height + 1) >> 1;
+  if (dst_y) {
+    MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  }
+  MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
+  MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
+  return 0;
+}
+
 // Copy ARGB with optional flipping
 int ARGBCopy(const uint8* src_argb, int src_stride_argb,
              uint8* dst_argb, int dst_stride_argb,
@@ -299,89 +367,6 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
   return 0;
 }
 
-int I420Mirror(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  if (!src_y || !src_u || !src_v ||
-      !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  int halfwidth = (width + 1) >> 1;
-  int halfheight = (height + 1) >> 1;
-
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    halfheight = (height + 1) >> 1;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_u = src_u + (halfheight - 1) * src_stride_u;
-    src_v = src_v + (halfheight - 1) * src_stride_v;
-    src_stride_y = -src_stride_y;
-    src_stride_u = -src_stride_u;
-    src_stride_v = -src_stride_v;
-  }
-  void (*ReverseRow)(const uint8* src, uint8* dst, int width);
-#if defined(HAS_REVERSE_ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) &&
-      IS_ALIGNED(width, 32)) {
-    ReverseRow = ReverseRow_NEON;
-  } else
-#endif
-#if defined(HAS_REVERSE_ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) &&
-      IS_ALIGNED(width, 32) &&
-      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
-      IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
-      IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
-      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16) &&
-      IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
-      IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
-    ReverseRow = ReverseRow_SSSE3;
-  } else
-#endif
-#if defined(HAS_REVERSE_ROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(width, 32) &&
-      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
-      IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
-      IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
-      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16) &&
-      IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
-      IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
-    ReverseRow = ReverseRow_SSE2;
-  } else
-#endif
-  {
-    ReverseRow = ReverseRow_C;
-  }
-
-  // Y Plane
-  int y;
-  for (y = 0; y < height; ++y) {
-    ReverseRow(src_y, dst_y, width);
-    src_y += src_stride_y;
-    dst_y += dst_stride_y;
-  }
-  // U Plane
-  for (y = 0; y < halfheight; ++y) {
-    ReverseRow(src_u, dst_u, halfwidth);
-    src_u += src_stride_u;
-    dst_u += dst_stride_u;
-  }
-  // V Plane
-  for (y = 0; y < halfheight; ++y) {
-    ReverseRow(src_v, dst_v, halfwidth);
-    src_v += src_stride_v;
-    dst_v += dst_stride_v;
-  }
-  return 0;
-}
-
 #if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
 #define HAS_HALFROW_SSE2
 __declspec(naked)
diff --git a/source/rotate.cc b/source/rotate.cc
index b44fab80a..8f54ae1e1 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -24,7 +24,7 @@ extern "C" {
     !defined(YUV_DISABLE_ASM)
 // Note static const preferred, but gives internal compiler error on gcc 4.2
 // Shuffle table for reversing the bytes of UV channels.
-uvec8 kShuffleReverseUV = {
+uvec8 kShuffleMirrorUV = {
   14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
 };
@@ -47,7 +47,7 @@ uvec8 kShuffleReverseUV = {
 #endif
 #endif
 
-typedef void (*reverse_uv_func)(const uint8*, uint8*, uint8*, int);
+typedef void (*mirror_uv_func)(const uint8*, uint8*, uint8*, int);
 typedef void (*rotate_uv_wx8_func)(const uint8*, int,
                                    uint8*, int,
                                    uint8*, int, int);
@@ -58,10 +58,10 @@ typedef void (*rotate_wx8_func)(const uint8*, int, uint8*, int, int);
 typedef void (*rotate_wxh_func)(const uint8*, int, uint8*, int, int, int);
 
 #ifdef __ARM_NEON__
-#define HAS_REVERSE_ROW_NEON
-void ReverseRow_NEON(const uint8* src, uint8* dst, int width);
-#define HAS_REVERSE_ROW_UV_NEON
-void ReverseRowUV_NEON(const uint8* src,
+#define HAS_MIRRORROW_NEON
+void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
+#define HAS_MIRRORROW_UV_NEON
+void MirrorRowUV_NEON(const uint8* src,
                        uint8* dst_a, uint8* dst_b,
                        int width);
 #define HAS_TRANSPOSE_WX8_NEON
@@ -852,37 +852,37 @@ void RotatePlane270(const uint8* src, int src_stride,
 void RotatePlane180(const uint8* src, int src_stride,
                     uint8* dst, int dst_stride,
                     int width, int height) {
-  void (*ReverseRow)(const uint8* src, uint8* dst, int width);
-#if defined(HAS_REVERSE_ROW_NEON)
+  void (*MirrorRow)(const uint8* src, uint8* dst, int width);
+#if defined(HAS_MIRRORROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
-    ReverseRow = ReverseRow_NEON;
+    MirrorRow = MirrorRow_NEON;
   } else
 #endif
-#if defined(HAS_REVERSE_ROW_SSSE3)
+#if defined(HAS_MIRRORROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3) &&
       IS_ALIGNED(width, 16) &&
       IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
       IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    ReverseRow = ReverseRow_SSSE3;
+    MirrorRow = MirrorRow_SSSE3;
   } else
 #endif
-#if defined(HAS_REVERSE_ROW_SSE2)
+#if defined(HAS_MIRRORROW_SSE2)
   if (TestCpuFlag(kCpuHasSSE2) &&
       IS_ALIGNED(width, 16) &&
       IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
       IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    ReverseRow = ReverseRow_SSE2;
+    MirrorRow = MirrorRow_SSE2;
   } else
 #endif
   {
-    ReverseRow = ReverseRow_C;
+    MirrorRow = MirrorRow_C;
   }
 
   // Rotate by 180 is a mirror and vertical flip
   src += src_stride * (height - 1);
 
   for (int y = 0; y < height; ++y) {
-    ReverseRow(src, dst, width);
+    MirrorRow(src, dst, width);
     src -= src_stride;
     dst += dst_stride;
   }
@@ -1004,9 +1004,9 @@ void RotateUV270(const uint8* src, int src_stride,
 }
 
 #if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
-#define HAS_REVERSE_ROW_UV_SSSE3
+#define HAS_MIRRORROW_UV_SSSE3
 __declspec(naked)
-void ReverseRowUV_SSSE3(const uint8* src,
+void MirrorRowUV_SSSE3(const uint8* src,
                         uint8* dst_a, uint8* dst_b,
                         int width) {
 __asm {
@@ -1015,7 +1015,7 @@ __asm {
    mov       edx, [esp + 4 + 8]    // dst_a
    mov       edi, [esp + 4 + 12]   // dst_b
    mov       ecx, [esp + 4 + 16]   // width
-    movdqa    xmm5, kShuffleReverseUV
+    movdqa    xmm5, kShuffleMirrorUV
    lea       eax, [eax + ecx * 2 - 16]
 
  convertloop:
@@ -1035,8 +1035,8 @@ __asm {
 
 #elif (defined(__i386__) || defined(__x86_64__)) && \
     !defined(YUV_DISABLE_ASM)
-#define HAS_REVERSE_ROW_UV_SSSE3
-void ReverseRowUV_SSSE3(const uint8* src,
+#define HAS_MIRRORROW_UV_SSSE3
+void MirrorRowUV_SSSE3(const uint8* src,
                         uint8* dst_a, uint8* dst_b,
                         int width) {
   intptr_t temp_width = static_cast<intptr_t>(width);
@@ -1057,7 +1057,7 @@ void ReverseRowUV_SSSE3(const uint8* src,
    "+r"(dst_a),       // %1
    "+r"(dst_b),       // %2
    "+r"(temp_width)   // %3
-  : "m"(kShuffleReverseUV)  // %4
+  : "m"(kShuffleMirrorUV)   // %4
  : "memory", "cc"
 #if defined(__SSE2__)
    , "xmm0", "xmm5"
@@ -1066,7 +1066,7 @@ void ReverseRowUV_SSSE3(const uint8* src,
 }
 #endif
 
-static void ReverseRowUV_C(const uint8* src,
+static void MirrorRowUV_C(const uint8* src,
                            uint8* dst_a, uint8* dst_b,
                            int width) {
   int i;
@@ -1083,29 +1083,29 @@ void RotateUV180(const uint8* src, int src_stride,
                  uint8* dst_b, int dst_stride_b,
                  int width, int height) {
   int i;
-  reverse_uv_func ReverseRow;
+  mirror_uv_func MirrorRow;
 
-#if defined(HAS_REVERSE_ROW_UV_NEON)
+#if defined(HAS_MIRRORROW_UV_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
-    ReverseRow = ReverseRowUV_NEON;
+    MirrorRow = MirrorRowUV_NEON;
   } else
 #endif
-#if defined(HAS_REVERSE_ROW_UV_SSSE3)
+#if defined(HAS_MIRRORROW_UV_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3) &&
       IS_ALIGNED(width, 16) &&
       IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
-    ReverseRow = ReverseRowUV_SSSE3;
+    MirrorRow = MirrorRowUV_SSSE3;
   } else
 #endif
   {
-    ReverseRow = ReverseRowUV_C;
+    MirrorRow = MirrorRowUV_C;
   }
 
   dst_a += dst_stride_a * (height - 1);
   dst_b += dst_stride_b * (height - 1);
 
   for (i = 0; i < height; ++i) {
-    ReverseRow(src, dst_a, dst_b, width);
+    MirrorRow(src, dst_a, dst_b, width);
     src += src_stride;      // down one line at a time
     dst_a -= dst_stride_a;  // nominally up one line at a time
diff --git a/source/rotate_neon.cc b/source/rotate_neon.cc
index c40e1c33e..449476364 100644
--- a/source/rotate_neon.cc
+++ b/source/rotate_neon.cc
@@ -19,7 +19,7 @@ extern "C" {
 
 #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
 
-void ReverseRow_NEON(const uint8* src, uint8* dst, int width) {
+void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
   asm volatile (
     // compute where to start writing destination
     "add         %1, %2                        \n"
@@ -38,7 +38,7 @@ void ReverseRow_NEON(const uint8* src, uint8* dst, int width) {
     "beq         2f                            \n"
 
     // back of destination by the size of the register that is
-    // going to be reversed
+    // going to be mirrored
     "sub         %1, #16                       \n"
 
     // the loop needs to run on blocks of 16. what will be left
@@ -50,12 +50,12 @@ void ReverseRow_NEON(const uint8* src, uint8* dst, int width) {
     "1:                                        \n"
       "vld1.8      {q0}, [%0]!                 \n"  // src += 16
 
-      // reverse the bytes in the 64 bit segments. unable to reverse
+      // mirror the bytes in the 64 bit segments. unable to mirror
       // the bytes in the entire 128 bits in one go.
       "vrev64.8    q0, q0                      \n"
 
-      // because of the inability to reverse the entire 128 bits
-      // reverse the writing out of the two 64 bit segments.
+      // because of the inability to mirror the entire 128 bits
+      // mirror the writing out of the two 64 bit segments.
       "vst1.8      {d1}, [%1]!                 \n"
      "vst1.8      {d0}, [%1], r3              \n"  // dst -= 16
 
@@ -272,7 +272,7 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
   );
 }
 
-void ReverseRowUV_NEON(const uint8* src,
+void MirrorRowUV_NEON(const uint8* src,
                        uint8* dst_a, uint8* dst_b,
                        int width) {
   asm volatile (
@@ -291,7 +291,7 @@ void ReverseRowUV_NEON(const uint8* src,
     "mov         r12, #-8                      \n"
 
     // back of destination by the size of the register that is
-    // going to be reversed
+    // going to be mirrored
     "sub         %1, #8                        \n"
     "sub         %2, #8                        \n"
 
@@ -304,7 +304,7 @@ void ReverseRowUV_NEON(const uint8* src,
     "1:                                        \n"
       "vld2.8      {d0, d1}, [%0]!             \n"  // src += 16
\n" // src += 16 - // reverse the bytes in the 64 bit segments + // mirror the bytes in the 64 bit segments "vrev64.8 q0, q0 \n" "vst1.8 {d0}, [%1], r12 \n" // dst_a -= 8 diff --git a/source/row.h b/source/row.h index ea0e55e6b..723c59388 100644 --- a/source/row.h +++ b/source/row.h @@ -39,8 +39,8 @@ #define HAS_FASTCONVERTYUVTOBGRAROW_SSSE3 #define HAS_FASTCONVERTYUVTOABGRROW_SSSE3 #define HAS_FASTCONVERTYUV444TOARGBROW_SSSE3 -#define HAS_REVERSE_ROW_SSSE3 -#define HAS_REVERSE_ROW_SSE2 +#define HAS_MIRRORROW_SSSE3 +#define HAS_MIRRORROW_SSE2 #endif // The following are available on Windows platforms @@ -58,7 +58,7 @@ // The following are available on Neon platforms #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) -#define HAS_REVERSE_ROW_NEON +#define HAS_MIRRORROW_NEON #define HAS_FASTCONVERTYUVTOARGBROW_NEON #define HAS_FASTCONVERTYUVTOBGRAROW_NEON #define HAS_FASTCONVERTYUVTOABGRROW_NEON @@ -107,10 +107,10 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width); -void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width); -void ReverseRow_SSE2(const uint8* src, uint8* dst, int width); -void ReverseRow_NEON(const uint8* src, uint8* dst, int width); -void ReverseRow_C(const uint8* src, uint8* dst, int width); +void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); +void MirrorRow_SSE2(const uint8* src, uint8* dst, int width); +void MirrorRow_NEON(const uint8* src, uint8* dst, int width); +void MirrorRow_C(const uint8* src, uint8* dst, int width); void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); void BGRAToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); diff --git a/source/row_common.cc b/source/row_common.cc index cb1e8b0d7..e35213732 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -360,7 +360,7 @@ void FastConvertYToARGBRow_C(const uint8* y_buf, } } -void ReverseRow_C(const uint8* src, uint8* dst, int width) { +void MirrorRow_C(const uint8* src, uint8* dst, int width) { src += width - 1; for (int i = 0; i < width; ++i) { dst[i] = src[0]; diff --git a/source/row_posix.cc b/source/row_posix.cc index 984281be3..a79277268 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -644,14 +644,14 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb, } #endif -#ifdef HAS_REVERSE_ROW_SSSE3 +#ifdef HAS_MIRRORROW_SSSE3 // Shuffle table for reversing the bytes. 
-CONST uvec8 kShuffleReverse = {
+CONST uvec8 kShuffleMirror = {
   15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
 };
 
-void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
   intptr_t temp_width = static_cast<intptr_t>(width);
   asm volatile (
   "movdqa     %3,%%xmm5                        \n"
@@ -666,7 +666,7 @@ void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
   : "+r"(src),         // %0
    "+r"(dst),         // %1
    "+r"(temp_width)   // %2
-  : "m"(kShuffleReverse)  // %3
+  : "m"(kShuffleMirror)   // %3
  : "memory", "cc"
 #if defined(__SSE2__)
    , "xmm0", "xmm5"
@@ -675,15 +675,15 @@
 }
 #endif
 
-#ifdef HAS_REVERSE_ROW_SSE2
+#ifdef HAS_MIRRORROW_SSE2
 
-void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
   intptr_t temp_width = static_cast<intptr_t>(width);
   asm volatile (
   "lea        -0x10(%0),%0                     \n"
 "1:                                            \n"
-  "movdqa     (%0,%2),%%xmm0                   \n"
-  "movdqa     %%xmm0,%%xmm1                    \n"
+  "movdqu     (%0,%2),%%xmm0                   \n"
+  "movdqu     %%xmm0,%%xmm1                    \n"
   "psllw      $0x8,%%xmm0                      \n"
   "psrlw      $0x8,%%xmm1                      \n"
   "por        %%xmm1,%%xmm0                    \n"
@@ -691,7 +691,7 @@ void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
   "pshufhw    $0x1b,%%xmm0,%%xmm0              \n"
   "pshufd     $0x4e,%%xmm0,%%xmm0              \n"
   "sub        $0x10,%2                         \n"
-  "movdqa     %%xmm0,(%1)                      \n"
+  "movdqu     %%xmm0,(%1)                      \n"
   "lea        0x10(%1),%1                      \n"
   "ja         1b                               \n"
   : "+r"(src),   // %0
diff --git a/source/row_win.cc b/source/row_win.cc
index 71d081636..805811897 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -1169,20 +1169,20 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
 #endif
 #endif
 
-#ifdef HAS_REVERSE_ROW_SSSE3
+#ifdef HAS_MIRRORROW_SSSE3
 
 // Shuffle table for reversing the bytes.
-static const uvec8 kShuffleReverse = {
+static const uvec8 kShuffleMirror = {
   15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
 };
 
 __declspec(naked)
-void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
 __asm {
    mov       eax, [esp + 4]   // src
    mov       edx, [esp + 8]   // dst
    mov       ecx, [esp + 12]  // width
-    movdqa    xmm5, kShuffleReverse
+    movdqa    xmm5, kShuffleMirror
    lea       eax, [eax - 16]
 convertloop:
    movdqa    xmm0, [eax + ecx]
@@ -1196,18 +1196,20 @@ __asm {
 }
 #endif
 
-#ifdef HAS_REVERSE_ROW_SSE2
+#ifdef HAS_MIRRORROW_SSE2
+// SSE2 version has movdqu so it can be used on misaligned buffers when SSSE3
+// version cannot.
 
 __declspec(naked)
-void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
 __asm {
    mov       eax, [esp + 4]   // src
    mov       edx, [esp + 8]   // dst
    mov       ecx, [esp + 12]  // width
    lea       eax, [eax - 16]
 convertloop:
-    movdqa    xmm0, [eax + ecx]
-    movdqa    xmm1, xmm0   // swap bytes
+    movdqu    xmm0, [eax + ecx]
+    movdqu    xmm1, xmm0   // swap bytes
    psllw     xmm0, 8
    psrlw     xmm1, 8
    por       xmm0, xmm1
@@ -1215,7 +1217,7 @@ __asm {
    pshufhw   xmm0, xmm0, 0x1b
    pshufd    xmm0, xmm0, 0x4e   // swap qwords
    sub       ecx, 16
-    movdqa    [edx], xmm0
+    movdqu    [edx], xmm0
    lea       edx, [edx + 16]
    ja        convertloop
    ret
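
Usage sketch for the new entry point added in planar_functions.cc above: the snippet mirrors one tightly packed I420 frame horizontally. The I420Mirror signature is taken from this diff; the header path, the libyuv namespace qualification, and the 640x360 frame size are illustrative assumptions, not part of the patch.

// Hypothetical caller of the new I420Mirror(). Strides equal the plane
// widths because the planes are tightly packed; passing a negative height
// would additionally flip the image vertically, per the function's comment.
// uint8 is libyuv's byte typedef (assumed pulled in via the header below).
#include <vector>
#include "libyuv/planar_functions.h"  // assumed location of the declaration

int main() {
  const int width = 640;    // illustrative frame size
  const int height = 360;
  const int halfwidth = (width + 1) >> 1;
  const int halfheight = (height + 1) >> 1;
  // One contiguous I420 buffer: full-size Y plane, then quarter-size U and V.
  std::vector<uint8> src(width * height + 2 * halfwidth * halfheight, 128);
  std::vector<uint8> dst(src.size());
  uint8* src_y = &src[0];
  uint8* src_u = src_y + width * height;
  uint8* src_v = src_u + halfwidth * halfheight;
  uint8* dst_y = &dst[0];
  uint8* dst_u = dst_y + width * height;
  uint8* dst_v = dst_u + halfwidth * halfheight;
  // Returns 0 on success, -1 on null pointers or bad dimensions.
  return libyuv::I420Mirror(src_y, width, src_u, halfwidth, src_v, halfwidth,
                            dst_y, width, dst_u, halfwidth, dst_v, halfwidth,
                            width, height);
}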