From 27d42c7ff6452c53643bc57ee8b7b17afbe8dfd0 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Fri, 22 Jun 2012 23:57:26 +0000 Subject: [PATCH] Expose ARGB rotation using scale low levels BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/675004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@292 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv.h | 1 + include/libyuv/planar_functions.h | 5 ++++ include/libyuv/version.h | 2 +- source/planar_functions.cc | 41 ++++++++++++++++++++++++++++--- source/row.h | 4 +++ source/row_common.cc | 14 +++++++++++ source/row_posix.cc | 31 +++++++++++++++++++++++ source/row_win.cc | 29 ++++++++++++++++++++++ source/scale_argb.cc | 18 +++++++------- unit_test/planar_test.cc | 27 ++++++++++++++++++-- 11 files changed, 158 insertions(+), 16 deletions(-) diff --git a/README.chromium b/README.chromium index 75ee5f39e..cf6060b75 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 291 +Version: 292 License: BSD License File: LICENSE diff --git a/include/libyuv.h b/include/libyuv.h index d44557378..1657c255b 100644 --- a/include/libyuv.h +++ b/include/libyuv.h @@ -19,6 +19,7 @@ #include "libyuv/format_conversion.h" #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" +#include "libyuv/rotate_argb.h" #include "libyuv/scale.h" #include "libyuv/scale_argb.h" #include "libyuv/version.h" diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index a54a6c28a..d902be172 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -47,6 +47,11 @@ int I420Mirror(const uint8* src_y, int src_stride_y, uint8* dst_v, int dst_stride_v, int width, int height); +// ARGB mirror. +int ARGBMirror(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + // Convert NV12 to ARGB. int NV12ToARGB(const uint8* src_y, int src_stride_y, const uint8* src_uv, int src_stride_uv, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 233530a96..36aeb2601 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,7 +11,7 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 291 +#define LIBYUV_VERSION 292 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 04d8208a6..d89259a6b 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -139,6 +139,41 @@ int I420Mirror(const uint8* src_y, int src_stride_y, return 0; } +// ARGB mirror. +int ARGBMirror(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb, int dst_stride_argb, + int width, int height) { + if (!src_argb || + !dst_argb || + width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + + void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) = + ARGBMirrorRow_C; +#if defined(HAS_ARGBMIRRORROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) && + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + ARGBMirrorRow = ARGBMirrorRow_SSSE3; + } +#endif + + // Mirror plane + for (int y = 0; y < height; ++y) { + ARGBMirrorRow(src_argb, dst_argb, width); + src_argb += src_stride_argb; + dst_argb += dst_stride_argb; + } + return 0; +} + // Copy ARGB with optional flipping int ARGBCopy(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, @@ -182,9 +217,9 @@ ARGBBlendRow GetARGBBlend() { // Alpha Blend 2 ARGB images and store to destination. int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { + const uint8* src_argb1, int src_stride_argb1, + uint8* dst_argb, int dst_stride_argb, + int width, int height) { if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { return -1; } diff --git a/source/row.h b/source/row.h index c6df90f81..5b5dc4675 100644 --- a/source/row.h +++ b/source/row.h @@ -64,6 +64,7 @@ extern "C" { #define HAS_I400TOARGBROW_SSE2 #define HAS_MIRRORROW_SSSE3 #define HAS_MIRRORROWUV_SSSE3 +#define HAS_ARGBMIRRORROW_SSSE3 #define HAS_RAWTOARGBROW_SSSE3 #define HAS_RGB24TOARGBROW_SSSE3 #define HAS_RGB565TOARGBROW_SSE2 @@ -183,6 +184,9 @@ void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, int width); void MirrorRowUV_NEON(const uint8* src, uint8* dst_u, uint8* dst_v, int width); void MirrorRowUV_C(const uint8* src, uint8* dst_u, uint8* dst_v, int width); +void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width); +void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width); + void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); diff --git a/source/row_common.cc b/source/row_common.cc index 50f47d212..63fe3818e 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -587,6 +587,20 @@ void MirrorRowUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { } } +void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) { + const uint32* src32 = reinterpret_cast(src); + uint32* dst32 = reinterpret_cast(dst); + src32 += width - 1; + for (int x = 0; x < width - 1; x += 2) { + dst32[x] = src32[0]; + dst32[x + 1] = src32[-1]; + src32 -= 2; + } + if (width & 1) { + dst32[width - 1] = src32[0]; + } +} + void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { for (int x = 0; x < width - 1; x += 2) { dst_u[x] = src_uv[0]; diff --git a/source/row_posix.cc b/source/row_posix.cc index dcca4e8db..632a30c1f 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -1977,6 +1977,37 @@ void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, } #endif // HAS_MIRRORROW_UV_SSSE3 +#ifdef HAS_ARGBMIRRORROW_SSSE3 +// Shuffle table for reversing the bytes. +CONST uvec8 kShuffleMirror = { + 12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u +}; + +void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { + intptr_t temp_width = static_cast(width); + asm volatile ( + "movdqa %3,%%xmm5 \n" + "lea -0x10(%0),%0 \n" + ".p2align 4 \n" + "1: \n" + "movdqa (%0,%2,4),%%xmm0 \n" + "pshufb %%xmm5,%%xmm0 \n" + "sub $0x4,%2 \n" + "movdqa %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(temp_width) // %2 + : "m"(kShuffleMirror) // %3 + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm5" +#endif + ); +} +#endif // HAS_ARGBMIRRORROW_SSSE3 + #ifdef HAS_SPLITUV_SSE2 void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { asm volatile ( diff --git a/source/row_win.cc b/source/row_win.cc index 0e73a0c58..82ad2f365 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -2059,6 +2059,35 @@ void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, } #endif // HAS_MIRRORROW_UV_SSSE3 +#ifdef HAS_ARGBMIRRORROW_SSSE3 + +// Shuffle table for reversing the bytes. +static const uvec8 kARGBShuffleMirror = { + 12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u +}; + +__declspec(naked) __declspec(align(16)) +void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { +__asm { + mov eax, [esp + 4] // src + mov edx, [esp + 8] // dst + mov ecx, [esp + 12] // width + movdqa xmm5, kARGBShuffleMirror + lea eax, [eax - 16] + + align 16 + convertloop: + movdqa xmm0, [eax + ecx * 4] + pshufb xmm0, xmm5 + sub ecx, 4 + movdqa [edx], xmm0 + lea edx, [edx + 16] + jg convertloop + ret + } +} +#endif // HAS_ARGBMIRRORROW_SSSE3 + #ifdef HAS_SPLITUV_SSE2 __declspec(naked) __declspec(align(16)) void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 7731c2e4d..616ab9c1f 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -97,9 +97,9 @@ static void ScaleARGBRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, // Reads 4 pixels at a time. // Alignment requirement: dst_ptr 16 byte aligned. __declspec(naked) __declspec(align(16)) -static void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride, - int src_stepx, - uint8* dst_ptr, int dst_width) { +void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride, + int src_stepx, + uint8* dst_ptr, int dst_width) { __asm { push ebx push edi @@ -414,9 +414,9 @@ static void ScaleARGBRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, #define HAS_SCALEARGBROWDOWNEVEN_SSE2 // Reads 4 pixels at a time. // Alignment requirement: dst_ptr 16 byte aligned. -static void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride, - int src_stepx, - uint8* dst_ptr, int dst_width) { +void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride, + int src_stepx, + uint8* dst_ptr, int dst_width) { intptr_t src_stepx_x4 = static_cast(src_stepx); intptr_t src_stepx_x12 = 0; asm volatile ( @@ -669,9 +669,9 @@ static void ScaleARGBRowDown2Int_C(const uint8* src_ptr, int src_stride, } } -static void ScaleARGBRowDownEven_C(const uint8* src_ptr, int, - int src_stepx, - uint8* dst_ptr, int dst_width) { +void ScaleARGBRowDownEven_C(const uint8* src_ptr, int, + int src_stepx, + uint8* dst_ptr, int dst_width) { const uint32* src = reinterpret_cast(src_ptr); uint32* dst = reinterpret_cast(dst_ptr); diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index ea3b96d2e..76c6cedea 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -218,9 +218,9 @@ TESTATOPLANAR(RGB24, 3, I420, 2, 2) TESTATOPLANAR(RGB565, 2, I420, 2, 2) TESTATOPLANAR(ARGB1555, 2, I420, 2, 2) TESTATOPLANAR(ARGB4444, 2, I420, 2, 2) -// TESTATOPLANAR(ARGB, 4, I411, 4, 1) +// TESTATOPLANAR(ARGB, 4, I411, 4, 1) TESTATOPLANAR(ARGB, 4, I422, 2, 1) -// TESTATOPLANAR(ARGB, 4, I444, 1, 1) +// TESTATOPLANAR(ARGB, 4, I444, 1, 1) // TODO(fbarchard): Implement and test 411 and 444 #define TESTATOB(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B) \ @@ -625,4 +625,27 @@ TEST_F(libyuvTest, TestARGBQuantize) { } } +TEST_F(libyuvTest, TestARGBMirror) { + SIMD_ALIGNED(uint8 orig_pixels[256][4]); + SIMD_ALIGNED(uint8 dst_pixels[256][4]); + + for (int i = 0; i < 256; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i / 4; + } + ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 256, 1); + + for (int i = 0; i < 256; ++i) { + EXPECT_EQ(i, dst_pixels[255 - i][0]); + EXPECT_EQ(i / 2, dst_pixels[255 - i][1]); + EXPECT_EQ(i / 3, dst_pixels[255 - i][2]); + EXPECT_EQ(i / 4, dst_pixels[255 - i][3]); + } + for (int i = 0; i < 1000 * 1280 * 720 / 256; ++i) { + ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 256, 1); + } +} + } // namespace libyuv