From 30859f75f28c2435753d33eb7a48ccab169feb6d Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Fri, 2 Nov 2012 09:51:29 +0000 Subject: [PATCH] Neon YToARGB and fix SSE2 to match C version BUG=none TEST=YToARGB_Opt Review URL: https://webrtc-codereview.appspot.com/966007 git-svn-id: http://libyuv.googlecode.com/svn/trunk@466 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/convert_argb.h | 3 +++ include/libyuv/row.h | 4 ++++ include/libyuv/version.h | 2 +- source/convert_argb.cc | 4 ++++ source/row_posix.cc | 10 ++++++---- source/row_win.cc | 10 ++++++---- unit_test/convert_test.cc | 1 + 8 files changed, 26 insertions(+), 10 deletions(-) diff --git a/README.chromium b/README.chromium index 854e32785..14c4582c8 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 465 +Version: 466 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index 86085252f..19b439442 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -75,6 +75,9 @@ int I400ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height); +// Alias. +#define YToARGB I400ToARGB_Reference + // Convert I400 to ARGB. Reverse of ARGBToI400. LIBYUV_API int I400ToARGB_Reference(const uint8* src_y, int src_stride_y, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 4c2026583..087e95881 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -183,6 +183,7 @@ extern "C" { #define HAS_ARGBTOARGB4444ROW_NEON #define HAS_ARGBTOYROW_NEON #define HAS_MERGEUV_NEON +#define HAS_YTOARGBROW_NEON #endif // The following are available on Mips platforms @@ -718,6 +719,9 @@ void I422ToRAWRow_Any_SSSE3(const uint8* y_buf, void YToARGBRow_SSE2(const uint8* y_buf, uint8* argb_buf, int width); +void YToARGBRow_NEON(const uint8* y_buf, + uint8* argb_buf, + int width); // ARGB preattenuated alpha blend. void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index cd1e14ce0..95d51bb2a 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 465 +#define LIBYUV_VERSION 466 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/convert_argb.cc b/source/convert_argb.cc index cab63d8ff..8b8016b4f 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -220,6 +220,10 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y, IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { YToARGBRow = YToARGBRow_SSE2; } +#elif defined(HAS_YTOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { + YToARGBRow = YToARGBRow_NEON; + } #endif for (int y = 0; y < height; ++y) { diff --git a/source/row_posix.cc b/source/row_posix.cc index fa0c07ec6..5e26005b7 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -2334,12 +2334,13 @@ void YToARGBRow_SSE2(const uint8* y_buf, uint8* rgb_buf, int width) { asm volatile ( + "pxor %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm4,%%xmm4 \n" "pslld $0x18,%%xmm4 \n" - "mov $0x10001000,%%eax \n" + "mov $0x00100010,%%eax \n" "movd %%eax,%%xmm3 \n" "pshufd $0x0,%%xmm3,%%xmm3 \n" - "mov $0x012a012a,%%eax \n" + "mov $0x004a004a,%%eax \n" "movd %%eax,%%xmm2 \n" "pshufd $0x0,%%xmm2,%%xmm2 \n" ".p2align 4 \n" @@ -2347,9 +2348,10 @@ void YToARGBRow_SSE2(const uint8* y_buf, // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164 "movq (%0),%%xmm0 \n" "lea 0x8(%0),%0 \n" - "punpcklbw %%xmm0,%%xmm0 \n" + "punpcklbw %%xmm5,%%xmm0 \n" "psubusw %%xmm3,%%xmm0 \n" - "pmulhuw %%xmm2,%%xmm0 \n" + "pmullw %%xmm2,%%xmm0 \n" + "psrlw $6, %%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" // Step 2: Weave into ARGB diff --git a/source/row_win.cc b/source/row_win.cc index 680e24935..c1b77cfc7 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -2483,12 +2483,13 @@ void YToARGBRow_SSE2(const uint8* y_buf, uint8* rgb_buf, int width) { __asm { + pxor xmm5, xmm5 pcmpeqb xmm4, xmm4 // generate mask 0xff000000 pslld xmm4, 24 - mov eax,0x10001000 + mov eax,0x00100010 movd xmm3,eax pshufd xmm3,xmm3,0 - mov eax,0x012a012a + mov eax,0x004a004a // 74 movd xmm2,eax pshufd xmm2,xmm2,0 mov eax, [esp + 4] // Y @@ -2500,9 +2501,10 @@ void YToARGBRow_SSE2(const uint8* y_buf, // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164 movq xmm0, qword ptr [eax] lea eax, [eax + 8] - punpcklbw xmm0, xmm0 // Y.Y + punpcklbw xmm0, xmm5 // 0.Y psubusw xmm0, xmm3 - pmulhuw xmm0, xmm2 + pmullw xmm0, xmm2 + psrlw xmm0, 6 packuswb xmm0, xmm0 // G // Step 2: Weave into ARGB diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index 34dde1ff6..f18beb675 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -707,6 +707,7 @@ TESTATOB(BayerGRBG, 1, 1, ARGB, 4, 0) TESTATOB(I400, 1, 1, ARGB, 4, 0) TESTATOB(I400, 1, 1, I400, 1, 0) TESTATOB(I400, 1, 1, I400Mirror, 1, 0) +TESTATOB(Y, 1, 1, ARGB, 4, 0) TESTATOB(ARGB, 4, 4, ARGBMirror, 4, 0) #define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, DIFF) \