From 1c2d8be1e1edcfe70f369184c5abcd0bd09180b9 Mon Sep 17 00:00:00 2001
From: "fbarchard@google.com" <fbarchard@google.com>
Date: Thu, 17 Nov 2011 21:57:54 +0000
Subject: [PATCH] port yuv to rgb to mac

BUG=none
TEST=none
Review URL: http://webrtc-codereview.appspot.com/269017

git-svn-id: http://libyuv.googlecode.com/svn/trunk@83 16f28f9a-4ce2-e073-06de-1de4eb20be90
---
 README.chromium     |   2 +-
 source/row.h        |  10 +---
 source/row_posix.cc | 118 +++++++++++++++++---------------------------
 source/row_win.cc   |   2 +-
 4 files changed, 48 insertions(+), 84 deletions(-)

diff --git a/README.chromium b/README.chromium
index 6c49b6a08..8125e8aa1 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 82
+Version: 83
 License: BSD
 License File: LICENSE
diff --git a/source/row.h b/source/row.h
index 53cf8a67a..21888ec4e 100644
--- a/source/row.h
+++ b/source/row.h
@@ -20,8 +20,8 @@
 #endif
 
 // The following are available on all x86 platforms
-#if (defined(WIN32) || defined(__x86_64__) || defined(__i386__)) && \
-    !defined(LIBYUV_DISABLE_ASM)
+#if (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
+    !defined(YUV_DISABLE_ASM)
 #define HAS_ABGRTOARGBROW_SSSE3
 #define HAS_BGRATOARGBROW_SSSE3
 #define HAS_BG24TOARGBROW_SSSE3
@@ -38,12 +38,6 @@
 #define HAS_ABGRTOUVROW_SSSE3
 #define HAS_I400TOARGBROW_SSE2
 #define HAS_FASTCONVERTYTOARGBROW_SSE2
-#endif
-
-// The following are available on all x86 platforms except 32 bit OSX
-#if (defined(WIN32) || defined(__x86_64__) || \
-    (defined(__i386__) && !defined(__APPLE__))) && \
-    !defined(LIBYUV_DISABLE_ASM)
 #define HAS_FASTCONVERTYUVTOARGBROW_SSSE3
 #define HAS_FASTCONVERTYUVTOBGRAROW_SSSE3
 #define HAS_FASTCONVERTYUVTOABGRROW_SSSE3
diff --git a/source/row_posix.cc b/source/row_posix.cc
index 2eb5fc3af..83a92ba29 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -319,13 +319,6 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
 
 #endif
 
-#ifdef HAS_FASTCONVERTYTOARGBROW_SSE2
-#define YG 74 /* static_cast<int8>(1.164 * 64 + 0.5) */
-
-vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
-vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
-#endif
-
 #ifdef HAS_FASTCONVERTYUVTOARGBROW_SSSE3
 #define UB 127 /* min(63,static_cast<int8>(2.018 * 64)) */
 #define UG -25 /* static_cast<int8>(-0.391 * 64 - 0.5) */
@@ -340,22 +333,7 @@ vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
 #define BG UG * 128 + VG * 128
 #define BR UR * 128 + VR * 128
 
-vec8 kUVToB = {
-  UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
-};
-
-vec8 kUVToR = {
-  UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
-};
-
-vec8 kUVToG = {
-  UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
-};
-
-
-vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
-vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
-vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
+#define YG 74 /* static_cast<int8>(1.164 * 64 + 0.5) */
 
 #if defined(__APPLE__) || defined(__x86_64__)
 #define OMITFP
@@ -363,7 +341,27 @@ vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
 #define OMITFP __attribute__((optimize("omit-frame-pointer")))
 #endif
 
-// This version produces 8 pixels
+struct {
+  vec8 kUVToB;
+  vec8 kUVToG;
+  vec8 kUVToR;
+  vec16 kUVBiasB;
+  vec16 kUVBiasG;
+  vec16 kUVBiasR;
+  vec16 kYSub16;
+  vec16 kYToRgb;
+} SIMD_ALIGNED(kYuvConstants) = {
+  { UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB },
+  { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
+  { UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR },
+  { BB, BB, BB, BB, BB, BB, BB, BB },
+  { BG, BG, BG, BG, BG, BG, BG, BG },
+  { BR, BR, BR, BR, BR, BR, BR, BR },
+  { 16, 16, 16, 16, 16, 16, 16, 16 },
+  { YG, YG, YG, YG, YG, YG, YG, YG }
+};
+
+// Convert 8 pixels
 #define YUVTORGB \
   "movd (%1),%%xmm0 \n" \
   "movd (%1,%2,1),%%xmm1 \n" \
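As a reference for what the consolidated kYuvConstants table computes, here is a scalar C++ sketch of the same 6-bit fixed-point math. It is written for this note, not taken from the patch: YuvPixel and Clamp are made-up names, uint8_t stands in for libyuv's uint8, and the VG=-52 / VR=102 values come from the rows of row_posix.cc that this hunk's context does not show.

#include <stdint.h>

// Saturate to the 0..255 range that packuswb enforces in the SIMD path.
static uint8_t Clamp(int v) {
  return static_cast<uint8_t>(v < 0 ? 0 : (v > 255 ? 255 : v));
}

// One pixel of the conversion, using the kYuvConstants coefficients
// (UB=127, UG=-25, VG=-52, VR=102, YG=74), all scaled by 64.
// The >> 6 is an arithmetic shift, matching the SIMD psraw by 6.
static void YuvPixel(uint8_t y, uint8_t u, uint8_t v,
                     uint8_t* b, uint8_t* g, uint8_t* r) {
  int y1 = (static_cast<int>(y) - 16) * 74;            // kYSub16, kYToRgb
  *b = Clamp((y1 + 127 * u - 127 * 128) >> 6);         // kUVToB, kUVBiasB
  *g = Clamp((y1 - 25 * u - 52 * v + 77 * 128) >> 6);  // kUVToG, kUVBiasG
  *r = Clamp((y1 + 102 * v - 102 * 128) >> 6);         // kUVToR, kUVBiasR
}

The bias rows (kUVBiasB/G/R = coefficient * 128) fold the usual u - 128 and v - 128 centering into the single psubw that follows each pmaddubsw.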
@@ -372,17 +370,17 @@ vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
   "punpcklwd %%xmm0,%%xmm0 \n" \
   "movdqa %%xmm0,%%xmm1 \n" \
   "movdqa %%xmm0,%%xmm2 \n" \
-  "pmaddubsw %5,%%xmm0 \n" \
-  "pmaddubsw %6,%%xmm1 \n" \
-  "pmaddubsw %7,%%xmm2 \n" \
-  "psubw %8,%%xmm0 \n" \
-  "psubw %9,%%xmm1 \n" \
-  "psubw %10,%%xmm2 \n" \
+  "pmaddubsw (%5),%%xmm0 \n" \
+  "pmaddubsw 16(%5),%%xmm1 \n" \
+  "pmaddubsw 32(%5),%%xmm2 \n" \
+  "psubw 48(%5),%%xmm0 \n" \
+  "psubw 64(%5),%%xmm1 \n" \
+  "psubw 80(%5),%%xmm2 \n" \
   "movq (%0),%%xmm3 \n" \
   "lea 0x8(%0),%0 \n" \
   "punpcklbw %%xmm4,%%xmm3 \n" \
-  "psubsw %11,%%xmm3 \n" \
-  "pmullw %12,%%xmm3 \n" \
+  "psubsw 96(%5),%%xmm3 \n" \
+  "pmullw 112(%5),%%xmm3 \n" \
   "paddw %%xmm3,%%xmm0 \n" \
   "paddw %%xmm3,%%xmm1 \n" \
   "paddw %%xmm3,%%xmm2 \n" \
@@ -420,14 +418,7 @@ void OMITFP FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf, // rdi
     "+r"(v_buf),    // %2
     "+r"(rgb_buf),  // %3
     "+rm"(width)    // %4
-  : "m" (kUVToB),   // %5
-    "m" (kUVToG),   // %6
-    "m" (kUVToR),   // %7
-    "m" (kUVBiasB), // %8
-    "m" (kUVBiasG), // %9
-    "m" (kUVBiasR), // %10
-    "m" (kYSub16),  // %11
-    "m" (kYToRgb)   // %12
+  : "r"(&kYuvConstants.kUVToB) // %5
   : "memory", "cc"
 #if defined(__SSE2__)
     , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
@@ -463,14 +454,7 @@ void OMITFP FastConvertYUVToBGRARow_SSSE3(const uint8* y_buf, // rdi
     "+r"(v_buf),    // %2
     "+r"(rgb_buf),  // %3
     "+rm"(width)    // %4
-  : "m" (kUVToB),   // %5
-    "m" (kUVToG),   // %6
-    "m" (kUVToR),   // %7
-    "m" (kUVBiasB), // %8
-    "m" (kUVBiasG), // %9
-    "m" (kUVBiasR), // %10
-    "m" (kYSub16),  // %11
-    "m" (kYToRgb)   // %12
+  : "r"(&kYuvConstants.kUVToB) // %5
   : "memory", "cc"
 #if defined(__SSE2__)
     , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
@@ -508,14 +492,7 @@ void OMITFP FastConvertYUVToABGRRow_SSSE3(const uint8* y_buf, // rdi
     "+r"(v_buf),    // %2
     "+r"(rgb_buf),  // %3
     "+rm"(width)    // %4
-  : "m" (kUVToB),   // %5
-    "m" (kUVToG),   // %6
-    "m" (kUVToR),   // %7
-    "m" (kUVBiasB), // %8
-    "m" (kUVBiasG), // %9
-    "m" (kUVBiasR), // %10
-    "m" (kYSub16),  // %11
-    "m" (kYToRgb)   // %12
+  : "r"(&kYuvConstants.kUVToB) // %5
   : "memory", "cc"
 #if defined(__SSE2__)
     , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
@@ -540,17 +517,17 @@ void OMITFP FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf, // rdi
   "punpcklbw %%xmm1,%%xmm0 \n"
   "movdqa %%xmm0,%%xmm1 \n"
   "movdqa %%xmm0,%%xmm2 \n"
-  "pmaddubsw %5,%%xmm0 \n"
-  "pmaddubsw %6,%%xmm1 \n"
-  "pmaddubsw %7,%%xmm2 \n"
-  "psubw %8,%%xmm0 \n"
-  "psubw %9,%%xmm1 \n"
-  "psubw %10,%%xmm2 \n"
+  "pmaddubsw (%5),%%xmm0 \n"
+  "pmaddubsw 16(%5),%%xmm1 \n"
+  "pmaddubsw 32(%5),%%xmm2 \n"
+  "psubw 48(%5),%%xmm0 \n"
+  "psubw 64(%5),%%xmm1 \n"
+  "psubw 80(%5),%%xmm2 \n"
   "movd (%0),%%xmm3 \n"
   "lea 0x4(%0),%0 \n"
   "punpcklbw %%xmm4,%%xmm3 \n"
-  "psubsw %11,%%xmm3 \n"
-  "pmullw %12,%%xmm3 \n"
+  "psubsw 96(%5),%%xmm3 \n"
+  "pmullw 112(%5),%%xmm3 \n"
   "paddw %%xmm3,%%xmm0 \n"
   "paddw %%xmm3,%%xmm1 \n"
   "paddw %%xmm3,%%xmm2 \n"
@@ -572,14 +549,7 @@ void OMITFP FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf, // rdi
     "+r"(v_buf),    // %2
     "+r"(rgb_buf),  // %3
     "+rm"(width)    // %4
-  : "m" (kUVToB),   // %5
-    "m" (kUVToG),   // %6
-    "m" (kUVToR),   // %7
-    "m" (kUVBiasB), // %8
-    "m" (kUVBiasG), // %9
-    "m" (kUVBiasR), // %10
-    "m" (kYSub16),  // %11
-    "m" (kYToRgb)   // %12
+  : "r"(&kYuvConstants.kUVToB) // %5
   : "memory", "cc"
 #if defined(__SSE2__)
     , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
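All four converters now pass a single pointer, %5, and the asm reaches each constant as a fixed displacement: 16 bytes per vec8/vec16 row, so kUVToB at 0, kUVToG at 16, kUVToR at 32, the biases at 48/64/80, kYSub16 at 96 and kYToRgb at 112. This appears to be what makes the rows usable on 32-bit OS X: under PIC there, %ebx is reserved for the GOT pointer, and the old form's eight separate "m" references to global vectors were exactly the case the removed row.h comment excluded. Below is a minimal standalone sketch of the pattern, with hypothetical names (Consts, ScaleRow) invented for this note, not taken from libyuv:

#include <stdint.h>

// All constants in one 16-byte-aligned block, addressed from one register.
struct Consts {
  int16_t sub[8];  // at offset 0
  int16_t mul[8];  // at offset 16
} __attribute__((aligned(16)));

static const Consts kConsts = {
  { 16, 16, 16, 16, 16, 16, 16, 16 },
  { 74, 74, 74, 74, 74, 74, 74, 74 }
};

// Widen 8 bytes to words, then compute (x - sub) * mul, using one base
// register (%2) with constant displacements instead of one "m" operand
// per constant.
void ScaleRow(const uint8_t* src, int16_t* dst) {
  asm volatile(
    "movq (%0),%%xmm0 \n"
    "pxor %%xmm1,%%xmm1 \n"
    "punpcklbw %%xmm1,%%xmm0 \n"
    "psubw (%2),%%xmm0 \n"     // kConsts.sub at offset 0
    "pmullw 16(%2),%%xmm0 \n"  // kConsts.mul at offset 16
    "movdqu %%xmm0,(%1) \n"    // unaligned store: dst alignment unknown
  : : "r"(src), "r"(dst), "r"(&kConsts)
  : "memory", "cc"
#if defined(__SSE2__)
  , "xmm0", "xmm1"
#endif
  );
}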
@@ -625,8 +595,8 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf, // rdi
   : "+r"(y_buf),    // %0
     "+r"(rgb_buf),  // %1
     "+rm"(width)    // %2
-  : "m" (kYSub16),  // %3
-    "m" (kYToRgb)   // %4
+  : "m"(kYuvConstants.kYSub16), // %3
+    "m"(kYuvConstants.kYToRgb)  // %4
   : "memory", "cc"
 #if defined(__SSE2__)
     , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
diff --git a/source/row_win.cc b/source/row_win.cc
index 12716fb03..2bc620f22 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -553,7 +553,7 @@ static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
 static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
 static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
 
-#define YUVTORGB __asm {                                                       \
+#define YUVTORGB __asm {                                                      \
     /* Step 1: Find 4 UV contributions to 8 R,G,B values */                   \
     __asm movd       xmm0, [esi]          /* U */                             \
     __asm movd       xmm1, [esi + edi]    /* V */                             \
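With the row.h change, the SSSE3 YUV rows are gated by the same HAS_ macros on every x86 platform, 32-bit OS X included. A caller would then select a row function along these lines; this is a sketch, assuming the TestCpuFlag/kCpuHasSSSE3 API from cpu_id.h and the FastConvertYUVToARGBRow_C fallback declared in row.h, with ChooseYuvToArgbRow itself a hypothetical helper:

typedef void (*YuvToArgbRowFunc)(const uint8* y_buf, const uint8* u_buf,
                                 const uint8* v_buf, uint8* rgb_buf,
                                 int width);

// Prefer the SSSE3 row when it is compiled in and the CPU supports it.
static YuvToArgbRowFunc ChooseYuvToArgbRow() {
  YuvToArgbRowFunc func = FastConvertYUVToARGBRow_C;  // portable fallback
#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
  if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3)) {
    func = FastConvertYUVToARGBRow_SSSE3;
  }
#endif
  return func;
}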