From 8e16c1a341f09ac4ad5b142dd36af57fcfe4f10a Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Fri, 13 Feb 2015 21:53:16 +0000 Subject: [PATCH] Switch to macro for STOREBGRA etc on Posix SSSE3 BUG=393 TESTED=try bots R=harryjin@google.com Review URL: https://webrtc-codereview.appspot.com/33339004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1282 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- source/row_posix.cc | 64 +++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/source/row_posix.cc b/source/row_posix.cc index ae15e0ceb..1a6f7dc4d 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -1537,6 +1537,41 @@ static YuvConstants SIMD_ALIGNED(kYvuConstants) = { "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) " \n" \ "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n" +// Store 8 BGRA values. Assumes XMM5 is zero. +#define STOREBGRA \ + "pcmpeqb %%xmm5,%%xmm5 \n" \ + "punpcklbw %%xmm0,%%xmm1 \n" \ + "punpcklbw %%xmm2,%%xmm5 \n" \ + "movdqa %%xmm5,%%xmm0 \n" \ + "punpcklwd %%xmm1,%%xmm5 \n" \ + "punpckhwd %%xmm1,%%xmm0 \n" \ + "movdqu %%xmm5," MEMACCESS([dst_bgra]) " \n" \ + "movdqu %%xmm0," MEMACCESS2(0x10,[dst_bgra]) " \n" \ + "lea " MEMLEA(0x20,[dst_bgra]) ",%[dst_bgra] \n" + +// Store 8 ABGR values. Assumes XMM5 is zero. +#define STOREABGR \ + "punpcklbw %%xmm1,%%xmm2 \n" \ + "punpcklbw %%xmm5,%%xmm0 \n" \ + "movdqa %%xmm2,%%xmm1 \n" \ + "punpcklwd %%xmm0,%%xmm2 \n" \ + "punpckhwd %%xmm0,%%xmm1 \n" \ + "movdqu %%xmm2," MEMACCESS([dst_abgr]) " \n" \ + "movdqu %%xmm1," MEMACCESS2(0x10,[dst_abgr]) " \n" \ + "lea " MEMLEA(0x20,[dst_abgr]) ",%[dst_abgr] \n" + +// Store 8 RGBA values. Assumes XMM5 is zero. +#define STORERGBA \ + "pcmpeqb %%xmm5,%%xmm5 \n" \ + "punpcklbw %%xmm2,%%xmm1 \n" \ + "punpcklbw %%xmm0,%%xmm5 \n" \ + "movdqa %%xmm5,%%xmm0 \n" \ + "punpcklwd %%xmm1,%%xmm5 \n" \ + "punpckhwd %%xmm1,%%xmm0 \n" \ + "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \ + "movdqu %%xmm0," MEMACCESS2(0x10,[dst_rgba]) " \n" \ + "lea " MEMLEA(0x20,[dst_rgba]) ",%[dst_rgba] \n" + void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -1762,15 +1797,7 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, "1: \n" READYUV422 YUVTORGB(kYuvConstants) - "pcmpeqb %%xmm5,%%xmm5 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm5 \n" - "movdqa %%xmm5,%%xmm0 \n" - "punpcklwd %%xmm1,%%xmm5 \n" - "punpckhwd %%xmm1,%%xmm0 \n" - "movdqu %%xmm5," MEMACCESS([dst_bgra]) "\n" - "movdqu %%xmm0," MEMACCESS2(0x10,[dst_bgra]) "\n" - "lea " MEMLEA(0x20,[dst_bgra]) ",%[dst_bgra] \n" + STOREBGRA "sub $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] @@ -1796,14 +1823,7 @@ void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, "1: \n" READYUV422 YUVTORGB(kYuvConstants) - "punpcklbw %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "punpcklwd %%xmm0,%%xmm2 \n" - "punpckhwd %%xmm0,%%xmm1 \n" - "movdqu %%xmm2," MEMACCESS([dst_abgr]) "\n" - "movdqu %%xmm1," MEMACCESS2(0x10,[dst_abgr]) "\n" - "lea " MEMLEA(0x20,[dst_abgr]) ",%[dst_abgr] \n" + STOREABGR "sub $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] @@ -1829,15 +1849,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, "1: \n" READYUV422 YUVTORGB(kYuvConstants) - "pcmpeqb %%xmm5,%%xmm5 \n" - "punpcklbw %%xmm2,%%xmm1 \n" - "punpcklbw %%xmm0,%%xmm5 \n" - "movdqa %%xmm5,%%xmm0 \n" - "punpcklwd %%xmm1,%%xmm5 \n" - "punpckhwd %%xmm1,%%xmm0 \n" - "movdqu %%xmm5," MEMACCESS([dst_rgba]) "\n" - "movdqu %%xmm0," MEMACCESS2(0x10,[dst_rgba]) "\n" - "lea " MEMLEA(0x20,[dst_rgba]) ",%[dst_rgba] \n" + STORERGBA "sub $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf]