diff --git a/README.chromium b/README.chromium index 6cd2d60ee..eccd8c6b4 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1115 +Version: 1116 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 186dcbd2d..07d38756a 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -100,8 +100,8 @@ extern "C" { #define HAS_SOBELYROW_SSE2 // Conversions: -//#define HAS_ARGBTOUVROW_SSSE3 -//#define HAS_ABGRTOUVROW_SSSE3 +#define HAS_ARGBTOUVROW_SSSE3 +#define HAS_ABGRTOUVROW_SSSE3 #define HAS_ABGRTOYROW_SSSE3 #define HAS_ARGB1555TOARGBROW_SSE2 #define HAS_ARGB4444TOARGBROW_SSE2 @@ -116,10 +116,10 @@ extern "C" { #define HAS_ARGBTORGB565ROW_SSE2 #define HAS_ARGBTOUV422ROW_SSSE3 #define HAS_ARGBTOUV444ROW_SSSE3 -//#define HAS_ARGBTOUVJROW_SSSE3 +#define HAS_ARGBTOUVJROW_SSSE3 #define HAS_ARGBTOYJROW_SSSE3 #define HAS_ARGBTOYROW_SSSE3 -//#define HAS_BGRATOUVROW_SSSE3 +#define HAS_BGRATOUVROW_SSSE3 #define HAS_BGRATOYROW_SSSE3 #define HAS_COPYROW_ERMS #define HAS_COPYROW_SSE2 @@ -153,7 +153,7 @@ extern "C" { #define HAS_RGB24TOARGBROW_SSSE3 #define HAS_RGB24TOYROW_SSSE3 #define HAS_RGB565TOARGBROW_SSE2 -//#define HAS_RGBATOUVROW_SSSE3 +#define HAS_RGBATOUVROW_SSSE3 #define HAS_RGBATOYROW_SSSE3 #define HAS_SETROW_X86 #define HAS_SPLITUVROW_SSE2 diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 772dc4332..c1f4bdab0 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1115 +#define LIBYUV_VERSION 1116 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_posix.cc b/source/row_posix.cc index 90dc5f22c..55aa9a18a 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -807,14 +807,18 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, LABELALIGN "1: \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n" + MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm0 \n" "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" + MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm1 \n" "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" + MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm2 \n" "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - BUNDLEALIGN - MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0 - MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1 - MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2 - MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6 + MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm6 \n" + "lea " MEMLEA(0x40,0) ",%0 \n" "movdqa %%xmm0,%%xmm7 \n" "shufps $0x88,%%xmm1,%%xmm0 \n" @@ -876,14 +880,18 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, LABELALIGN "1: \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n" + MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm0 \n" "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" + MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm1 \n" "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" + MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm2 \n" "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - BUNDLEALIGN - MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0 - MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1 - MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2 - MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6 + MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm6 \n" + "lea " MEMLEA(0x40,0) ",%0 \n" "movdqa %%xmm0,%%xmm7 \n" "shufps $0x88,%%xmm1,%%xmm0 \n" @@ -1111,14 +1119,18 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra, LABELALIGN "1: \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n" + MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm0 \n" "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" + MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm1 \n" "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" + MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm2 \n" "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - BUNDLEALIGN - MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0 - MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1 - MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2 - MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6 + MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm6 \n" + "lea " MEMLEA(0x40,0) ",%0 \n" "movdqa %%xmm0,%%xmm7 \n" "shufps $0x88,%%xmm1,%%xmm0 \n" @@ -1251,14 +1263,18 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr, LABELALIGN "1: \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n" + MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm0 \n" "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" + MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm1 \n" "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" + MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm2 \n" "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - BUNDLEALIGN - MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0 - MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1 - MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2 - MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6 + MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm6 \n" + "lea " MEMLEA(0x40,0) ",%0 \n" "movdqa %%xmm0,%%xmm7 \n" "shufps $0x88,%%xmm1,%%xmm0 \n" @@ -1317,14 +1333,18 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, LABELALIGN "1: \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n" + MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm0 \n" "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" + MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm1 \n" "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" + MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm2 \n" "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - BUNDLEALIGN - MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0 - MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1 - MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2 - MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6 + MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 + "pavgb %%xmm7,%%xmm6 \n" + "lea " MEMLEA(0x40,0) ",%0 \n" "movdqa %%xmm0,%%xmm7 \n" "shufps $0x88,%%xmm1,%%xmm0 \n" diff --git a/source/row_win.cc b/source/row_win.cc index a5b99d7b0..e26a62275 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -977,13 +977,18 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, convertloop: /* step 1 - subsample 16x2 argb pixels to 8x1 */ movdqu xmm0, [eax] + movdqu xmm4, [eax + esi] + pavgb xmm0, xmm4 movdqu xmm1, [eax + 16] + movdqu xmm4, [eax + esi + 16] + pavgb xmm1, xmm4 movdqu xmm2, [eax + 32] + movdqu xmm4, [eax + esi + 32] + pavgb xmm2, xmm4 movdqu xmm3, [eax + 48] - pavgb xmm0, [eax + esi] - pavgb xmm1, [eax + esi + 16] - pavgb xmm2, [eax + esi + 32] - pavgb xmm3, [eax + esi + 48] + movdqu xmm4, [eax + esi + 48] + pavgb xmm3, xmm4 + lea eax, [eax + 64] movdqa xmm4, xmm0 shufps xmm0, xmm1, 0x88 @@ -1043,13 +1048,18 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, convertloop: /* step 1 - subsample 16x2 argb pixels to 8x1 */ movdqu xmm0, [eax] + movdqu xmm4, [eax + esi] + pavgb xmm0, xmm4 movdqu xmm1, [eax + 16] + movdqu xmm4, [eax + esi + 16] + pavgb xmm1, xmm4 movdqu xmm2, [eax + 32] + movdqu xmm4, [eax + esi + 32] + pavgb xmm2, xmm4 movdqu xmm3, [eax + 48] - pavgb xmm0, [eax + esi] - pavgb xmm1, [eax + esi + 16] - pavgb xmm2, [eax + esi + 32] - pavgb xmm3, [eax + esi + 48] + movdqu xmm4, [eax + esi + 48] + pavgb xmm3, xmm4 + lea eax, [eax + 64] movdqa xmm4, xmm0 shufps xmm0, xmm1, 0x88 @@ -1294,13 +1304,18 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, convertloop: /* step 1 - subsample 16x2 argb pixels to 8x1 */ movdqu xmm0, [eax] + movdqu xmm4, [eax + esi] + pavgb xmm0, xmm4 movdqu xmm1, [eax + 16] + movdqu xmm4, [eax + esi + 16] + pavgb xmm1, xmm4 movdqu xmm2, [eax + 32] + movdqu xmm4, [eax + esi + 32] + pavgb xmm2, xmm4 movdqu xmm3, [eax + 48] - pavgb xmm0, [eax + esi] - pavgb xmm1, [eax + esi + 16] - pavgb xmm2, [eax + esi + 32] - pavgb xmm3, [eax + esi + 48] + movdqu xmm4, [eax + esi + 48] + pavgb xmm3, xmm4 + lea eax, [eax + 64] movdqa xmm4, xmm0 shufps xmm0, xmm1, 0x88 @@ -1360,13 +1375,18 @@ void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, convertloop: /* step 1 - subsample 16x2 argb pixels to 8x1 */ movdqu xmm0, [eax] + movdqu xmm4, [eax + esi] + pavgb xmm0, xmm4 movdqu xmm1, [eax + 16] + movdqu xmm4, [eax + esi + 16] + pavgb xmm1, xmm4 movdqu xmm2, [eax + 32] + movdqu xmm4, [eax + esi + 32] + pavgb xmm2, xmm4 movdqu xmm3, [eax + 48] - pavgb xmm0, [eax + esi] - pavgb xmm1, [eax + esi + 16] - pavgb xmm2, [eax + esi + 32] - pavgb xmm3, [eax + esi + 48] + movdqu xmm4, [eax + esi + 48] + pavgb xmm3, xmm4 + lea eax, [eax + 64] movdqa xmm4, xmm0 shufps xmm0, xmm1, 0x88 @@ -1426,13 +1446,18 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, convertloop: /* step 1 - subsample 16x2 argb pixels to 8x1 */ movdqu xmm0, [eax] + movdqu xmm4, [eax + esi] + pavgb xmm0, xmm4 movdqu xmm1, [eax + 16] + movdqu xmm4, [eax + esi + 16] + pavgb xmm1, xmm4 movdqu xmm2, [eax + 32] + movdqu xmm4, [eax + esi + 32] + pavgb xmm2, xmm4 movdqu xmm3, [eax + 48] - pavgb xmm0, [eax + esi] - pavgb xmm1, [eax + esi + 16] - pavgb xmm2, [eax + esi + 32] - pavgb xmm3, [eax + esi + 48] + movdqu xmm4, [eax + esi + 48] + pavgb xmm3, xmm4 + lea eax, [eax + 64] movdqa xmm4, xmm0 shufps xmm0, xmm1, 0x88