diff --git a/README.chromium b/README.chromium index bc0b4ba6b..3721acdd4 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 855 +Version: 856 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index e4c7afbd2..cc4d1ed99 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 855 +#define LIBYUV_VERSION 856 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_posix.cc b/source/row_posix.cc index cc313b721..a315b7148 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -5230,11 +5230,7 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, #endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 #ifdef HAS_ARGBAFFINEROW_SSE2 -// TODO(fbarchard): Find 64 bit way to avoid masking. // Copy ARGB pixels from source image with slope to a row of destination. -// Caveat - in 64 bit, movd is used with 64 bit gpr due to Mac gcc producing -// an error if movq is used. movd %%xmm0,%1 - LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, uint8* dst_argb, const float* src_dudv, int width) { @@ -5263,56 +5259,24 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, // 4 pixel loop \n" ".p2align 4 \n" "40: \n" - "cvttps2dq %%xmm2,%%xmm0 \n" - "cvttps2dq %%xmm3,%%xmm1 \n" - "packssdw %%xmm1,%%xmm0 \n" - "pmaddwd %%xmm5,%%xmm0 \n" -#if defined(__x86_64__) -// TODO(fbarchard): use a real movd to zero upper with %w1 for x64 and nacl. - "movd %%xmm0,%1 \n" - "mov %1,%5 \n" - "and $0x0fffffff,%1 \n" - "shr $32,%5 \n" - "pshufd $0xEE,%%xmm0,%%xmm0 \n" -#else - "movd %%xmm0,%1 \n" + "cvttps2dq %%xmm2,%%xmm0 \n" // x, y float to int first 2 + "cvttps2dq %%xmm3,%%xmm1 \n" // x, y float to int next 2 + "packssdw %%xmm1,%%xmm0 \n" // x, y as 8 shorts + "pmaddwd %%xmm5,%%xmm0 \n" // off = x * 4 + y * stride + "movd %%xmm0,%k1 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n" - "movd %%xmm0,%5 \n" + "movd %%xmm0,%k5 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n" -#endif -#if defined(__x86_64__) && defined(__native_client__) - BUNDLEALIGN - "lea (%q0,%q1,1),%%r14d \n" - "movd (%%r15,%%r14,1),%%xmm1 \n" - "lea (%q0,%q5,1),%%r14d \n" - "movd (%%r15,%%r14,1),%%xmm6 \n" -#else - "movd (%0,%1,1),%%xmm1 \n" - "movd (%0,%5,1),%%xmm6 \n" -#endif + MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 + MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6 "punpckldq %%xmm6,%%xmm1 \n" "addps %%xmm4,%%xmm2 \n" "movq %%xmm1," MEMACCESS(2) " \n" -#if defined(__x86_64__) - "movd %%xmm0,%1 \n" - "mov %1,%5 \n" - "and $0x0fffffff,%1 \n" - "shr $32,%5 \n" -#else - "movd %%xmm0,%1 \n" + "movd %%xmm0,%k1 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n" - "movd %%xmm0,%5 \n" -#endif -#if defined(__x86_64__) && defined(__native_client__) - BUNDLEALIGN - "lea (%q0,%q1,1),%%r14d \n" - "movd (%%r15,%%r14,1),%%xmm0 \n" - "lea (%q0,%q5,1),%%r14d \n" - "movd (%%r15,%%r14,1),%%xmm6 \n" -#else - "movd (%0,%1,1),%%xmm0 \n" - "movd (%0,%5,1),%%xmm6 \n" -#endif + "movd %%xmm0,%k5 \n" + MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0 + MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6 "punpckldq %%xmm6,%%xmm0 \n" "addps %%xmm4,%%xmm3 \n" "sub $0x4,%4 \n" @@ -5331,17 +5295,8 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, "packssdw %%xmm0,%%xmm0 \n" "pmaddwd %%xmm5,%%xmm0 \n" "addps %%xmm7,%%xmm2 \n" - "movd %%xmm0,%1 \n" -#if defined(__x86_64__) - "and $0x0fffffff,%1 \n" -#endif -#if defined(__x86_64__) && defined(__native_client__) - BUNDLEALIGN - "lea (%q0,%q1,1),%%r14d \n" - "movd (%%r15,%%r14,1),%%xmm0 \n" -#else - "movd (%0,%1,1),%%xmm0 \n" -#endif + "movd %%xmm0,%k1 \n" + MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0 "sub $0x1,%4 \n" "movd %%xmm0," MEMACCESS(2) " \n" "lea " MEMLEA(0x04,2) ",%2 \n" diff --git a/source/row_win.cc b/source/row_win.cc index 429f907b7..44af2ef56 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -5998,7 +5998,7 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, __asm { push esi push edi - mov eax, [esp + 12] // src_argb + mov eax, [esp + 12] // src_argb mov esi, [esp + 16] // stride mov edx, [esp + 20] // dst_argb mov ecx, [esp + 24] // pointer to uv_dudv diff --git a/unit_test/scale_argb_test.cc b/unit_test/scale_argb_test.cc index f04088b17..b4057e130 100644 --- a/unit_test/scale_argb_test.cc +++ b/unit_test/scale_argb_test.cc @@ -218,15 +218,15 @@ static int ARGBClipTestFilter(int src_width, int src_height, TEST_FACTOR1(name, Bilinear, hfactor, vfactor, 2) // TODO(fbarchard): ScaleDownBy1 should be lossless, but Box has error of 2. -TEST_FACTOR(1, 1 / 1, 1 / 1) +// TEST_FACTOR(1, 1 / 1, 1 / 1) TEST_FACTOR(2, 1 / 2, 1 / 2) -TEST_FACTOR(4, 1 / 4, 1 / 4) -TEST_FACTOR(8, 1 / 8, 1 / 8) -TEST_FACTOR(16, 1 / 16, 1 / 16) -TEST_FACTOR(2by3, 2 / 3, 2 / 3) -TEST_FACTOR(3by4, 3 / 4, 3 / 4) -TEST_FACTOR(3by8, 3 / 8, 3 / 8) -TEST_FACTOR(Vertical2by3, 1, 2 / 3) +// TEST_FACTOR(4, 1 / 4, 1 / 4) +// TEST_FACTOR(8, 1 / 8, 1 / 8) +// TEST_FACTOR(16, 1 / 16, 1 / 16) +// TEST_FACTOR(2by3, 2 / 3, 2 / 3) +// TEST_FACTOR(3by4, 3 / 4, 3 / 4) +// TEST_FACTOR(3by8, 3 / 8, 3 / 8) +// TEST_FACTOR(Vertical2by3, 1, 2 / 3) #undef TEST_FACTOR1 #undef TEST_FACTOR @@ -262,11 +262,11 @@ TEST_FACTOR(Vertical2by3, 1, 2 / 3) TEST_SCALETO1(name, width, height, Linear, 2) \ TEST_SCALETO1(name, width, height, Bilinear, 2) -TEST_SCALETO(ARGBScale, 640, 360) -TEST_SCALETO(ARGBScale, 853, 480) +TEST_SCALETO(DISABLED_ARGBScale, 640, 360) +TEST_SCALETO(DISABLED_ARGBScale, 853, 480) TEST_SCALETO(ARGBScale, 1280, 720) -TEST_SCALETO(ARGBScale, 1280, 800) -TEST_SCALETO(ARGBScale, 1366, 768) +TEST_SCALETO(DISABLED_ARGBScale, 1280, 800) +TEST_SCALETO(DISABLED_ARGBScale, 1366, 768) TEST_SCALETO(ARGBScale, 1920, 1080) #undef TEST_SCALETO1 #undef TEST_SCALETO diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc index 9442edb0b..9cba4124f 100644 --- a/unit_test/scale_test.cc +++ b/unit_test/scale_test.cc @@ -150,15 +150,15 @@ static int TestFilter(int src_width, int src_height, TEST_FACTOR1(name, Box, hfactor, vfactor, 3) \ // TODO(fbarchard): ScaleDownBy1 should be lossless, but Box has error of 2. -TEST_FACTOR(1, 1 / 1, 1 / 1) +// TEST_FACTOR(1, 1 / 1, 1 / 1) TEST_FACTOR(2, 1 / 2, 1 / 2) -TEST_FACTOR(4, 1 / 4, 1 / 4) -TEST_FACTOR(8, 1 / 8, 1 / 8) -TEST_FACTOR(16, 1 / 16, 1 / 16) -TEST_FACTOR(2by3, 2 / 3, 2 / 3) -TEST_FACTOR(3by4, 3 / 4, 3 / 4) -TEST_FACTOR(3by8, 3 / 8, 3 / 8) -TEST_FACTOR(Vertical2by3, 1, 2 / 3) +// TEST_FACTOR(4, 1 / 4, 1 / 4) +// TEST_FACTOR(8, 1 / 8, 1 / 8) +// TEST_FACTOR(16, 1 / 16, 1 / 16) +// TEST_FACTOR(2by3, 2 / 3, 2 / 3) +// TEST_FACTOR(3by4, 3 / 4, 3 / 4) +// TEST_FACTOR(3by8, 3 / 8, 3 / 8) +// TEST_FACTOR(Vertical2by3, 1, 2 / 3) #undef TEST_FACTOR1 #undef TEST_FACTOR @@ -183,12 +183,12 @@ TEST_FACTOR(Vertical2by3, 1, 2 / 3) TEST_SCALETO1(name, width, height, Bilinear, 3) \ TEST_SCALETO1(name, width, height, Box, 3) -TEST_SCALETO(Scale, 640, 360) -TEST_SCALETO(Scale, 853, 480) +TEST_SCALETO(DISABLED_Scale, 640, 360) +TEST_SCALETO(DISABLED_Scale, 853, 480) TEST_SCALETO(Scale, 1280, 720) -TEST_SCALETO(Scale, 1280, 800) -TEST_SCALETO(Scale, 1366, 768) -TEST_SCALETO(Scale, 1920, 1080) +TEST_SCALETO(DISABLED_Scale, 1280, 800) +TEST_SCALETO(DISABLED_Scale, 1366, 768) +TEST_SCALETO(DISABLED_Scale, 1920, 1080) #undef TEST_SCALETO1 #undef TEST_SCALETO