From 8f439eac1dc6352c214d3797a2af5cee80ead300 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Thu, 21 Jun 2012 02:00:34 +0000 Subject: [PATCH] ARGBColorMatrixRow_SSSE3 use signed psraw instead of psrlw before doing pack BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/652009 git-svn-id: http://libyuv.googlecode.com/svn/trunk@291 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/row_posix.cc | 16 ++++++++-------- source/row_win.cc | 19 ++++++++----------- unit_test/planar_test.cc | 10 +++++----- 5 files changed, 23 insertions(+), 26 deletions(-) diff --git a/README.chromium b/README.chromium index 578446ba4..75ee5f39e 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 290 +Version: 291 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index abf00a12c..233530a96 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,7 +11,7 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 290 +#define LIBYUV_VERSION 291 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/row_posix.cc b/source/row_posix.cc index da5e1e189..dcca4e8db 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -2881,23 +2881,23 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb, "movdqa 0x10(%0),%%xmm6 \n" "pmaddubsw %%xmm2,%%xmm0 \n" "pmaddubsw %%xmm2,%%xmm6 \n" - "phaddw %%xmm6,%%xmm0 \n" - "psrlw $0x7,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" "movdqa (%0),%%xmm5 \n" "movdqa 0x10(%0),%%xmm1 \n" "pmaddubsw %%xmm3,%%xmm5 \n" "pmaddubsw %%xmm3,%%xmm1 \n" - "phaddw %%xmm1,%%xmm5 \n" - "psrlw $0x7,%%xmm5 \n" + "phaddsw %%xmm6,%%xmm0 \n" + "phaddsw %%xmm1,%%xmm5 \n" + "psraw $0x7,%%xmm0 \n" + "psraw $0x7,%%xmm5 \n" + "packuswb %%xmm0,%%xmm0 \n" "packuswb %%xmm5,%%xmm5 \n" "punpcklbw %%xmm5,%%xmm0 \n" "movdqa (%0),%%xmm5 \n" "movdqa 0x10(%0),%%xmm1 \n" "pmaddubsw %%xmm4,%%xmm5 \n" "pmaddubsw %%xmm4,%%xmm1 \n" - "phaddw %%xmm1,%%xmm5 \n" - "psrlw $0x7,%%xmm5 \n" + "phaddsw %%xmm1,%%xmm5 \n" + "psraw $0x7,%%xmm5 \n" "packuswb %%xmm5,%%xmm5 \n" "movdqa (%0),%%xmm6 \n" "movdqa 0x10(%0),%%xmm1 \n" @@ -2905,8 +2905,8 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb, "psrld $0x18,%%xmm1 \n" "packuswb %%xmm1,%%xmm6 \n" "packuswb %%xmm6,%%xmm6 \n" - "punpcklbw %%xmm6,%%xmm5 \n" "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm6,%%xmm5 \n" "punpcklwd %%xmm5,%%xmm0 \n" "punpckhwd %%xmm5,%%xmm1 \n" "sub $0x8,%1 \n" diff --git a/source/row_win.cc b/source/row_win.cc index 47e9dff3e..0e73a0c58 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -2936,9 +2936,6 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) { // Same as Sepia except matrix is provided. // TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R // and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd. -// TODO(fbarchard): phaddw not paired. -// TODO(fbarchard): Test data copying from mem instead of from reg. -// TODO(fbarchard): packing and then unpacking the A - is simple pand/por faster __declspec(naked) __declspec(align(16)) void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb, int width) { @@ -2959,23 +2956,23 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb, movdqa xmm6, [eax + 16] pmaddubsw xmm0, xmm2 pmaddubsw xmm6, xmm2 - phaddw xmm0, xmm6 - psrlw xmm0, 7 - packuswb xmm0, xmm0 // 8 B values movdqa xmm5, [eax] // G movdqa xmm1, [eax + 16] pmaddubsw xmm5, xmm3 pmaddubsw xmm1, xmm3 - phaddw xmm5, xmm1 - psrlw xmm5, 7 + phaddsw xmm0, xmm6 // B + phaddsw xmm5, xmm1 // G + psraw xmm0, 7 // B + psraw xmm5, 7 // G + packuswb xmm0, xmm0 // 8 B values packuswb xmm5, xmm5 // 8 G values punpcklbw xmm0, xmm5 // 8 BG values movdqa xmm5, [eax] // R movdqa xmm1, [eax + 16] pmaddubsw xmm5, xmm4 pmaddubsw xmm1, xmm4 - phaddw xmm5, xmm1 - psrlw xmm5, 7 + phaddsw xmm5, xmm1 + psraw xmm5, 7 packuswb xmm5, xmm5 // 8 R values movdqa xmm6, [eax] // A movdqa xmm1, [eax + 16] @@ -2983,8 +2980,8 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb, psrld xmm1, 24 packuswb xmm6, xmm1 packuswb xmm6, xmm6 // 8 A values - punpcklbw xmm5, xmm6 // 8 RA values movdqa xmm1, xmm0 // Weave BG, RA together + punpcklbw xmm5, xmm6 // 8 RA values punpcklwd xmm0, xmm5 // BGRA first 4 punpckhwd xmm1, xmm5 // BGRA next 4 sub ecx, 8 diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index a550af4e2..ea3b96d2e 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -218,9 +218,9 @@ TESTATOPLANAR(RGB24, 3, I420, 2, 2) TESTATOPLANAR(RGB565, 2, I420, 2, 2) TESTATOPLANAR(ARGB1555, 2, I420, 2, 2) TESTATOPLANAR(ARGB4444, 2, I420, 2, 2) -//TESTATOPLANAR(ARGB, 4, I411, 4, 1) +// TESTATOPLANAR(ARGB, 4, I411, 4, 1) TESTATOPLANAR(ARGB, 4, I422, 2, 1) -//TESTATOPLANAR(ARGB, 4, I444, 1, 1) +// TESTATOPLANAR(ARGB, 4, I444, 1, 1) // TODO(fbarchard): Implement and test 411 and 444 #define TESTATOB(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B) \ @@ -544,7 +544,7 @@ TEST_F(libyuvTest, TestARGBColorMatrix) { SIMD_ALIGNED(uint8 orig_pixels[256][4]); // Matrix for Sepia. - static const int8 kARGBToSepiaB[] = { + static const int8 kARGBToSepia[] = { 17, 68, 35, 0, 22, 88, 45, 0, 24, 98, 50, 0, @@ -571,7 +571,7 @@ TEST_F(libyuvTest, TestARGBColorMatrix) { orig_pixels[3][2] = 192u; orig_pixels[3][3] = 224u; // Do 16 to test asm version. - ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepiaB[0], 0, 0, 16, 1); + ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepia[0], 0, 0, 16, 1); EXPECT_EQ(33u, orig_pixels[0][0]); EXPECT_EQ(43u, orig_pixels[0][1]); EXPECT_EQ(47u, orig_pixels[0][2]); @@ -597,7 +597,7 @@ TEST_F(libyuvTest, TestARGBColorMatrix) { } for (int i = 0; i < 1000 * 1280 * 720 / 256; ++i) { - ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepiaB[0], 0, 0, 256, 1); + ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepia[0], 0, 0, 256, 1); } }