mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
ARGBColorMatrixRow_SSSE3: use the signed (arithmetic) shift psraw instead of the logical shift psrlw before packing
BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/652009 git-svn-id: http://libyuv.googlecode.com/svn/trunk@291 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
d9eb63fc16
commit
8f439eac1d
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 290
|
||||
Version: 291
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,7 +11,7 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 290
|
||||
#define LIBYUV_VERSION 291
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
|
||||
@ -2881,23 +2881,23 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
|
||||
"movdqa 0x10(%0),%%xmm6 \n"
|
||||
"pmaddubsw %%xmm2,%%xmm0 \n"
|
||||
"pmaddubsw %%xmm2,%%xmm6 \n"
|
||||
"phaddw %%xmm6,%%xmm0 \n"
|
||||
"psrlw $0x7,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movdqa (%0),%%xmm5 \n"
|
||||
"movdqa 0x10(%0),%%xmm1 \n"
|
||||
"pmaddubsw %%xmm3,%%xmm5 \n"
|
||||
"pmaddubsw %%xmm3,%%xmm1 \n"
|
||||
"phaddw %%xmm1,%%xmm5 \n"
|
||||
"psrlw $0x7,%%xmm5 \n"
|
||||
"phaddsw %%xmm6,%%xmm0 \n"
|
||||
"phaddsw %%xmm1,%%xmm5 \n"
|
||||
"psraw $0x7,%%xmm0 \n"
|
||||
"psraw $0x7,%%xmm5 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"packuswb %%xmm5,%%xmm5 \n"
|
||||
"punpcklbw %%xmm5,%%xmm0 \n"
|
||||
"movdqa (%0),%%xmm5 \n"
|
||||
"movdqa 0x10(%0),%%xmm1 \n"
|
||||
"pmaddubsw %%xmm4,%%xmm5 \n"
|
||||
"pmaddubsw %%xmm4,%%xmm1 \n"
|
||||
"phaddw %%xmm1,%%xmm5 \n"
|
||||
"psrlw $0x7,%%xmm5 \n"
|
||||
"phaddsw %%xmm1,%%xmm5 \n"
|
||||
"psraw $0x7,%%xmm5 \n"
|
||||
"packuswb %%xmm5,%%xmm5 \n"
|
||||
"movdqa (%0),%%xmm6 \n"
|
||||
"movdqa 0x10(%0),%%xmm1 \n"
|
||||
@ -2905,8 +2905,8 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
|
||||
"psrld $0x18,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm6 \n"
|
||||
"packuswb %%xmm6,%%xmm6 \n"
|
||||
"punpcklbw %%xmm6,%%xmm5 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"punpcklbw %%xmm6,%%xmm5 \n"
|
||||
"punpcklwd %%xmm5,%%xmm0 \n"
|
||||
"punpckhwd %%xmm5,%%xmm1 \n"
|
||||
"sub $0x8,%1 \n"
|
||||
|
||||
@ -2936,9 +2936,6 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
|
||||
// Same as Sepia except matrix is provided.
|
||||
// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
|
||||
// and B into a high and low, then G/A, punpckl/hbw and then punpckl/hwd.
|
||||
// TODO(fbarchard): phaddw not paired.
|
||||
// TODO(fbarchard): Test data copying from mem instead of from reg.
|
||||
// TODO(fbarchard): packing and then unpacking the A - is a simple pand/por faster?
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
|
||||
int width) {
|
||||
@ -2959,23 +2956,23 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
|
||||
movdqa xmm6, [eax + 16]
|
||||
pmaddubsw xmm0, xmm2
|
||||
pmaddubsw xmm6, xmm2
|
||||
phaddw xmm0, xmm6
|
||||
psrlw xmm0, 7
|
||||
packuswb xmm0, xmm0 // 8 B values
|
||||
movdqa xmm5, [eax] // G
|
||||
movdqa xmm1, [eax + 16]
|
||||
pmaddubsw xmm5, xmm3
|
||||
pmaddubsw xmm1, xmm3
|
||||
phaddw xmm5, xmm1
|
||||
psrlw xmm5, 7
|
||||
phaddsw xmm0, xmm6 // B
|
||||
phaddsw xmm5, xmm1 // G
|
||||
psraw xmm0, 7 // B
|
||||
psraw xmm5, 7 // G
|
||||
packuswb xmm0, xmm0 // 8 B values
|
||||
packuswb xmm5, xmm5 // 8 G values
|
||||
punpcklbw xmm0, xmm5 // 8 BG values
|
||||
movdqa xmm5, [eax] // R
|
||||
movdqa xmm1, [eax + 16]
|
||||
pmaddubsw xmm5, xmm4
|
||||
pmaddubsw xmm1, xmm4
|
||||
phaddw xmm5, xmm1
|
||||
psrlw xmm5, 7
|
||||
phaddsw xmm5, xmm1
|
||||
psraw xmm5, 7
|
||||
packuswb xmm5, xmm5 // 8 R values
|
||||
movdqa xmm6, [eax] // A
|
||||
movdqa xmm1, [eax + 16]
|
||||
@ -2983,8 +2980,8 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
|
||||
psrld xmm1, 24
|
||||
packuswb xmm6, xmm1
|
||||
packuswb xmm6, xmm6 // 8 A values
|
||||
punpcklbw xmm5, xmm6 // 8 RA values
|
||||
movdqa xmm1, xmm0 // Weave BG, RA together
|
||||
punpcklbw xmm5, xmm6 // 8 RA values
|
||||
punpcklwd xmm0, xmm5 // BGRA first 4
|
||||
punpckhwd xmm1, xmm5 // BGRA next 4
|
||||
sub ecx, 8
|
||||
|
||||
@ -544,7 +544,7 @@ TEST_F(libyuvTest, TestARGBColorMatrix) {
|
||||
SIMD_ALIGNED(uint8 orig_pixels[256][4]);
|
||||
|
||||
// Matrix for Sepia.
|
||||
static const int8 kARGBToSepiaB[] = {
|
||||
static const int8 kARGBToSepia[] = {
|
||||
17, 68, 35, 0,
|
||||
22, 88, 45, 0,
|
||||
24, 98, 50, 0,
|
||||
@ -571,7 +571,7 @@ TEST_F(libyuvTest, TestARGBColorMatrix) {
|
||||
orig_pixels[3][2] = 192u;
|
||||
orig_pixels[3][3] = 224u;
|
||||
// Do 16 to test asm version.
|
||||
ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepiaB[0], 0, 0, 16, 1);
|
||||
ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepia[0], 0, 0, 16, 1);
|
||||
EXPECT_EQ(33u, orig_pixels[0][0]);
|
||||
EXPECT_EQ(43u, orig_pixels[0][1]);
|
||||
EXPECT_EQ(47u, orig_pixels[0][2]);
|
||||
@ -597,7 +597,7 @@ TEST_F(libyuvTest, TestARGBColorMatrix) {
|
||||
}
|
||||
|
||||
for (int i = 0; i < 1000 * 1280 * 720 / 256; ++i) {
|
||||
ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepiaB[0], 0, 0, 256, 1);
|
||||
ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepia[0], 0, 0, 256, 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user