Mirror of https://chromium.googlesource.com/libyuv/libyuv
Synced 2025-12-07 01:06:46 +08:00
use movdqu on 2nd source for blend
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/479001

git-svn-id: http://libyuv.googlecode.com/svn/trunk@235 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent d2f4413d29
commit 1702ec78f8
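In short: src_argb1, the second blend source, can no longer be assumed 16-byte aligned, so its loads switch from movdqa (which faults on an unaligned address) to movdqu (which accepts any address). The same contrast in SSE2 intrinsics, as a sketch with illustrative names, not libyuv code:

#include <emmintrin.h>  // SSE2
#include <stdint.h>

// Illustrative helper only. _mm_load_si128 compiles to movdqa and faults
// if src is not 16-byte aligned; _mm_loadu_si128 compiles to movdqu and
// tolerates any address, at a small cost on older CPUs.
static __m128i Load4Pixels(const uint8_t* src, int src_is_aligned) {
  if (src_is_aligned) {
    return _mm_load_si128((const __m128i*)src);  // movdqa
  }
  return _mm_loadu_si128((const __m128i*)src);   // movdqu
}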
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 234
+Version: 235
 License: BSD
 License File: LICENSE
 
@@ -11,7 +11,7 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 234
+#define LIBYUV_VERSION 235
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_
@@ -2048,14 +2048,14 @@ void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb0, const uint8* src_argb1,
     "movdqu    (%0),%%xmm3                     \n"  // first 4 pixels
     "movdqa    %%xmm3,%%xmm0                   \n"
     "pxor      %%xmm4,%%xmm3                   \n"
-    "movdqa    (%1),%%xmm2                     \n"
+    "movdqu    (%1),%%xmm2                     \n"
     "psrlw     $0x8,%%xmm3                     \n"
     "pshufhw   $0xf5,%%xmm3,%%xmm3             \n"
     "pshuflw   $0xf5,%%xmm3,%%xmm3             \n"
     "pand      %%xmm6,%%xmm2                   \n"
     "paddw     %%xmm7,%%xmm3                   \n"
     "pmullw    %%xmm3,%%xmm2                   \n"
-    "movdqa    (%1),%%xmm1                     \n"
+    "movdqu    (%1),%%xmm1                     \n"
     "psrlw     $0x8,%%xmm1                     \n"
     "por       %%xmm4,%%xmm0                   \n"
     "pmullw    %%xmm3,%%xmm1                   \n"
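For readers decoding the register choreography: xmm6 masks each word down to its low byte (_r_b), psrlw 8 keeps the high byte (_a_g), and the broadcast alpha word scales the destination by 256 - alpha, built in the asm as ~alpha + 1. A scalar sketch of the per-pixel math with hypothetical helper names; the saturating final add happens after this hunk, so that detail is my assumption:

#include <stdint.h>

// Sketch of what ARGBBlendRow_Aligned_SSE2 computes per channel:
// dst = src + dst * (256 - src_alpha) / 256, with the result clamped.
static inline uint8_t BlendChannel(uint32_t s, uint32_t d, uint32_t f) {
  uint32_t v = s + ((d * f) >> 8);
  return (uint8_t)(v > 255 ? 255 : v);  // assumes a saturating final add
}

static uint32_t BlendARGBPixel(uint32_t src, uint32_t dst) {
  uint32_t a = src >> 24;  // source alpha
  uint32_t f = 256 - a;    // the asm builds this as ~alpha + 1
  uint32_t b = BlendChannel(src & 0xff, dst & 0xff, f);
  uint32_t g = BlendChannel((src >> 8) & 0xff, (dst >> 8) & 0xff, f);
  uint32_t r = BlendChannel((src >> 16) & 0xff, (dst >> 16) & 0xff, f);
  return 0xff000000u | (r << 16) | (g << 8) | b;  // alpha forced to 255
}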
@@ -2070,14 +2070,14 @@ void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb0, const uint8* src_argb1,
     "jle       9f                              \n"
     "movdqa    %%xmm3,%%xmm0                   \n"  // next 4 pixels
     "pxor      %%xmm4,%%xmm3                   \n"
-    "movdqa    0x10(%1),%%xmm2                 \n"
+    "movdqu    0x10(%1),%%xmm2                 \n"
     "psrlw     $0x8,%%xmm3                     \n"
     "pshufhw   $0xf5,%%xmm3,%%xmm3             \n"
     "pshuflw   $0xf5,%%xmm3,%%xmm3             \n"
     "pand      %%xmm6,%%xmm2                   \n"
     "paddw     %%xmm7,%%xmm3                   \n"
     "pmullw    %%xmm3,%%xmm2                   \n"
-    "movdqa    0x10(%1),%%xmm1                 \n"
+    "movdqu    0x10(%1),%%xmm1                 \n"
     "lea       0x20(%1),%1                     \n"
     "psrlw     $0x8,%%xmm1                     \n"
     "por       %%xmm4,%%xmm0                   \n"
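One detail worth noting across both batches: pmullw keeps only the low 16 bits of each product, but that is exact here, because a channel byte is at most 255 and the (256 - alpha) factor is at most 256, so no product exceeds 255 * 256 = 65280, which still fits a 16-bit lane.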
@@ -2075,11 +2075,13 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
 #endif  // HAS_YUY2TOYROW_SSE2
 
 #ifdef HAS_ARGBBLENDROW_SSE2
-// Blend 8 pixels at a time
-// Destination aligned to 16 bytes, multiple of 4 pixels
+// Blend 8 pixels at a time.
+// src_argb0 unaligned.
+// src_argb1 and dst_argb aligned to 16 bytes.
+// width must be multiple of 4 pixels.
 __declspec(naked) __declspec(align(16))
 void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb0, const uint8* src_argb1,
                                uint8* dst_argb, int width) {
   __asm {
     push       esi
     mov        eax, [esp + 4 + 4]   // src_argb0
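The updated comments spell out the contract: only src_argb1 and dst_argb need 16-byte alignment, and width must be a multiple of 4. A caller honoring that contract might gate like this (a sketch only; libyuv's actual selection lives in the _Any wrappers, and uint8_t stands in for libyuv's uint8):

#include <stdint.h>

// Illustrative helper, not from libyuv: true if p is 16-byte aligned.
static inline int IsAligned16(const void* p) {
  return ((uintptr_t)p & 15) == 0;
}

void BlendRowDispatch(const uint8_t* src_argb0, const uint8_t* src_argb1,
                      uint8_t* dst_argb, int width) {
  if (IsAligned16(src_argb1) && IsAligned16(dst_argb) && (width & 3) == 0) {
    ARGBBlendRow_Aligned_SSE2(src_argb0, src_argb1, dst_argb, width);
  } else {
    ARGBBlendRow1_SSE2(src_argb0, src_argb1, dst_argb, width);  // 1 pixel at a time
  }
}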
@@ -2100,14 +2102,14 @@ void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb0, const uint8* src_argb1,
     movdqu     xmm3, [eax]
     movdqa     xmm0, xmm3         // src argb
     pxor       xmm3, xmm4         // ~alpha
-    movdqa     xmm2, [esi]        // _r_b
+    movdqu     xmm2, [esi]        // _r_b
     psrlw      xmm3, 8            // alpha
     pshufhw    xmm3, xmm3,0F5h    // 8 alpha words
     pshuflw    xmm3, xmm3,0F5h
     pand       xmm2, xmm6         // _r_b
     paddw      xmm3, xmm7         // 256 - alpha
     pmullw     xmm2, xmm3         // _r_b * alpha
-    movdqa     xmm1, [esi]        // _a_g
+    movdqu     xmm1, [esi]        // _a_g
     psrlw      xmm1, 8            // _a_g
     por        xmm0, xmm4         // set alpha to 255
     pmullw     xmm1, xmm3         // _a_g * alpha
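The 0F5h immediate (binary 11 11 01 01) selects word 1 for both low words and word 3 for both high words of each 64-bit half, replicating each pixel's alpha word over its neighbor. The same step in intrinsics, as a standalone sketch:

#include <emmintrin.h>  // SSE2

// After psrlw 8, words 1 and 3 of each 64-bit half hold alpha values;
// shuffling with 0xF5 copies each one over the adjacent word as well.
static __m128i BroadcastAlphaWords(__m128i v) {
  v = _mm_shufflehi_epi16(v, 0xF5);
  return _mm_shufflelo_epi16(v, 0xF5);
}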
@@ -2123,14 +2125,14 @@ void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb0, const uint8* src_argb1,
 
     movdqa     xmm0, xmm3         // src argb
     pxor       xmm3, xmm4         // ~alpha
-    movdqa     xmm2, [esi + 16]   // _r_b
+    movdqu     xmm2, [esi + 16]   // _r_b
     psrlw      xmm3, 8            // alpha
     pshufhw    xmm3, xmm3,0F5h    // 8 alpha words
     pshuflw    xmm3, xmm3,0F5h
     pand       xmm2, xmm6         // _r_b
     paddw      xmm3, xmm7         // 256 - alpha
     pmullw     xmm2, xmm3         // _r_b * alpha
-    movdqa     xmm1, [esi + 16]   // _a_g
+    movdqu     xmm1, [esi + 16]   // _a_g
     lea        esi, [esi + 32]
     psrlw      xmm1, 8            // _a_g
     por        xmm0, xmm4         // set alpha to 255
@@ -2150,7 +2152,7 @@ void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb0, const uint8* src_argb1,
   }
 }
 
-// Blend 1 pixel at a time, unaligned
+// Blend 1 pixel at a time, unaligned.
 __declspec(naked) __declspec(align(16))
 void ARGBBlendRow1_SSE2(const uint8* src_argb0, const uint8* src_argb1,
                         uint8* dst_argb, int width) {
@@ -2247,7 +2249,7 @@ static const uvec8 kShuffleAlpha = {
 // with..
 //    pshufb     xmm3, kShuffleAlpha // alpha
 
-// Destination aligned to 16 bytes, multiple of 4 pixels
+// Destination aligned to 16 bytes, multiple of 4 pixels.
 __declspec(naked) __declspec(align(16))
 void ARGBBlendRow_Aligned_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
                                 uint8* dst_argb, int width) {
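The pshufb replacement works because a single byte shuffle can pick each pixel's alpha byte into both word lanes and zero the high bytes in one instruction, replacing the psrlw/pshufhw/pshuflw triple. This hunk only shows the opening brace of kShuffleAlpha, so the layout below is my assumption about a mask with that effect, not the constant from the source:

#include <tmmintrin.h>  // SSSE3

// Hypothetical mask: byte 3 (alpha of pixel 0) fills both words of that
// pixel, and so on for bytes 7, 11, 15; 0x80 entries zero the high bytes.
static const char kShuffleAlphaSketch[16] = {
  3, (char)0x80, 3, (char)0x80, 7, (char)0x80, 7, (char)0x80,
  11, (char)0x80, 11, (char)0x80, 15, (char)0x80, 15, (char)0x80
};

static __m128i ShuffleAlpha(__m128i argb) {
  __m128i mask = _mm_loadu_si128((const __m128i*)kShuffleAlphaSketch);
  return _mm_shuffle_epi8(argb, mask);  // one op replaces three
}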
@@ -2272,11 +2274,11 @@ void ARGBBlendRow_Aligned_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
     movdqa     xmm0, xmm3         // src argb
     pxor       xmm3, xmm4         // ~alpha
     pshufb     xmm3, kShuffleAlpha // alpha
-    movdqa     xmm2, [esi]        // _r_b
+    movdqu     xmm2, [esi]        // _r_b
     pand       xmm2, xmm6         // _r_b
     paddw      xmm3, xmm7         // 256 - alpha
     pmullw     xmm2, xmm3         // _r_b * alpha
-    movdqa     xmm1, [esi]        // _a_g
+    movdqu     xmm1, [esi]        // _a_g
     psrlw      xmm1, 8            // _a_g
     por        xmm0, xmm4         // set alpha to 255
     pmullw     xmm1, xmm3         // _a_g * alpha
@@ -2292,12 +2294,12 @@ void ARGBBlendRow_Aligned_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
 
     movdqa     xmm0, xmm3         // src argb
     pxor       xmm3, xmm4         // ~alpha
-    movdqa     xmm2, [esi + 16]   // _r_b
+    movdqu     xmm2, [esi + 16]   // _r_b
     pshufb     xmm3, kShuffleAlpha // alpha
     pand       xmm2, xmm6         // _r_b
     paddw      xmm3, xmm7         // 256 - alpha
     pmullw     xmm2, xmm3         // _r_b * alpha
-    movdqa     xmm1, [esi + 16]   // _a_g
+    movdqu     xmm1, [esi + 16]   // _a_g
     lea        esi, [esi + 32]
     psrlw      xmm1, 8            // _a_g
     por        xmm0, xmm4         // set alpha to 255
@@ -2331,7 +2333,7 @@ void ARGBBlendRow_Any_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
     dst_argb += count * 4;
     width -= count;
   }
-  // Do multiple of 4 pixels
+  // Do multiple of 4 pixels.
   if (width & ~3) {
     ARGBBlendRow_Aligned_SSSE3(src_argb0, src_argb1, dst_argb, width & ~3);
   }
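The hunk above is the tail of the usual _Any wrapper shape: blend leading pixels one at a time until the destination is aligned, then hand the largest multiple of 4 to the aligned kernel. A sketch of that shape, using the single-pixel SSE2 row shown earlier for the scalar edges; the peel-count computation and leftover handling are my reconstruction, not text from this diff:

#include <stdint.h>

void ARGBBlendRow_Any_Sketch(const uint8_t* src_argb0,
                             const uint8_t* src_argb1,
                             uint8_t* dst_argb, int width) {
  // Hypothetical peel count: ARGB pixels until dst_argb is 16-byte aligned.
  int count = (int)((-(intptr_t)dst_argb & 15) >> 2);
  if (count > width) {
    count = width;
  }
  if (count) {
    ARGBBlendRow1_SSE2(src_argb0, src_argb1, dst_argb, count);
    src_argb0 += count * 4;
    src_argb1 += count * 4;
    dst_argb += count * 4;
    width -= count;
  }
  // Do multiple of 4 pixels.
  if (width & ~3) {
    ARGBBlendRow_Aligned_SSSE3(src_argb0, src_argb1, dst_argb, width & ~3);
  }
  // Blend any trailing 1-3 pixels one at a time.
  if (width & 3) {
    int done = width & ~3;
    ARGBBlendRow1_SSE2(src_argb0 + done * 4, src_argb1 + done * 4,
                       dst_argb + done * 4, width & 3);
  }
}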