mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
Change Attenuate and Unattenuate to unaligned memory ops.
BUG=279
TEST=ARGBAttenuate_Unaligned
R=nfullagar@google.com, ryanpetrie@google.com
Review URL: https://webrtc-codereview.appspot.com/2709004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@821 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
d2371686c2
commit
38157bdc71
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 820
|
||||
Version: 821
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 820
|
||||
#define LIBYUV_VERSION 821
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -1134,9 +1134,7 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 4 &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
|
||||
ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
|
||||
@ -1191,9 +1189,7 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
|
||||
void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
|
||||
int width) = ARGBUnattenuateRow_C;
|
||||
#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
|
||||
ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
|
||||
|
||||
@ -4117,17 +4117,17 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
// 4 pixel loop.
|
||||
".p2align 4 \n"
|
||||
"1: \n"
|
||||
"movdqa "MEMACCESS(0)",%%xmm0 \n"
|
||||
"movdqu "MEMACCESS(0)",%%xmm0 \n"
|
||||
"pshufb %%xmm4,%%xmm0 \n"
|
||||
"movdqa "MEMACCESS(0)",%%xmm1 \n"
|
||||
"movdqu "MEMACCESS(0)",%%xmm1 \n"
|
||||
"punpcklbw %%xmm1,%%xmm1 \n"
|
||||
"pmulhuw %%xmm1,%%xmm0 \n"
|
||||
"movdqa "MEMACCESS(0)",%%xmm1 \n"
|
||||
"movdqu "MEMACCESS(0)",%%xmm1 \n"
|
||||
"pshufb %%xmm5,%%xmm1 \n"
|
||||
"movdqa "MEMACCESS(0)",%%xmm2 \n"
|
||||
"movdqu "MEMACCESS(0)",%%xmm2 \n"
|
||||
"punpckhbw %%xmm2,%%xmm2 \n"
|
||||
"pmulhuw %%xmm2,%%xmm1 \n"
|
||||
"movdqa "MEMACCESS(0)",%%xmm2 \n"
|
||||
"movdqu "MEMACCESS(0)",%%xmm2 \n"
|
||||
"lea "MEMLEA(0x10,0)",%0 \n"
|
||||
"pand %%xmm3,%%xmm2 \n"
|
||||
"psrlw $0x8,%%xmm0 \n"
|
||||
@ -4135,7 +4135,7 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"por %%xmm2,%%xmm0 \n"
|
||||
"sub $0x4,%2 \n"
|
||||
"movdqa %%xmm0,"MEMACCESS(1)" \n"
|
||||
"movdqu %%xmm0,"MEMACCESS(1)" \n"
|
||||
"lea "MEMLEA(0x10,1)",%1 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
@ -4161,7 +4161,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
|
||||
// 4 pixel loop.
|
||||
".p2align 4 \n"
|
||||
"1: \n"
|
||||
"movdqa "MEMACCESS(0)",%%xmm0 \n"
|
||||
"movdqu "MEMACCESS(0)",%%xmm0 \n"
|
||||
"movzb "MEMACCESS2(0x03,0)",%3 \n"
|
||||
"punpcklbw %%xmm0,%%xmm0 \n"
|
||||
MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
|
||||
@ -4171,7 +4171,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
|
||||
"pshuflw $0x40,%%xmm3,%%xmm3 \n"
|
||||
"movlhps %%xmm3,%%xmm2 \n"
|
||||
"pmulhuw %%xmm2,%%xmm0 \n"
|
||||
"movdqa "MEMACCESS(0)",%%xmm1 \n"
|
||||
"movdqu "MEMACCESS(0)",%%xmm1 \n"
|
||||
"movzb "MEMACCESS2(0x0b,0)",%3 \n"
|
||||
"punpckhbw %%xmm1,%%xmm1 \n"
|
||||
MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
|
||||
@ -4184,7 +4184,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
|
||||
"lea "MEMLEA(0x10,0)",%0 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x4,%2 \n"
|
||||
"movdqa %%xmm0,"MEMACCESS(1)" \n"
|
||||
"movdqu %%xmm0,"MEMACCESS(1)" \n"
|
||||
"lea "MEMLEA(0x10,1)",%1 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
|
||||
@ -4586,7 +4586,7 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
|
||||
mov esi, [esp + 4 + 8] // src_argb1
|
||||
mov edx, [esp + 4 + 12] // dst_argb
|
||||
mov ecx, [esp + 4 + 16] // width
|
||||
pcmpeqb xmm7, xmm7 // generate constant 1
|
||||
pcmpeqb xmm7, xmm7 // generate constant 0x0001
|
||||
psrlw xmm7, 15
|
||||
pcmpeqb xmm6, xmm6 // generate mask 0x00ff00ff
|
||||
psrlw xmm6, 8
|
||||
@ -4788,17 +4788,17 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
|
||||
align 16
|
||||
convertloop:
|
||||
movdqa xmm0, [eax] // read 4 pixels
|
||||
movdqu xmm0, [eax] // read 4 pixels
|
||||
pshufb xmm0, xmm4 // isolate first 2 alphas
|
||||
movdqa xmm1, [eax] // read 4 pixels
|
||||
movdqu xmm1, [eax] // read 4 pixels
|
||||
punpcklbw xmm1, xmm1 // first 2 pixel rgbs
|
||||
pmulhuw xmm0, xmm1 // rgb * a
|
||||
movdqa xmm1, [eax] // read 4 pixels
|
||||
movdqu xmm1, [eax] // read 4 pixels
|
||||
pshufb xmm1, xmm5 // isolate next 2 alphas
|
||||
movdqa xmm2, [eax] // read 4 pixels
|
||||
movdqu xmm2, [eax] // read 4 pixels
|
||||
punpckhbw xmm2, xmm2 // next 2 pixel rgbs
|
||||
pmulhuw xmm1, xmm2 // rgb * a
|
||||
movdqa xmm2, [eax] // mask original alpha
|
||||
movdqu xmm2, [eax] // mask original alpha
|
||||
lea eax, [eax + 16]
|
||||
pand xmm2, xmm3
|
||||
psrlw xmm0, 8
|
||||
@ -4806,7 +4806,7 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
packuswb xmm0, xmm1
|
||||
por xmm0, xmm2 // copy original alpha
|
||||
sub ecx, 4
|
||||
movdqa [edx], xmm0
|
||||
movdqu [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
jg convertloop
|
||||
|
||||
@ -4874,7 +4874,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
|
||||
|
||||
align 16
|
||||
convertloop:
|
||||
movdqa xmm0, [eax] // read 4 pixels
|
||||
movdqu xmm0, [eax] // read 4 pixels
|
||||
movzx esi, byte ptr [eax + 3] // first alpha
|
||||
movzx edi, byte ptr [eax + 7] // second alpha
|
||||
punpcklbw xmm0, xmm0 // first 2
|
||||
@ -4885,7 +4885,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
|
||||
movlhps xmm2, xmm3
|
||||
pmulhuw xmm0, xmm2 // rgb * a
|
||||
|
||||
movdqa xmm1, [eax] // read 4 pixels
|
||||
movdqu xmm1, [eax] // read 4 pixels
|
||||
movzx esi, byte ptr [eax + 11] // third alpha
|
||||
movzx edi, byte ptr [eax + 15] // fourth alpha
|
||||
punpckhbw xmm1, xmm1 // next 2
|
||||
@ -4899,7 +4899,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
|
||||
|
||||
packuswb xmm0, xmm1
|
||||
sub ecx, 4
|
||||
movdqa [edx], xmm0
|
||||
movdqu [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
jg convertloop
|
||||
pop edi
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user