mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
NaCL port of Attenuate
BUG=253 TEST=out\release\libyuv_unittest --gtest_filter=*Attenuate* R=nfullagar@chromium.org Review URL: https://webrtc-codereview.appspot.com/1970004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@745 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
f8a86cb095
commit
008ecea4fe
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 744
|
Version: 745
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -39,6 +39,7 @@ extern "C" {
|
|||||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||||
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
|
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
|
||||||
#define HAS_ARGBBLENDROW_SSSE3
|
#define HAS_ARGBBLENDROW_SSSE3
|
||||||
|
#define HAS_ARGBATTENUATEROW_SSSE3
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are available on all x86 platforms except NaCL x64:
|
// The following are available on all x86 platforms except NaCL x64:
|
||||||
@ -114,7 +115,6 @@ extern "C" {
|
|||||||
// Effects
|
// Effects
|
||||||
#define HAS_ARGBADDROW_SSE2
|
#define HAS_ARGBADDROW_SSE2
|
||||||
#define HAS_ARGBAFFINEROW_SSE2
|
#define HAS_ARGBAFFINEROW_SSE2
|
||||||
#define HAS_ARGBATTENUATEROW_SSSE3
|
|
||||||
#define HAS_ARGBCOLORMATRIXROW_SSSE3
|
#define HAS_ARGBCOLORMATRIXROW_SSSE3
|
||||||
#define HAS_ARGBGRAYROW_SSSE3
|
#define HAS_ARGBGRAYROW_SSSE3
|
||||||
#define HAS_ARGBMIRRORROW_SSSE3
|
#define HAS_ARGBMIRRORROW_SSSE3
|
||||||
@ -188,8 +188,8 @@ extern "C" {
|
|||||||
!defined(LIBYUV_SSSE3_ONLY)
|
!defined(LIBYUV_SSSE3_ONLY)
|
||||||
// Available with NaCL:
|
// Available with NaCL:
|
||||||
#define HAS_ARGBBLENDROW_SSE2
|
#define HAS_ARGBBLENDROW_SSE2
|
||||||
#if !(defined(__native_client__) && defined(__x86_64__))
|
|
||||||
#define HAS_ARGBATTENUATEROW_SSE2
|
#define HAS_ARGBATTENUATEROW_SSE2
|
||||||
|
#if !(defined(__native_client__) && defined(__x86_64__))
|
||||||
#define HAS_MIRRORROW_SSE2
|
#define HAS_MIRRORROW_SSE2
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 744
|
#define LIBYUV_VERSION 745
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -3770,7 +3770,6 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
|
|||||||
// aligned to 16 bytes
|
// aligned to 16 bytes
|
||||||
void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"sub %0,%1 \n"
|
|
||||||
"pcmpeqb %%xmm4,%%xmm4 \n"
|
"pcmpeqb %%xmm4,%%xmm4 \n"
|
||||||
"pslld $0x18,%%xmm4 \n"
|
"pslld $0x18,%%xmm4 \n"
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
@ -3779,17 +3778,18 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
// 4 pixel loop.
|
// 4 pixel loop.
|
||||||
".p2align 4 \n"
|
".p2align 4 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqa (%0),%%xmm0 \n"
|
"movdqa "MEMACCESS(0)",%%xmm0 \n"
|
||||||
"punpcklbw %%xmm0,%%xmm0 \n"
|
"punpcklbw %%xmm0,%%xmm0 \n"
|
||||||
"pshufhw $0xff,%%xmm0,%%xmm2 \n"
|
"pshufhw $0xff,%%xmm0,%%xmm2 \n"
|
||||||
"pshuflw $0xff,%%xmm2,%%xmm2 \n"
|
"pshuflw $0xff,%%xmm2,%%xmm2 \n"
|
||||||
"pmulhuw %%xmm2,%%xmm0 \n"
|
"pmulhuw %%xmm2,%%xmm0 \n"
|
||||||
"movdqa (%0),%%xmm1 \n"
|
"movdqa "MEMACCESS(0)",%%xmm1 \n"
|
||||||
"punpckhbw %%xmm1,%%xmm1 \n"
|
"punpckhbw %%xmm1,%%xmm1 \n"
|
||||||
"pshufhw $0xff,%%xmm1,%%xmm2 \n"
|
"pshufhw $0xff,%%xmm1,%%xmm2 \n"
|
||||||
"pshuflw $0xff,%%xmm2,%%xmm2 \n"
|
"pshuflw $0xff,%%xmm2,%%xmm2 \n"
|
||||||
"pmulhuw %%xmm2,%%xmm1 \n"
|
"pmulhuw %%xmm2,%%xmm1 \n"
|
||||||
"movdqa (%0),%%xmm2 \n"
|
"movdqa "MEMACCESS(0)",%%xmm2 \n"
|
||||||
|
"lea "MEMLEA(0x10,0)",%0 \n"
|
||||||
"psrlw $0x8,%%xmm0 \n"
|
"psrlw $0x8,%%xmm0 \n"
|
||||||
"pand %%xmm4,%%xmm2 \n"
|
"pand %%xmm4,%%xmm2 \n"
|
||||||
"psrlw $0x8,%%xmm1 \n"
|
"psrlw $0x8,%%xmm1 \n"
|
||||||
@ -3797,8 +3797,8 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
"pand %%xmm5,%%xmm0 \n"
|
"pand %%xmm5,%%xmm0 \n"
|
||||||
"por %%xmm2,%%xmm0 \n"
|
"por %%xmm2,%%xmm0 \n"
|
||||||
"sub $0x4,%2 \n"
|
"sub $0x4,%2 \n"
|
||||||
"movdqa %%xmm0,(%0,%1,1) \n"
|
"movdqa %%xmm0,"MEMACCESS(1)" \n"
|
||||||
"lea 0x10(%0),%0 \n"
|
"lea "MEMLEA(0x10,1)",%1 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
@ -3825,7 +3825,6 @@ static uvec8 kShuffleAlpha1 = {
|
|||||||
// aligned to 16 bytes
|
// aligned to 16 bytes
|
||||||
void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"sub %0,%1 \n"
|
|
||||||
"pcmpeqb %%xmm3,%%xmm3 \n"
|
"pcmpeqb %%xmm3,%%xmm3 \n"
|
||||||
"pslld $0x18,%%xmm3 \n"
|
"pslld $0x18,%%xmm3 \n"
|
||||||
"movdqa %3,%%xmm4 \n"
|
"movdqa %3,%%xmm4 \n"
|
||||||
@ -3834,25 +3833,26 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
// 4 pixel loop.
|
// 4 pixel loop.
|
||||||
".p2align 4 \n"
|
".p2align 4 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqa (%0),%%xmm0 \n"
|
"movdqa "MEMACCESS(0)",%%xmm0 \n"
|
||||||
"pshufb %%xmm4,%%xmm0 \n"
|
"pshufb %%xmm4,%%xmm0 \n"
|
||||||
"movdqa (%0),%%xmm1 \n"
|
"movdqa "MEMACCESS(0)",%%xmm1 \n"
|
||||||
"punpcklbw %%xmm1,%%xmm1 \n"
|
"punpcklbw %%xmm1,%%xmm1 \n"
|
||||||
"pmulhuw %%xmm1,%%xmm0 \n"
|
"pmulhuw %%xmm1,%%xmm0 \n"
|
||||||
"movdqa (%0),%%xmm1 \n"
|
"movdqa "MEMACCESS(0)",%%xmm1 \n"
|
||||||
"pshufb %%xmm5,%%xmm1 \n"
|
"pshufb %%xmm5,%%xmm1 \n"
|
||||||
"movdqa (%0),%%xmm2 \n"
|
"movdqa "MEMACCESS(0)",%%xmm2 \n"
|
||||||
"punpckhbw %%xmm2,%%xmm2 \n"
|
"punpckhbw %%xmm2,%%xmm2 \n"
|
||||||
"pmulhuw %%xmm2,%%xmm1 \n"
|
"pmulhuw %%xmm2,%%xmm1 \n"
|
||||||
"movdqa (%0),%%xmm2 \n"
|
"movdqa "MEMACCESS(0)",%%xmm2 \n"
|
||||||
|
"lea "MEMLEA(0x10,0)",%0 \n"
|
||||||
"pand %%xmm3,%%xmm2 \n"
|
"pand %%xmm3,%%xmm2 \n"
|
||||||
"psrlw $0x8,%%xmm0 \n"
|
"psrlw $0x8,%%xmm0 \n"
|
||||||
"psrlw $0x8,%%xmm1 \n"
|
"psrlw $0x8,%%xmm1 \n"
|
||||||
"packuswb %%xmm1,%%xmm0 \n"
|
"packuswb %%xmm1,%%xmm0 \n"
|
||||||
"por %%xmm2,%%xmm0 \n"
|
"por %%xmm2,%%xmm0 \n"
|
||||||
"sub $0x4,%2 \n"
|
"sub $0x4,%2 \n"
|
||||||
"movdqa %%xmm0,(%0,%1,1) \n"
|
"movdqa %%xmm0,"MEMACCESS(1)" \n"
|
||||||
"lea 0x10(%0),%0 \n"
|
"lea "MEMLEA(0x10,1)",%1 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
|
|||||||
@ -4624,7 +4624,6 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
mov eax, [esp + 4] // src_argb0
|
mov eax, [esp + 4] // src_argb0
|
||||||
mov edx, [esp + 8] // dst_argb
|
mov edx, [esp + 8] // dst_argb
|
||||||
mov ecx, [esp + 12] // width
|
mov ecx, [esp + 12] // width
|
||||||
sub edx, eax
|
|
||||||
pcmpeqb xmm4, xmm4 // generate mask 0xff000000
|
pcmpeqb xmm4, xmm4 // generate mask 0xff000000
|
||||||
pslld xmm4, 24
|
pslld xmm4, 24
|
||||||
pcmpeqb xmm5, xmm5 // generate mask 0x00ffffff
|
pcmpeqb xmm5, xmm5 // generate mask 0x00ffffff
|
||||||
@ -4643,6 +4642,7 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
pshuflw xmm2, xmm2, 0FFh
|
pshuflw xmm2, xmm2, 0FFh
|
||||||
pmulhuw xmm1, xmm2 // rgb * a
|
pmulhuw xmm1, xmm2 // rgb * a
|
||||||
movdqa xmm2, [eax] // alphas
|
movdqa xmm2, [eax] // alphas
|
||||||
|
lea eax, [eax + 16]
|
||||||
psrlw xmm0, 8
|
psrlw xmm0, 8
|
||||||
pand xmm2, xmm4
|
pand xmm2, xmm4
|
||||||
psrlw xmm1, 8
|
psrlw xmm1, 8
|
||||||
@ -4650,8 +4650,8 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
pand xmm0, xmm5 // keep original alphas
|
pand xmm0, xmm5 // keep original alphas
|
||||||
por xmm0, xmm2
|
por xmm0, xmm2
|
||||||
sub ecx, 4
|
sub ecx, 4
|
||||||
movdqa [eax + edx], xmm0
|
movdqa [edx], xmm0
|
||||||
lea eax, [eax + 16]
|
lea edx, [edx + 16]
|
||||||
jg convertloop
|
jg convertloop
|
||||||
|
|
||||||
ret
|
ret
|
||||||
@ -4674,7 +4674,6 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
mov eax, [esp + 4] // src_argb0
|
mov eax, [esp + 4] // src_argb0
|
||||||
mov edx, [esp + 8] // dst_argb
|
mov edx, [esp + 8] // dst_argb
|
||||||
mov ecx, [esp + 12] // width
|
mov ecx, [esp + 12] // width
|
||||||
sub edx, eax
|
|
||||||
pcmpeqb xmm3, xmm3 // generate mask 0xff000000
|
pcmpeqb xmm3, xmm3 // generate mask 0xff000000
|
||||||
pslld xmm3, 24
|
pslld xmm3, 24
|
||||||
movdqa xmm4, kShuffleAlpha0
|
movdqa xmm4, kShuffleAlpha0
|
||||||
@ -4693,14 +4692,15 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
punpckhbw xmm2, xmm2 // next 2 pixel rgbs
|
punpckhbw xmm2, xmm2 // next 2 pixel rgbs
|
||||||
pmulhuw xmm1, xmm2 // rgb * a
|
pmulhuw xmm1, xmm2 // rgb * a
|
||||||
movdqa xmm2, [eax] // mask original alpha
|
movdqa xmm2, [eax] // mask original alpha
|
||||||
|
lea eax, [eax + 16]
|
||||||
pand xmm2, xmm3
|
pand xmm2, xmm3
|
||||||
psrlw xmm0, 8
|
psrlw xmm0, 8
|
||||||
psrlw xmm1, 8
|
psrlw xmm1, 8
|
||||||
packuswb xmm0, xmm1
|
packuswb xmm0, xmm1
|
||||||
por xmm0, xmm2 // copy original alpha
|
por xmm0, xmm2 // copy original alpha
|
||||||
sub ecx, 4
|
sub ecx, 4
|
||||||
movdqa [eax + edx], xmm0
|
movdqa [edx], xmm0
|
||||||
lea eax, [eax + 16]
|
lea edx, [edx + 16]
|
||||||
jg convertloop
|
jg convertloop
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user