mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-11 04:39:53 +08:00
ColorMatrix and friends ported to NaCl
BUG=253
TESTED=ncval_x86_64.exe newlib/Release/nacltest_x86_64.nexe
R=nfullagar@chromium.org
Review URL: https://webrtc-codereview.appspot.com/1981004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@747 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
abfeea9b81
commit
b8ffdc9e57
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 746
|
Version: 747
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -42,7 +42,11 @@ extern "C" {
|
|||||||
#define HAS_ARGBADDROW_SSE2
|
#define HAS_ARGBADDROW_SSE2
|
||||||
#define HAS_ARGBATTENUATEROW_SSSE3
|
#define HAS_ARGBATTENUATEROW_SSSE3
|
||||||
#define HAS_ARGBBLENDROW_SSSE3
|
#define HAS_ARGBBLENDROW_SSSE3
|
||||||
|
#define HAS_ARGBCOLORMATRIXROW_SSSE3
|
||||||
|
#define HAS_ARGBGRAYROW_SSSE3
|
||||||
#define HAS_ARGBMULTIPLYROW_SSE2
|
#define HAS_ARGBMULTIPLYROW_SSE2
|
||||||
|
#define HAS_ARGBQUANTIZEROW_SSE2
|
||||||
|
#define HAS_ARGBSEPIAROW_SSSE3
|
||||||
#define HAS_ARGBSHADEROW_SSE2
|
#define HAS_ARGBSHADEROW_SSE2
|
||||||
#define HAS_ARGBSUBTRACTROW_SSE2
|
#define HAS_ARGBSUBTRACTROW_SSE2
|
||||||
|
|
||||||
@ -122,11 +126,7 @@ extern "C" {
|
|||||||
|
|
||||||
// Effects:
|
// Effects:
|
||||||
#define HAS_ARGBAFFINEROW_SSE2
|
#define HAS_ARGBAFFINEROW_SSE2
|
||||||
#define HAS_ARGBCOLORMATRIXROW_SSSE3
|
|
||||||
#define HAS_ARGBGRAYROW_SSSE3
|
|
||||||
#define HAS_ARGBMIRRORROW_SSSE3
|
#define HAS_ARGBMIRRORROW_SSSE3
|
||||||
#define HAS_ARGBQUANTIZEROW_SSE2
|
|
||||||
#define HAS_ARGBSEPIAROW_SSSE3
|
|
||||||
#define HAS_ARGBUNATTENUATEROW_SSE2
|
#define HAS_ARGBUNATTENUATEROW_SSE2
|
||||||
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
|
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
|
||||||
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
|
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 746
|
#define LIBYUV_VERSION 747
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -21,11 +21,13 @@ extern "C" {
|
|||||||
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
|
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
|
||||||
|
|
||||||
#if defined(__native_client__) && defined(__x86_64__)
|
#if defined(__native_client__) && defined(__x86_64__)
|
||||||
#define MEMACCESS(x) "%%nacl:(%%r15,%q" #x ")"
|
#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
|
||||||
#define MEMLEA(x, y) #x "(%q" #y ")"
|
#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
|
||||||
|
#define MEMLEA(offset, base) #offset "(%q" #base ")"
|
||||||
#else
|
#else
|
||||||
#define MEMACCESS(x) "(%" #x ")"
|
#define MEMACCESS(base) "(%" #base ")"
|
||||||
#define MEMLEA(x, y) #x "(%" #y ")"
|
#define MEMACCESS2(offset, base) #offset "(%" #base ")"
|
||||||
|
#define MEMLEA(offset, base) #offset "(%" #base ")"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_ARGBTOYROW_SSSE3
|
#ifdef HAS_ARGBTOYROW_SSSE3
|
||||||
@ -3925,21 +3927,21 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
asm volatile (
|
asm volatile (
|
||||||
"movdqa %3,%%xmm4 \n"
|
"movdqa %3,%%xmm4 \n"
|
||||||
"movdqa %4,%%xmm5 \n"
|
"movdqa %4,%%xmm5 \n"
|
||||||
"sub %0,%1 \n"
|
|
||||||
|
|
||||||
// 8 pixel loop.
|
// 8 pixel loop.
|
||||||
".p2align 4 \n"
|
".p2align 4 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqa (%0),%%xmm0 \n"
|
"movdqa "MEMACCESS(0)",%%xmm0 \n"
|
||||||
"movdqa 0x10(%0),%%xmm1 \n"
|
"movdqa "MEMACCESS2(0x10,0)",%%xmm1 \n"
|
||||||
"pmaddubsw %%xmm4,%%xmm0 \n"
|
"pmaddubsw %%xmm4,%%xmm0 \n"
|
||||||
"pmaddubsw %%xmm4,%%xmm1 \n"
|
"pmaddubsw %%xmm4,%%xmm1 \n"
|
||||||
"phaddw %%xmm1,%%xmm0 \n"
|
"phaddw %%xmm1,%%xmm0 \n"
|
||||||
"paddw %%xmm5,%%xmm0 \n"
|
"paddw %%xmm5,%%xmm0 \n"
|
||||||
"psrlw $0x7,%%xmm0 \n"
|
"psrlw $0x7,%%xmm0 \n"
|
||||||
"packuswb %%xmm0,%%xmm0 \n"
|
"packuswb %%xmm0,%%xmm0 \n"
|
||||||
"movdqa (%0),%%xmm2 \n"
|
"movdqa "MEMACCESS(0)",%%xmm2 \n"
|
||||||
"movdqa 0x10(%0),%%xmm3 \n"
|
"movdqa "MEMACCESS2(0x10,0)",%%xmm3 \n"
|
||||||
|
"lea "MEMLEA(0x20,0)",%0 \n"
|
||||||
"psrld $0x18,%%xmm2 \n"
|
"psrld $0x18,%%xmm2 \n"
|
||||||
"psrld $0x18,%%xmm3 \n"
|
"psrld $0x18,%%xmm3 \n"
|
||||||
"packuswb %%xmm3,%%xmm2 \n"
|
"packuswb %%xmm3,%%xmm2 \n"
|
||||||
@ -3951,9 +3953,9 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
"punpcklwd %%xmm3,%%xmm0 \n"
|
"punpcklwd %%xmm3,%%xmm0 \n"
|
||||||
"punpckhwd %%xmm3,%%xmm1 \n"
|
"punpckhwd %%xmm3,%%xmm1 \n"
|
||||||
"sub $0x8,%2 \n"
|
"sub $0x8,%2 \n"
|
||||||
"movdqa %%xmm0,(%0,%1,1) \n"
|
"movdqa %%xmm0,"MEMACCESS(1)" \n"
|
||||||
"movdqa %%xmm1,0x10(%0,%1,1) \n"
|
"movdqa %%xmm1,"MEMACCESS2(0x10,1)" \n"
|
||||||
"lea 0x20(%0),%0 \n"
|
"lea "MEMLEA(0x20,1)",%1 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
@ -3995,30 +3997,30 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
|
|||||||
// 8 pixel loop.
|
// 8 pixel loop.
|
||||||
".p2align 4 \n"
|
".p2align 4 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqa (%0),%%xmm0 \n"
|
"movdqa "MEMACCESS(0)",%%xmm0 \n"
|
||||||
"movdqa 0x10(%0),%%xmm6 \n"
|
"movdqa "MEMACCESS2(0x10,0)",%%xmm6 \n"
|
||||||
"pmaddubsw %%xmm2,%%xmm0 \n"
|
"pmaddubsw %%xmm2,%%xmm0 \n"
|
||||||
"pmaddubsw %%xmm2,%%xmm6 \n"
|
"pmaddubsw %%xmm2,%%xmm6 \n"
|
||||||
"phaddw %%xmm6,%%xmm0 \n"
|
"phaddw %%xmm6,%%xmm0 \n"
|
||||||
"psrlw $0x7,%%xmm0 \n"
|
"psrlw $0x7,%%xmm0 \n"
|
||||||
"packuswb %%xmm0,%%xmm0 \n"
|
"packuswb %%xmm0,%%xmm0 \n"
|
||||||
"movdqa (%0),%%xmm5 \n"
|
"movdqa "MEMACCESS(0)",%%xmm5 \n"
|
||||||
"movdqa 0x10(%0),%%xmm1 \n"
|
"movdqa "MEMACCESS2(0x10,0)",%%xmm1 \n"
|
||||||
"pmaddubsw %%xmm3,%%xmm5 \n"
|
"pmaddubsw %%xmm3,%%xmm5 \n"
|
||||||
"pmaddubsw %%xmm3,%%xmm1 \n"
|
"pmaddubsw %%xmm3,%%xmm1 \n"
|
||||||
"phaddw %%xmm1,%%xmm5 \n"
|
"phaddw %%xmm1,%%xmm5 \n"
|
||||||
"psrlw $0x7,%%xmm5 \n"
|
"psrlw $0x7,%%xmm5 \n"
|
||||||
"packuswb %%xmm5,%%xmm5 \n"
|
"packuswb %%xmm5,%%xmm5 \n"
|
||||||
"punpcklbw %%xmm5,%%xmm0 \n"
|
"punpcklbw %%xmm5,%%xmm0 \n"
|
||||||
"movdqa (%0),%%xmm5 \n"
|
"movdqa "MEMACCESS(0)",%%xmm5 \n"
|
||||||
"movdqa 0x10(%0),%%xmm1 \n"
|
"movdqa "MEMACCESS2(0x10,0)",%%xmm1 \n"
|
||||||
"pmaddubsw %%xmm4,%%xmm5 \n"
|
"pmaddubsw %%xmm4,%%xmm5 \n"
|
||||||
"pmaddubsw %%xmm4,%%xmm1 \n"
|
"pmaddubsw %%xmm4,%%xmm1 \n"
|
||||||
"phaddw %%xmm1,%%xmm5 \n"
|
"phaddw %%xmm1,%%xmm5 \n"
|
||||||
"psrlw $0x7,%%xmm5 \n"
|
"psrlw $0x7,%%xmm5 \n"
|
||||||
"packuswb %%xmm5,%%xmm5 \n"
|
"packuswb %%xmm5,%%xmm5 \n"
|
||||||
"movdqa (%0),%%xmm6 \n"
|
"movdqa "MEMACCESS(0)",%%xmm6 \n"
|
||||||
"movdqa 0x10(%0),%%xmm1 \n"
|
"movdqa "MEMACCESS2(0x10,0)",%%xmm1 \n"
|
||||||
"psrld $0x18,%%xmm6 \n"
|
"psrld $0x18,%%xmm6 \n"
|
||||||
"psrld $0x18,%%xmm1 \n"
|
"psrld $0x18,%%xmm1 \n"
|
||||||
"packuswb %%xmm1,%%xmm6 \n"
|
"packuswb %%xmm1,%%xmm6 \n"
|
||||||
@ -4028,9 +4030,9 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
|
|||||||
"punpcklwd %%xmm5,%%xmm0 \n"
|
"punpcklwd %%xmm5,%%xmm0 \n"
|
||||||
"punpckhwd %%xmm5,%%xmm1 \n"
|
"punpckhwd %%xmm5,%%xmm1 \n"
|
||||||
"sub $0x8,%1 \n"
|
"sub $0x8,%1 \n"
|
||||||
"movdqa %%xmm0,(%0) \n"
|
"movdqa %%xmm0,"MEMACCESS(0)" \n"
|
||||||
"movdqa %%xmm1,0x10(%0) \n"
|
"movdqa %%xmm1,"MEMACCESS2(0x10,0)" \n"
|
||||||
"lea 0x20(%0),%0 \n"
|
"lea "MEMLEA(0x20,0)",%0 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(dst_argb), // %0
|
: "+r"(dst_argb), // %0
|
||||||
"+r"(width) // %1
|
"+r"(width) // %1
|
||||||
@ -4061,12 +4063,12 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
|
|||||||
// 8 pixel loop.
|
// 8 pixel loop.
|
||||||
".p2align 4 \n"
|
".p2align 4 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqa (%0),%%xmm0 \n"
|
"movdqa "MEMACCESS(0)",%%xmm0 \n"
|
||||||
"movdqa 0x10(%0),%%xmm6 \n"
|
"movdqa "MEMACCESS2(0x10,0)",%%xmm6 \n"
|
||||||
"pmaddubsw %%xmm2,%%xmm0 \n"
|
"pmaddubsw %%xmm2,%%xmm0 \n"
|
||||||
"pmaddubsw %%xmm2,%%xmm6 \n"
|
"pmaddubsw %%xmm2,%%xmm6 \n"
|
||||||
"movdqa (%0),%%xmm5 \n"
|
"movdqa "MEMACCESS(0)",%%xmm5 \n"
|
||||||
"movdqa 0x10(%0),%%xmm1 \n"
|
"movdqa "MEMACCESS2(0x10,0)",%%xmm1 \n"
|
||||||
"pmaddubsw %%xmm3,%%xmm5 \n"
|
"pmaddubsw %%xmm3,%%xmm5 \n"
|
||||||
"pmaddubsw %%xmm3,%%xmm1 \n"
|
"pmaddubsw %%xmm3,%%xmm1 \n"
|
||||||
"phaddsw %%xmm6,%%xmm0 \n"
|
"phaddsw %%xmm6,%%xmm0 \n"
|
||||||
@ -4076,15 +4078,15 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
|
|||||||
"packuswb %%xmm0,%%xmm0 \n"
|
"packuswb %%xmm0,%%xmm0 \n"
|
||||||
"packuswb %%xmm5,%%xmm5 \n"
|
"packuswb %%xmm5,%%xmm5 \n"
|
||||||
"punpcklbw %%xmm5,%%xmm0 \n"
|
"punpcklbw %%xmm5,%%xmm0 \n"
|
||||||
"movdqa (%0),%%xmm5 \n"
|
"movdqa "MEMACCESS(0)",%%xmm5 \n"
|
||||||
"movdqa 0x10(%0),%%xmm1 \n"
|
"movdqa "MEMACCESS2(0x10,0)",%%xmm1 \n"
|
||||||
"pmaddubsw %%xmm4,%%xmm5 \n"
|
"pmaddubsw %%xmm4,%%xmm5 \n"
|
||||||
"pmaddubsw %%xmm4,%%xmm1 \n"
|
"pmaddubsw %%xmm4,%%xmm1 \n"
|
||||||
"phaddsw %%xmm1,%%xmm5 \n"
|
"phaddsw %%xmm1,%%xmm5 \n"
|
||||||
"psraw $0x7,%%xmm5 \n"
|
"psraw $0x7,%%xmm5 \n"
|
||||||
"packuswb %%xmm5,%%xmm5 \n"
|
"packuswb %%xmm5,%%xmm5 \n"
|
||||||
"movdqa (%0),%%xmm6 \n"
|
"movdqa "MEMACCESS(0)",%%xmm6 \n"
|
||||||
"movdqa 0x10(%0),%%xmm1 \n"
|
"movdqa "MEMACCESS2(0x10,0)",%%xmm1 \n"
|
||||||
"psrld $0x18,%%xmm6 \n"
|
"psrld $0x18,%%xmm6 \n"
|
||||||
"psrld $0x18,%%xmm1 \n"
|
"psrld $0x18,%%xmm1 \n"
|
||||||
"packuswb %%xmm1,%%xmm6 \n"
|
"packuswb %%xmm1,%%xmm6 \n"
|
||||||
@ -4094,9 +4096,9 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
|
|||||||
"punpcklwd %%xmm5,%%xmm0 \n"
|
"punpcklwd %%xmm5,%%xmm0 \n"
|
||||||
"punpckhwd %%xmm5,%%xmm1 \n"
|
"punpckhwd %%xmm5,%%xmm1 \n"
|
||||||
"sub $0x8,%1 \n"
|
"sub $0x8,%1 \n"
|
||||||
"movdqa %%xmm0,(%0) \n"
|
"movdqa %%xmm0,"MEMACCESS(0)" \n"
|
||||||
"movdqa %%xmm1,0x10(%0) \n"
|
"movdqa %%xmm1,"MEMACCESS2(0x10,0)" \n"
|
||||||
"lea 0x20(%0),%0 \n"
|
"lea "MEMLEA(0x20,0)",%0 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(dst_argb), // %0
|
: "+r"(dst_argb), // %0
|
||||||
"+r"(width) // %1
|
"+r"(width) // %1
|
||||||
@ -4131,7 +4133,7 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
|
|||||||
// 4 pixel loop.
|
// 4 pixel loop.
|
||||||
".p2align 2 \n"
|
".p2align 2 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqa (%0),%%xmm0 \n"
|
"movdqa "MEMACCESS(0)",%%xmm0 \n"
|
||||||
"punpcklbw %%xmm5,%%xmm0 \n"
|
"punpcklbw %%xmm5,%%xmm0 \n"
|
||||||
"pmulhuw %%xmm2,%%xmm0 \n"
|
"pmulhuw %%xmm2,%%xmm0 \n"
|
||||||
"movdqa (%0),%%xmm1 \n"
|
"movdqa (%0),%%xmm1 \n"
|
||||||
@ -4146,8 +4148,8 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
|
|||||||
"packuswb %%xmm1,%%xmm0 \n"
|
"packuswb %%xmm1,%%xmm0 \n"
|
||||||
"por %%xmm7,%%xmm0 \n"
|
"por %%xmm7,%%xmm0 \n"
|
||||||
"sub $0x4,%1 \n"
|
"sub $0x4,%1 \n"
|
||||||
"movdqa %%xmm0,(%0) \n"
|
"movdqa %%xmm0,"MEMACCESS(0)" \n"
|
||||||
"lea 0x10(%0),%0 \n"
|
"lea "MEMLEA(0x10,0)",%0 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(dst_argb), // %0
|
: "+r"(dst_argb), // %0
|
||||||
"+r"(width) // %1
|
"+r"(width) // %1
|
||||||
|
|||||||
@ -4922,7 +4922,6 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
mov ecx, [esp + 12] /* width */
|
mov ecx, [esp + 12] /* width */
|
||||||
movdqa xmm4, kARGBToYJ
|
movdqa xmm4, kARGBToYJ
|
||||||
movdqa xmm5, kAddYJ64
|
movdqa xmm5, kAddYJ64
|
||||||
sub edx, eax
|
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
convertloop:
|
convertloop:
|
||||||
@ -4936,6 +4935,7 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
packuswb xmm0, xmm0 // 8 G bytes
|
packuswb xmm0, xmm0 // 8 G bytes
|
||||||
movdqa xmm2, [eax] // A
|
movdqa xmm2, [eax] // A
|
||||||
movdqa xmm3, [eax + 16]
|
movdqa xmm3, [eax + 16]
|
||||||
|
lea eax, [eax + 32]
|
||||||
psrld xmm2, 24
|
psrld xmm2, 24
|
||||||
psrld xmm3, 24
|
psrld xmm3, 24
|
||||||
packuswb xmm2, xmm3
|
packuswb xmm2, xmm3
|
||||||
@ -4947,9 +4947,9 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
punpcklwd xmm0, xmm3 // GGGA first 4
|
punpcklwd xmm0, xmm3 // GGGA first 4
|
||||||
punpckhwd xmm1, xmm3 // GGGA next 4
|
punpckhwd xmm1, xmm3 // GGGA next 4
|
||||||
sub ecx, 8
|
sub ecx, 8
|
||||||
movdqa [eax + edx], xmm0
|
movdqa [edx], xmm0
|
||||||
movdqa [eax + edx + 16], xmm1
|
movdqa [edx + 16], xmm1
|
||||||
lea eax, [eax + 32]
|
lea edx, [edx + 32]
|
||||||
jg convertloop
|
jg convertloop
|
||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user