mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
ARGBToI444_SSSE3 port to gcc
BUG=148 TEST=try bots Review URL: https://webrtc-codereview.appspot.com/1091004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@557 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
92352b7081
commit
762c050b65
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 556
|
||||
Version: 557
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -49,6 +49,7 @@ extern "C" {
|
||||
#define HAS_ARGBTORGB565ROW_SSE2
|
||||
#define HAS_ARGBTORGBAROW_SSSE3
|
||||
#define HAS_ARGBTOUV422ROW_SSSE3
|
||||
#define HAS_ARGBTOUV444ROW_SSSE3
|
||||
#define HAS_ARGBTOUVROW_SSSE3
|
||||
#define HAS_ARGBTOYROW_SSSE3
|
||||
#define HAS_BGRATOARGBROW_SSSE3
|
||||
@ -121,7 +122,6 @@ extern "C" {
|
||||
// TODO(fbarchard): Port to gcc.
|
||||
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
|
||||
#define HAS_ARGBCOLORTABLEROW_X86
|
||||
#define HAS_ARGBTOUV444ROW_SSSE3
|
||||
#define HAS_ARGBINTERPOLATEROW_SSE2
|
||||
#endif
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 556
|
||||
#define LIBYUV_VERSION 557
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -925,6 +925,128 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
);
|
||||
}
|
||||
|
||||
// Converts a row of ARGB pixels into one U byte and one V byte per pixel
// (no chroma subsampling), using SSSE3.
//
//   src_argb  source row; read with movdqa, so it must be 16-byte aligned.
//   dst_u     destination U row; written with movdqa (16-byte aligned).
//   dst_v     destination V row; written with movdqa (16-byte aligned).
//   width     number of pixels. Each loop iteration consumes 64 source bytes
//             and produces 16 U and 16 V bytes while decrementing width by
//             16. The body always runs at least once.
//             NOTE(review): presumably callers pass a positive multiple of
//             16 - confirm against the call sites.
//
// The coefficient tables are loaded inside the same asm statement that uses
// them, and xmm3/xmm4/xmm5 are declared as clobbered. Loading them in a
// separate asm statement relies on the compiler neither reusing those
// registers nor placing code between the two statements, which GCC does not
// guarantee.
void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
                          int width) {
  asm volatile (
    "movdqa    %4,%%xmm4                       \n"  // xmm4 = kARGBToU
    "movdqa    %5,%%xmm3                       \n"  // xmm3 = kARGBToV
    "movdqa    %6,%%xmm5                       \n"  // xmm5 = kAddUV128
    // Turn dst_v into an offset from dst_u so that only one destination
    // pointer has to be advanced inside the loop.
    "sub       %1,%2                           \n"
    ".p2align  4                               \n"
  "1:                                          \n"
    // U pass: multiply the pixel bytes by the U coefficients and sum each
    // pixel's products down to a single 16-bit word.
    "movdqa    (%0),%%xmm0                     \n"
    "movdqa    0x10(%0),%%xmm1                 \n"
    "movdqa    0x20(%0),%%xmm2                 \n"
    "movdqa    0x30(%0),%%xmm6                 \n"
    "pmaddubsw %%xmm4,%%xmm0                   \n"
    "pmaddubsw %%xmm4,%%xmm1                   \n"
    "pmaddubsw %%xmm4,%%xmm2                   \n"
    "pmaddubsw %%xmm4,%%xmm6                   \n"
    "phaddw    %%xmm1,%%xmm0                   \n"
    "phaddw    %%xmm6,%%xmm2                   \n"
    // Keep the high byte of every word, pack to bytes and add the bias.
    "psrlw     $0x8,%%xmm0                     \n"
    "psrlw     $0x8,%%xmm2                     \n"
    "packuswb  %%xmm2,%%xmm0                   \n"
    "paddb     %%xmm5,%%xmm0                   \n"
    // Sets the flags tested by the jg at the bottom of the loop; none of the
    // SSE, mov or lea instructions in between modify EFLAGS.
    "sub       $0x10,%3                        \n"
    "movdqa    %%xmm0,(%1)                     \n"
    // V pass: reload the same 64 source bytes and repeat with the V
    // coefficients.
    "movdqa    (%0),%%xmm0                     \n"
    "movdqa    0x10(%0),%%xmm1                 \n"
    "movdqa    0x20(%0),%%xmm2                 \n"
    "movdqa    0x30(%0),%%xmm6                 \n"
    "pmaddubsw %%xmm3,%%xmm0                   \n"
    "pmaddubsw %%xmm3,%%xmm1                   \n"
    "pmaddubsw %%xmm3,%%xmm2                   \n"
    "pmaddubsw %%xmm3,%%xmm6                   \n"
    "phaddw    %%xmm1,%%xmm0                   \n"
    "phaddw    %%xmm6,%%xmm2                   \n"
    "psrlw     $0x8,%%xmm0                     \n"
    "psrlw     $0x8,%%xmm2                     \n"
    "packuswb  %%xmm2,%%xmm0                   \n"
    "paddb     %%xmm5,%%xmm0                   \n"
    "lea       0x40(%0),%0                     \n"
    "movdqa    %%xmm0,(%1,%2,1)                \n"  // dst_u + (dst_v - dst_u)
    "lea       0x10(%1),%1                     \n"
    "jg        1b                              \n"
  : "+r"(src_argb),  // %0
    "+r"(dst_u),     // %1
    "+r"(dst_v),     // %2
    "+rm"(width)     // %3
  : "m"(kARGBToU),   // %4
    "m"(kARGBToV),   // %5
    "m"(kAddUV128)   // %6
  : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
#endif
  );
}
|
||||
|
||||
// Converts a row of ARGB pixels into one U byte and one V byte per pixel
// (no chroma subsampling), using SSSE3. Variant for buffers with no
// alignment guarantee.
//
//   src_argb  source row; read with movdqu, any alignment.
//   dst_u     destination U row; written with movdqu, any alignment.
//   dst_v     destination V row; written with movdqu, any alignment.
//   width     number of pixels. Each loop iteration consumes 64 source bytes
//             and produces 16 U and 16 V bytes while decrementing width by
//             16. The body always runs at least once.
//             NOTE(review): presumably callers pass a positive multiple of
//             16 - confirm against the call sites.
//
// The coefficient tables are still fetched with movdqa, so kARGBToU,
// kARGBToV and kAddUV128 themselves must be 16-byte aligned.
//
// The tables are loaded inside the same asm statement that uses them, and
// xmm3/xmm4/xmm5 are declared as clobbered. Loading them in a separate asm
// statement relies on the compiler neither reusing those registers nor
// placing code between the two statements, which GCC does not guarantee.
void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_u,
                                    uint8* dst_v, int width) {
  asm volatile (
    "movdqa    %4,%%xmm4                       \n"  // xmm4 = kARGBToU
    "movdqa    %5,%%xmm3                       \n"  // xmm3 = kARGBToV
    "movdqa    %6,%%xmm5                       \n"  // xmm5 = kAddUV128
    // Turn dst_v into an offset from dst_u so that only one destination
    // pointer has to be advanced inside the loop.
    "sub       %1,%2                           \n"
    ".p2align  4                               \n"
  "1:                                          \n"
    // U pass: multiply the pixel bytes by the U coefficients and sum each
    // pixel's products down to a single 16-bit word.
    "movdqu    (%0),%%xmm0                     \n"
    "movdqu    0x10(%0),%%xmm1                 \n"
    "movdqu    0x20(%0),%%xmm2                 \n"
    "movdqu    0x30(%0),%%xmm6                 \n"
    "pmaddubsw %%xmm4,%%xmm0                   \n"
    "pmaddubsw %%xmm4,%%xmm1                   \n"
    "pmaddubsw %%xmm4,%%xmm2                   \n"
    "pmaddubsw %%xmm4,%%xmm6                   \n"
    "phaddw    %%xmm1,%%xmm0                   \n"
    "phaddw    %%xmm6,%%xmm2                   \n"
    // Keep the high byte of every word, pack to bytes and add the bias.
    "psrlw     $0x8,%%xmm0                     \n"
    "psrlw     $0x8,%%xmm2                     \n"
    "packuswb  %%xmm2,%%xmm0                   \n"
    "paddb     %%xmm5,%%xmm0                   \n"
    // Sets the flags tested by the jg at the bottom of the loop; none of the
    // SSE, mov or lea instructions in between modify EFLAGS.
    "sub       $0x10,%3                        \n"
    "movdqu    %%xmm0,(%1)                     \n"
    // V pass: reload the same 64 source bytes and repeat with the V
    // coefficients.
    "movdqu    (%0),%%xmm0                     \n"
    "movdqu    0x10(%0),%%xmm1                 \n"
    "movdqu    0x20(%0),%%xmm2                 \n"
    "movdqu    0x30(%0),%%xmm6                 \n"
    "pmaddubsw %%xmm3,%%xmm0                   \n"
    "pmaddubsw %%xmm3,%%xmm1                   \n"
    "pmaddubsw %%xmm3,%%xmm2                   \n"
    "pmaddubsw %%xmm3,%%xmm6                   \n"
    "phaddw    %%xmm1,%%xmm0                   \n"
    "phaddw    %%xmm6,%%xmm2                   \n"
    "psrlw     $0x8,%%xmm0                     \n"
    "psrlw     $0x8,%%xmm2                     \n"
    "packuswb  %%xmm2,%%xmm0                   \n"
    "paddb     %%xmm5,%%xmm0                   \n"
    "lea       0x40(%0),%0                     \n"
    "movdqu    %%xmm0,(%1,%2,1)                \n"  // dst_u + (dst_v - dst_u)
    "lea       0x10(%1),%1                     \n"
    "jg        1b                              \n"
  : "+r"(src_argb),  // %0
    "+r"(dst_u),     // %1
    "+r"(dst_v),     // %2
    "+rm"(width)     // %3
  : "m"(kARGBToU),   // %4
    "m"(kARGBToV),   // %5
    "m"(kAddUV128)   // %6
  : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
#endif
  );
}
|
||||
|
||||
void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
asm volatile (
|
||||
|
||||
@ -23,9 +23,6 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Bilinear SSE2 is disabled.
|
||||
#define SSE2_DISABLED 1
|
||||
|
||||
// ARGB scaling uses bilinear or point, but not box filter.
|
||||
|
||||
#if !defined(YUV_DISABLE_ASM) && (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user