mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-16 23:29:52 +08:00
port YToARGB AVX2 to GCC
BUG=393 TESTED=untested R=harryjin@google.com Review URL: https://webrtc-codereview.appspot.com/39819004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1262 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
f7e5b5e361
commit
baafc97d6b
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1261
|
Version: 1262
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -203,6 +203,7 @@ extern "C" {
|
|||||||
#define HAS_UYVYTOUV422ROW_AVX2
|
#define HAS_UYVYTOUV422ROW_AVX2
|
||||||
#define HAS_UYVYTOUVROW_AVX2
|
#define HAS_UYVYTOUVROW_AVX2
|
||||||
#define HAS_UYVYTOYROW_AVX2
|
#define HAS_UYVYTOYROW_AVX2
|
||||||
|
#define HAS_YTOARGBROW_AVX2
|
||||||
#define HAS_YUY2TOUV422ROW_AVX2
|
#define HAS_YUY2TOUV422ROW_AVX2
|
||||||
#define HAS_YUY2TOUVROW_AVX2
|
#define HAS_YUY2TOUVROW_AVX2
|
||||||
#define HAS_YUY2TOYROW_AVX2
|
#define HAS_YUY2TOYROW_AVX2
|
||||||
@ -217,7 +218,6 @@ extern "C" {
|
|||||||
|
|
||||||
// The following require VS2012. Port to GCC.
|
// The following require VS2012. Port to GCC.
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
|
#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
|
||||||
#define HAS_YTOARGBROW_AVX2
|
|
||||||
// TODO(fbarchard): fix AVX2 versions of YUV conversion. bug=393
|
// TODO(fbarchard): fix AVX2 versions of YUV conversion. bug=393
|
||||||
#define HAS_I422TOABGRROW_AVX2
|
#define HAS_I422TOABGRROW_AVX2
|
||||||
#define HAS_I422TOARGBROW_AVX2
|
#define HAS_I422TOARGBROW_AVX2
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1261
|
#define LIBYUV_VERSION 1262
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -2292,9 +2292,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
|
|||||||
#endif // HAS_I422TORGBAROW_AVX2
|
#endif // HAS_I422TORGBAROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_YTOARGBROW_SSE2
|
#ifdef HAS_YTOARGBROW_SSE2
|
||||||
void YToARGBRow_SSE2(const uint8* y_buf,
|
void YToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) {
|
||||||
uint8* dst_argb,
|
|
||||||
int width) {
|
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164
|
"mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164
|
||||||
"movd %%eax,%%xmm2 \n"
|
"movd %%eax,%%xmm2 \n"
|
||||||
@ -2340,6 +2338,55 @@ void YToARGBRow_SSE2(const uint8* y_buf,
|
|||||||
}
|
}
|
||||||
#endif // HAS_YTOARGBROW_SSE2
|
#endif // HAS_YTOARGBROW_SSE2
|
||||||
|
|
||||||
|
#ifdef HAS_YTOARGBROW_AVX2
|
||||||
|
// 16 pixels of Y converted to 16 pixels of ARGB (64 bytes).
|
||||||
|
// note: vpunpcklbw mutates and vpackuswb unmutates.
|
||||||
|
void YToARGBRow_AVX2(const uint8* y_buf, uint8* dst_argb, int width) {
|
||||||
|
asm volatile (
|
||||||
|
"mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164
|
||||||
|
"vmovd %%eax,%%xmm2 \n"
|
||||||
|
"vbroadcastss %%xmm2,%%ymm2 \n"
|
||||||
|
"mov $0x4880488,%%eax \n" // 0488 = 1160 = 1.164 * 16
|
||||||
|
"vmovd %%eax,%%xmm3 \n"
|
||||||
|
"vbroadcastss %%xmm3,%%ymm3 \n"
|
||||||
|
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
|
||||||
|
"vpslld $0x18,%%ymm4,%%ymm4 \n"
|
||||||
|
" \n"
|
||||||
|
LABELALIGN
|
||||||
|
"1: \n"
|
||||||
|
// Step 1: Scale Y contribution to 16 G values. G = (y - 16) * 1.164
|
||||||
|
"vmovdqu (%0),%%xmm0 \n"
|
||||||
|
"lea 0x10(%0),%0 \n"
|
||||||
|
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpsubusw %%ymm3,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpsrlw $0x6,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpunpcklbw %%ymm0,%%ymm0,%%ymm1 \n"
|
||||||
|
"vpermq $0xd8,%%ymm1,%%ymm1 \n"
|
||||||
|
"vpunpcklwd %%ymm1,%%ymm1,%%ymm0 \n"
|
||||||
|
"vpunpckhwd %%ymm1,%%ymm1,%%ymm1 \n"
|
||||||
|
"vpor %%ymm4,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpor %%ymm4,%%ymm1,%%ymm1 \n"
|
||||||
|
"vmovdqu %%ymm0,(%1) \n"
|
||||||
|
"vmovdqu %%ymm1,0x20(%1) \n"
|
||||||
|
"lea 0x40(%1),%1 \n"
|
||||||
|
"sub $0x10,%2 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
"vzeroupper \n"
|
||||||
|
: "+r"(y_buf), // %0
|
||||||
|
"+r"(dst_argb), // %1
|
||||||
|
"+rm"(width) // %2
|
||||||
|
:
|
||||||
|
: "memory", "cc", "eax"
|
||||||
|
#if defined(__SSE2__)
|
||||||
|
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
|
||||||
|
#endif
|
||||||
|
);
|
||||||
|
}
|
||||||
|
#endif // HAS_YTOARGBROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_MIRRORROW_SSSE3
|
#ifdef HAS_MIRRORROW_SSSE3
|
||||||
// Shuffle table for reversing the bytes.
|
// Shuffle table for reversing the bytes.
|
||||||
static uvec8 kShuffleMirror = {
|
static uvec8 kShuffleMirror = {
|
||||||
|
|||||||
@ -2354,14 +2354,14 @@ void YToARGBRow_AVX2(const uint8* y_buf,
|
|||||||
uint8* rgb_buf,
|
uint8* rgb_buf,
|
||||||
int width) {
|
int width) {
|
||||||
__asm {
|
__asm {
|
||||||
vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0xff000000
|
|
||||||
vpslld ymm4, ymm4, 24
|
|
||||||
mov eax, 0x04880488 // 0488 = 1160 = round(1.164 * 64 * 16)
|
|
||||||
vmovd xmm3, eax
|
|
||||||
vbroadcastss ymm3, xmm3
|
|
||||||
mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256)
|
mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256)
|
||||||
vmovd xmm2, eax
|
vmovd xmm2, eax
|
||||||
vbroadcastss ymm2, xmm2
|
vbroadcastss ymm2, xmm2
|
||||||
|
mov eax, 0x04880488 // 0488 = 1160 = round(1.164 * 64 * 16)
|
||||||
|
vmovd xmm3, eax
|
||||||
|
vbroadcastss ymm3, xmm3
|
||||||
|
vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0xff000000
|
||||||
|
vpslld ymm4, ymm4, 24
|
||||||
|
|
||||||
mov eax, [esp + 4] // Y
|
mov eax, [esp + 4] // Y
|
||||||
mov edx, [esp + 8] // rgb
|
mov edx, [esp + 8] // rgb
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user