mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
Fix for ARGBToUV on AVX2
BUG=269
TESTED=local testing
R=harryjin@google.com
Review URL: https://webrtc-codereview.appspot.com/33669004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@1202 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
ddee77cdbd
commit
7892ea1fe1
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1201
|
Version: 1203
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1201
|
#define LIBYUV_VERSION 1203
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -983,6 +983,7 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
|
|||||||
"vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
|
"vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
|
||||||
"vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
|
"vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
|
||||||
"vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
|
"vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
|
||||||
|
|
||||||
"vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n"
|
"vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n"
|
||||||
"vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
|
"vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
|
||||||
"vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n"
|
"vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n"
|
||||||
@ -995,6 +996,7 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
|
|||||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||||
"vpshufb %8,%%ymm0,%%ymm0 \n"
|
"vpshufb %8,%%ymm0,%%ymm0 \n"
|
||||||
"vpaddb %%ymm5,%%ymm0,%%ymm0 \n"
|
"vpaddb %%ymm5,%%ymm0,%%ymm0 \n"
|
||||||
|
|
||||||
"vextractf128 $0x0,%%ymm0," MEMACCESS(1) " \n"
|
"vextractf128 $0x0,%%ymm0," MEMACCESS(1) " \n"
|
||||||
VEXTOPMEM(vextractf128,ymm0,0x1,1,2,1) // vextractf128 $0x1,%%ymm0,(%1,%2,1)
|
VEXTOPMEM(vextractf128,ymm0,0x1,1,2,1) // vextractf128 $0x1,%%ymm0,(%1,%2,1)
|
||||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||||
|
|||||||
@ -1473,7 +1473,6 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
|||||||
}
|
}
|
||||||
#endif // HAS_ARGBTOYROW_SSSE3
|
#endif // HAS_ARGBTOYROW_SSSE3
|
||||||
|
|
||||||
|
|
||||||
#if defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2)
|
#if defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2)
|
||||||
static const lvec8 kUVToB_AVX = {
|
static const lvec8 kUVToB_AVX = {
|
||||||
UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB,
|
UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB,
|
||||||
@ -1502,6 +1501,7 @@ static const lvec16 kUVBiasG_AVX = {
|
|||||||
static const lvec16 kUVBiasR_AVX = {
|
static const lvec16 kUVBiasR_AVX = {
|
||||||
BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR
|
BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR
|
||||||
};
|
};
|
||||||
|
#endif // defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2)
|
||||||
|
|
||||||
// Read 8 UV from 422, upsample to 16 UV.
|
// Read 8 UV from 422, upsample to 16 UV.
|
||||||
#define READYUV422_AVX2 __asm { \
|
#define READYUV422_AVX2 __asm { \
|
||||||
@ -1540,7 +1540,7 @@ static const lvec16 kUVBiasR_AVX = {
|
|||||||
__asm vpackuswb ymm2, ymm2, ymm2 /* R */ \
|
__asm vpackuswb ymm2, ymm2, ymm2 /* R */ \
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
#ifdef HAS_I422TOARGBROW_AVX2
|
||||||
// 16 pixels
|
// 16 pixels
|
||||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
@ -1584,7 +1584,9 @@ void I422ToARGBRow_AVX2(const uint8* y_buf,
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif // HAS_I422TOARGBROW_AVX2
|
||||||
|
|
||||||
|
#ifdef HAS_I422TOBGRAROW_AVX2
|
||||||
// 16 pixels
|
// 16 pixels
|
||||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
|
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
|
||||||
// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
|
// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
|
||||||
@ -1629,7 +1631,9 @@ void I422ToBGRARow_AVX2(const uint8* y_buf,
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif // HAS_I422TOBGRAROW_AVX2
|
||||||
|
|
||||||
|
#ifdef HAS_I422TORGBAROW_AVX2
|
||||||
// 16 pixels
|
// 16 pixels
|
||||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
|
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
|
||||||
// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
|
// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
|
||||||
@ -1674,7 +1678,9 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif // HAS_I422TORGBAROW_AVX2
|
||||||
|
|
||||||
|
#ifdef HAS_I422TOABGRROW_AVX2
|
||||||
// 16 pixels
|
// 16 pixels
|
||||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
|
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
|
||||||
// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
|
// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
|
||||||
@ -1719,7 +1725,7 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // HAS_I422TOARGBROW_AVX2
|
#endif // HAS_I422TOABGRROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_I422TOARGBROW_SSSE3
|
#ifdef HAS_I422TOARGBROW_SSSE3
|
||||||
// TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
|
// TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user