Mirror of https://chromium.googlesource.com/libyuv/libyuv (synced 2025-12-07 01:06:46 +08:00)
Remove sub 16 from yuv conversions and change bias to include it.

BUG=388
TESTED=out\release\libyuv_unittest --gtest_catch_exceptions=0 --gtest_filter=*420ToARGB_Opt | sortms
R=harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/34609004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1216 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 8723fc1109
commit 966233e5eb
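Context for the diff below: the old row code subtracted 16 from Y (the kYSub16 / kYSub16_AVX vectors, via psubsw) before multiplying by the Y gain YG; this commit deletes that subtract and folds the equivalent YG * 16 term into the bias constants BB, BG and BR. A minimal scalar sketch of the algebra, with made-up function names; the real code is SSE2/AVX2, and the final shift-and-clamp stage is omitted here:

    #include <stdio.h>

    /* Fixed-point blue-channel coefficients taken from the diff below. */
    enum { YG = 74, UB = 127, VB = 0 };

    /* Before: Y is biased by subtracting 16, then scaled by YG. */
    static int blue_old(int y, int u, int v) {
      const int bias_b = UB * 128 + VB * 128;            /* old BB */
      return (y - 16) * YG + (u * UB + v * VB - bias_b);
    }

    /* After: the YG * 16 term is folded into BB, removing the
     * Y-16 subtract (psubsw) from the inner loop. */
    static int blue_new(int y, int u, int v) {
      const int bias_b = UB * 128 + VB * 128 + YG * 16;  /* new BB */
      return y * YG + (u * UB + v * VB - bias_b);
    }

    int main(void) {
      /* Both forms agree for every input, e.g. mid-gray: */
      printf("%d %d\n", blue_old(128, 128, 128), blue_new(128, 128, 128));
      return 0;
    }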

--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1215
+Version: 1216
 License: BSD
 License File: LICENSE
 
@ -96,7 +96,6 @@ extern "C" {
|
|||||||
#define HAS_I422TOUYVYROW_SSE2
|
#define HAS_I422TOUYVYROW_SSE2
|
||||||
#define HAS_I422TOYUY2ROW_SSE2
|
#define HAS_I422TOYUY2ROW_SSE2
|
||||||
#define HAS_I444TOARGBROW_SSSE3
|
#define HAS_I444TOARGBROW_SSSE3
|
||||||
// TODO(fbarchard): Implement SSSE3 version of J422ToARGB
|
|
||||||
// #define HAS_J422TOARGBROW_SSSE3
|
// #define HAS_J422TOARGBROW_SSSE3
|
||||||
#define HAS_MERGEUVROW_SSE2
|
#define HAS_MERGEUVROW_SSE2
|
||||||
#define HAS_MIRRORROW_SSE2
|
#define HAS_MIRRORROW_SSE2
|
||||||
|
|||||||
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1215
+#define LIBYUV_VERSION 1216
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -65,7 +65,7 @@ YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15)
 #ifdef HAS_J422TOARGBROW_SSSE3
 YANY(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, J422ToARGBRow_C,
      1, 4, 7)
-#endif  // HAS_I422TOARGBROW_SSSE3
+#endif  // HAS_J422TOARGBROW_SSSE3
 #ifdef HAS_I422TOARGBROW_AVX2
 YANY(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, I422ToARGBRow_C, 1, 4, 15)
 #endif  // HAS_I422TOARGBROW_AVX2
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -881,11 +881,6 @@ void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
 #endif  // HAS_ARGBTOYJROW_AVX2
 
 #ifdef HAS_ARGBTOUVROW_SSSE3
-// TODO(fbarchard): pass xmm constants to single block of assembly.
-// fpic on GCC 4.2 for OSX runs out of GPR registers. "m" effectively takes
-// 3 registers - ebx, ebp and eax. "m" can be passed with 3 normal registers,
-// or 4 if stack frame is disabled. Doing 2 assembly blocks is a work around
-// and considered unsafe.
 void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
                        uint8* dst_u, uint8* dst_v, int width) {
   asm volatile (
@@ -1523,20 +1518,20 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
 }
 
 #ifdef HAS_I422TOARGBROW_SSSE3
-#define UB 127 /* min(63,(int8)(2.018 * 64)) */
-#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
+#define YG 74 /* (int8)round(1.164 * 64 + 0.5) */
+
+#define UB 127 /* min(63,(int8)round(2.018 * 64)) */
+#define UG -25 /* (int8)round(-0.391 * 64 - 0.5) */
 #define UR 0
 
 #define VB 0
-#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
-#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
+#define VG -52 /* (int8)round(-0.813 * 64 - 0.5) */
+#define VR 102 /* (int8)round(1.596 * 64 + 0.5) */
 
 // Bias
-#define BB UB * 128 + VB * 128
-#define BG UG * 128 + VG * 128
-#define BR UR * 128 + VR * 128
-
-#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
+#define BB (UB * 128 + VB * 128 + YG * 16)
+#define BG (UG * 128 + VG * 128 + YG * 16)
+#define BR (UR * 128 + VR * 128 + YG * 16)
 
 struct {
   vec8 kUVToB;  // 0
@@ -1545,11 +1540,10 @@ struct {
   vec16 kUVBiasB;  // 48
   vec16 kUVBiasG;  // 64
   vec16 kUVBiasR;  // 80
-  vec16 kYSub16;  // 96
-  vec16 kYToRgb;  // 112
-  vec8 kVUToB;  // 128
-  vec8 kVUToG;  // 144
-  vec8 kVUToR;  // 160
+  vec16 kYToRgb;  // 96
+  vec8 kVUToB;  // 112
+  vec8 kVUToG;  // 128
+  vec8 kVUToR;  // 144
 } static SIMD_ALIGNED(kYuvConstants) = {
   { UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB },
   { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
@@ -1557,7 +1551,6 @@ struct {
   { BB, BB, BB, BB, BB, BB, BB, BB },
   { BG, BG, BG, BG, BG, BG, BG, BG },
   { BR, BR, BR, BR, BR, BR, BR, BR },
-  { 16, 16, 16, 16, 16, 16, 16, 16 },
   { YG, YG, YG, YG, YG, YG, YG, YG },
   { VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB },
   { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
@@ -1607,8 +1600,7 @@ struct {
     "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
     "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
     "punpcklbw %%xmm4,%%xmm3 \n" \
-    "psubsw " MEMACCESS2(96, [kYuvConstants]) ",%%xmm3 \n" \
-    "pmullw " MEMACCESS2(112, [kYuvConstants]) ",%%xmm3 \n" \
+    "pmullw " MEMACCESS2(96, [kYuvConstants]) ",%%xmm3 \n" \
     "paddsw %%xmm3,%%xmm0 \n" \
     "paddsw %%xmm3,%%xmm1 \n" \
     "paddsw %%xmm3,%%xmm2 \n" \
@@ -1623,17 +1615,16 @@ struct {
 #define YVUTORGB \
     "movdqa %%xmm0,%%xmm1 \n" \
     "movdqa %%xmm0,%%xmm2 \n" \
-    "pmaddubsw " MEMACCESS2(128, [kYuvConstants]) ",%%xmm0 \n" \
-    "pmaddubsw " MEMACCESS2(144, [kYuvConstants]) ",%%xmm1 \n" \
-    "pmaddubsw " MEMACCESS2(160, [kYuvConstants]) ",%%xmm2 \n" \
+    "pmaddubsw " MEMACCESS2(112, [kYuvConstants]) ",%%xmm0 \n" \
+    "pmaddubsw " MEMACCESS2(128, [kYuvConstants]) ",%%xmm1 \n" \
+    "pmaddubsw " MEMACCESS2(144, [kYuvConstants]) ",%%xmm2 \n" \
     "psubw " MEMACCESS2(48, [kYuvConstants]) ",%%xmm0 \n" \
     "psubw " MEMACCESS2(64, [kYuvConstants]) ",%%xmm1 \n" \
     "psubw " MEMACCESS2(80, [kYuvConstants]) ",%%xmm2 \n" \
     "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
     "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
     "punpcklbw %%xmm4,%%xmm3 \n" \
-    "psubsw " MEMACCESS2(96, [kYuvConstants]) ",%%xmm3 \n" \
-    "pmullw " MEMACCESS2(112, [kYuvConstants]) ",%%xmm3 \n" \
+    "pmullw " MEMACCESS2(96, [kYuvConstants]) ",%%xmm3 \n" \
     "paddsw %%xmm3,%%xmm0 \n" \
     "paddsw %%xmm3,%%xmm1 \n" \
     "paddsw %%xmm3,%%xmm2 \n" \
@@ -2059,8 +2050,7 @@ struct {
   lvec16 kUVBiasB_AVX;  // 96
   lvec16 kUVBiasG_AVX;  // 128
   lvec16 kUVBiasR_AVX;  // 160
-  lvec16 kYSub16_AVX;  // 192
-  lvec16 kYToRgb_AVX;  // 224
+  lvec16 kYToRgb_AVX;  // 192
 } static SIMD_ALIGNED(kYuvConstants_AVX) = {
   { UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB,
     UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB },
@@ -2074,8 +2064,6 @@ struct {
     BG, BG, BG, BG, BG, BG, BG, BG },
   { BR, BR, BR, BR, BR, BR, BR, BR,
     BR, BR, BR, BR, BR, BR, BR, BR },
-  { 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16 },
   { YG, YG, YG, YG, YG, YG, YG, YG,
     YG, YG, YG, YG, YG, YG, YG, YG }
 };
@@ -2102,8 +2090,7 @@ struct {
     "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \
     "vpermq $0xd8,%%ymm3,%%ymm3 \n" \
     "vpunpcklbw %%ymm4,%%ymm3,%%ymm3 \n" \
-    "vpsubsw " MEMACCESS2(192, [kYuvConstants]) ",%%ymm3,%%ymm3 \n" \
-    "vpmullw " MEMACCESS2(224, [kYuvConstants]) ",%%ymm3,%%ymm3 \n" \
+    "vpmullw " MEMACCESS2(192, [kYuvConstants]) ",%%ymm3,%%ymm3 \n" \
     "vpaddsw %%ymm3,%%ymm0,%%ymm0 \n" \
     "vpaddsw %%ymm3,%%ymm1,%%ymm1 \n" \
     "vpaddsw %%ymm3,%%ymm2,%%ymm2 \n" \
@ -24,20 +24,20 @@ extern "C" {
|
|||||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
|
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
|
||||||
(defined(_M_IX86) || defined(_M_X64))
|
(defined(_M_IX86) || defined(_M_X64))
|
||||||
|
|
||||||
#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
|
#define YG 74 /* (int8)round(1.164 * 64 + 0.5) */
|
||||||
|
|
||||||
#define UB 127 /* min(127,(int8)(2.018 * 64)) */
|
#define UB 127 /* min(63,(int8)round(2.018 * 64)) */
|
||||||
#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
|
#define UG -25 /* (int8)round(-0.391 * 64 - 0.5) */
|
||||||
#define UR 0
|
#define UR 0
|
||||||
|
|
||||||
#define VB 0
|
#define VB 0
|
||||||
#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
|
#define VG -52 /* (int8)round(-0.813 * 64 - 0.5) */
|
||||||
#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
|
#define VR 102 /* (int8)round(1.596 * 64 + 0.5) */
|
||||||
|
|
||||||
// Bias
|
// Bias
|
||||||
#define BB UB * 128 + VB * 128
|
#define BB (UB * 128 + VB * 128 + YG * 16)
|
||||||
#define BG UG * 128 + VG * 128
|
#define BG (UG * 128 + VG * 128 + YG * 16)
|
||||||
#define BR UR * 128 + VR * 128
|
#define BR (UR * 128 + VR * 128 + YG * 16)
|
||||||
|
|
||||||
static const vec8 kUVToB = {
|
static const vec8 kUVToB = {
|
||||||
UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
|
UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
|
||||||
@@ -64,7 +64,6 @@ static const vec8 kVUToG = {
 };
 
 static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
-static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
 static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
 static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
 static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
@@ -98,7 +97,6 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
     xmm2 = _mm_sub_epi16(xmm2, *(__m128i*)kUVBiasR);
     xmm3 = _mm_loadl_epi64((__m128i*)y_buf);
     xmm3 = _mm_unpacklo_epi8(xmm3, xmm4);
-    xmm3 = _mm_subs_epi16(xmm3, *(__m128i*)kYSub16);
    xmm3 = _mm_mullo_epi16(xmm3, *(__m128i*)kYToRgb);
     xmm0 = _mm_adds_epi16(xmm0, xmm3);
     xmm1 = _mm_adds_epi16(xmm1, xmm3);
@@ -1489,9 +1487,6 @@ static const lvec8 kUVToG_AVX = {
 static const lvec16 kYToRgb_AVX = {
   YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG
 };
-static const lvec16 kYSub16_AVX = {
-  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
-};
 static const lvec16 kUVBiasB_AVX = {
   BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB
 };
@@ -1527,7 +1522,6 @@ static const lvec16 kUVBiasR_AVX = {
     __asm lea eax, [eax + 16] \
     __asm vpermq ymm3, ymm3, 0xd8 \
     __asm vpunpcklbw ymm3, ymm3, ymm4 \
-    __asm vpsubsw ymm3, ymm3, kYSub16_AVX \
     __asm vpmullw ymm3, ymm3, kYToRgb_AVX \
     __asm vpaddsw ymm0, ymm0, ymm3 /* B += Y */ \
     __asm vpaddsw ymm1, ymm1, ymm3 /* G += Y */ \
@@ -1727,7 +1721,7 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
 }
 #endif  // HAS_I422TOABGRROW_AVX2
 
-#ifdef HAS_I422TOARGBROW_SSSE3
+#if defined(HAS_I422TOARGBROW_SSSE3)
 // TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
 
 // Read 8 UV from 444.
@@ -1781,7 +1775,6 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
     __asm movq xmm3, qword ptr [eax] /* NOLINT */ \
     __asm lea eax, [eax + 8] \
     __asm punpcklbw xmm3, xmm4 \
-    __asm psubsw xmm3, kYSub16 \
     __asm pmullw xmm3, kYToRgb \
     __asm paddsw xmm0, xmm3 /* B += Y */ \
     __asm paddsw xmm1, xmm3 /* G += Y */ \
@@ -1809,7 +1802,6 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
     __asm movq xmm3, qword ptr [eax] /* NOLINT */ \
     __asm lea eax, [eax + 8] \
     __asm punpcklbw xmm3, xmm4 \
-    __asm psubsw xmm3, kYSub16 \
     __asm pmullw xmm3, kYToRgb \
     __asm paddsw xmm0, xmm3 /* B += Y */ \
     __asm paddsw xmm1, xmm3 /* G += Y */ \
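
A quick sanity check of the new bias constants (my arithmetic, not part of the commit): BB = 127 * 128 + 0 * 128 + 74 * 16 = 17440, BG = -25 * 128 + -52 * 128 + 74 * 16 = -8672, and BR = 0 * 128 + 102 * 128 + 74 * 16 = 14240. All three fit comfortably in int16, so the enlarged biases can stay in the vec16/lvec16 vectors consumed by psubw/vpsubw.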